Handle errors when deleting objects from S3
I recently noticed that the cost for ListBucket calls was increasing for an application that was using Litestream. After investigating it seemed that the bucket had retained the entire history of data, while Litestream was continually logging that it was deleting the same data: ``` 2022-10-30T12:00:27Z (s3): wal segmented deleted before 0792d3393bf79ced/00000233: n=1428 <snip> 2022-10-30T13:00:24Z (s3): wal segmented deleted before 0792d3393bf79ced/00000233: n=1428 ``` This is occuring because the DeleteObjects call is a batch item, that returns the individual object deletion errors in the response[1]. The S3 replica client discards the response, and only handles errors in the original API call. I had a misconfigured IAM policy that meant all deletes were failing, but this never actually bubbled up as a real error. To fix this, I added a check for the response body to handle any errors the operation might have encountered. Because this may include a large number of errors (in this case 1428 of them), the output is summarized to avoid an overly large error message. When items are not found, they will not return an error[2] - they will still be marked as deleted, so this change should be in-line with the original intentions of this code. 1: https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html#API_DeleteObjects_Example_2 2: https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
This commit is contained in:
committed by
Ben Johnson
parent
6b93b6012a
commit
5afd0bf161
@@ -208,10 +208,14 @@ func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string)
|
|||||||
n = len(objIDs)
|
n = len(objIDs)
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
||||||
Bucket: aws.String(c.Bucket),
|
Bucket: aws.String(c.Bucket),
|
||||||
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
||||||
}); err != nil {
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := deleteOutputError(out); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
||||||
@@ -303,10 +307,14 @@ func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, i
|
|||||||
return fmt.Errorf("cannot determine snapshot path: %w", err)
|
return fmt.Errorf("cannot determine snapshot path: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
||||||
Bucket: aws.String(c.Bucket),
|
Bucket: aws.String(c.Bucket),
|
||||||
Delete: &s3.Delete{Objects: []*s3.ObjectIdentifier{{Key: &key}}, Quiet: aws.Bool(true)},
|
Delete: &s3.Delete{Objects: []*s3.ObjectIdentifier{{Key: &key}}, Quiet: aws.Bool(true)},
|
||||||
}); err != nil {
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := deleteOutputError(out); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -405,13 +413,16 @@ func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Po
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Delete S3 objects in bulk.
|
// Delete S3 objects in bulk.
|
||||||
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
||||||
Bucket: aws.String(c.Bucket),
|
Bucket: aws.String(c.Bucket),
|
||||||
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
||||||
}); err != nil {
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := deleteOutputError(out); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
||||||
|
|
||||||
a = a[n:]
|
a = a[n:]
|
||||||
@@ -454,10 +465,14 @@ func (c *ReplicaClient) DeleteAll(ctx context.Context) error {
|
|||||||
n = len(objIDs)
|
n = len(objIDs)
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
|
||||||
Bucket: aws.String(c.Bucket),
|
Bucket: aws.String(c.Bucket),
|
||||||
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
|
||||||
}); err != nil {
|
})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := deleteOutputError(out); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
|
||||||
@@ -737,3 +752,15 @@ func isNotExists(err error) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func deleteOutputError(out *s3.DeleteObjectsOutput) error {
|
||||||
|
switch len(out.Errors) {
|
||||||
|
case 0:
|
||||||
|
return nil
|
||||||
|
case 1:
|
||||||
|
return fmt.Errorf("deleting object %s: %s - %s", *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("%d errors occured deleting objects, %s: %s - (%s (and %d others)",
|
||||||
|
len(out.Errors), *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message, len(out.Errors)-1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user