Handle errors when deleting objects from S3

I recently noticed that the cost for ListBucket calls was increasing for an
application that was using Litestream. After investigating it seemed that the
bucket had retained the entire history of data, while Litestream was
continually logging that it was deleting the same data:

```
2022-10-30T12:00:27Z (s3): wal segmented deleted before 0792d3393bf79ced/00000233: n=1428
<snip>
2022-10-30T13:00:24Z (s3): wal segmented deleted before 0792d3393bf79ced/00000233: n=1428
```

This is occuring because the DeleteObjects call is a batch item, that returns
the individual object deletion errors in the response[1]. The S3 replica client
discards the response, and only handles errors in the original API call. I had
a misconfigured IAM policy that meant all deletes were failing, but this never
actually bubbled up as a real error.

To fix this, I added a check for the response body to handle any errors the
operation might have encountered. Because this may include a large number of
errors (in this case 1428 of them), the output is summarized to avoid an overly
large error message. When items are not found, they will not return an error[2]
- they will still be marked as deleted, so this change should be in-line with
the original intentions of this code.

1: https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html#API_DeleteObjects_Example_2
2: https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
This commit is contained in:
Lincoln Stoll
2022-10-31 09:14:02 +01:00
committed by Ben Johnson
parent f50f03d8fc
commit 64e535e23b

View File

@@ -207,10 +207,14 @@ func (c *ReplicaClient) DeleteGeneration(ctx context.Context, generation string)
n = len(objIDs) n = len(objIDs)
} }
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
Bucket: aws.String(c.Bucket), Bucket: aws.String(c.Bucket),
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)}, Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
}); err != nil { })
if err != nil {
return err
}
if err := deleteOutputError(out); err != nil {
return err return err
} }
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc() internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
@@ -297,10 +301,14 @@ func (c *ReplicaClient) DeleteSnapshot(ctx context.Context, generation string, i
key := path.Join(c.Path, "generations", generation, "snapshots", litestream.FormatIndex(index)+".snapshot.lz4") key := path.Join(c.Path, "generations", generation, "snapshots", litestream.FormatIndex(index)+".snapshot.lz4")
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
Bucket: aws.String(c.Bucket), Bucket: aws.String(c.Bucket),
Delete: &s3.Delete{Objects: []*s3.ObjectIdentifier{{Key: &key}}, Quiet: aws.Bool(true)}, Delete: &s3.Delete{Objects: []*s3.ObjectIdentifier{{Key: &key}}, Quiet: aws.Bool(true)},
}); err != nil { })
if err != nil {
return err
}
if err := deleteOutputError(out); err != nil {
return err return err
} }
@@ -397,13 +405,16 @@ func (c *ReplicaClient) DeleteWALSegments(ctx context.Context, a []litestream.Po
} }
// Delete S3 objects in bulk. // Delete S3 objects in bulk.
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
Bucket: aws.String(c.Bucket), Bucket: aws.String(c.Bucket),
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)}, Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
}); err != nil { })
if err != nil {
return err
}
if err := deleteOutputError(out); err != nil {
return err return err
} }
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc() internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
a = a[n:] a = a[n:]
@@ -446,10 +457,14 @@ func (c *ReplicaClient) DeleteAll(ctx context.Context) error {
n = len(objIDs) n = len(objIDs)
} }
if _, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ out, err := c.s3.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{
Bucket: aws.String(c.Bucket), Bucket: aws.String(c.Bucket),
Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)}, Delete: &s3.Delete{Objects: objIDs[:n], Quiet: aws.Bool(true)},
}); err != nil { })
if err != nil {
return err
}
if err := deleteOutputError(out); err != nil {
return err return err
} }
internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc() internal.OperationTotalCounterVec.WithLabelValues(ReplicaClientType, "DELETE").Inc()
@@ -749,3 +764,15 @@ func isNotExists(err error) bool {
return false return false
} }
} }
func deleteOutputError(out *s3.DeleteObjectsOutput) error {
switch len(out.Errors) {
case 0:
return nil
case 1:
return fmt.Errorf("deleting object %s: %s - %s", *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message)
default:
return fmt.Errorf("%d errors occured deleting objects, %s: %s - (%s (and %d others)",
len(out.Errors), *out.Errors[0].Key, *out.Errors[0].Code, *out.Errors[0].Message, len(out.Errors)-1)
}
}