Use S3 ListObjectsV2 for listing files

ListObjects has been deprecated since 2016 and ListObjectsV2 with use of
explicit pagination tokens is more performant for large listings as well.

This also mitigates an issue with iDrive E2 where the StartAfter/Marker
is included in the output, leading to duplicate entries. Right now this
causes an exhaustive prune to delete chunks erroneously flagged as
duplicate, destroying the storage.
This commit is contained in:
2023-09-23 22:12:08 -05:00
parent 4e9d2c4cca
commit 915776161f

View File

@@ -90,48 +90,40 @@ func (storage *S3Storage) ListFiles(threadIndex int, dir string) (files []string
if dir == "snapshots/" { if dir == "snapshots/" {
dir = storage.storageDir + dir dir = storage.storageDir + dir
input := s3.ListObjectsInput{ input := s3.ListObjectsV2Input{
Bucket: aws.String(storage.bucket), Bucket: aws.String(storage.bucket),
Prefix: aws.String(dir), Prefix: aws.String(dir),
Delimiter: aws.String("/"), Delimiter: aws.String("/"),
MaxKeys: aws.Int64(1000),
} }
output, err := storage.client.ListObjects(&input) err := storage.client.ListObjectsV2Pages(&input, func(page *s3.ListObjectsV2Output, lastPage bool) bool {
for _, subDir := range page.CommonPrefixes {
files = append(files, (*subDir.Prefix)[len(dir):])
}
return true
})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
for _, subDir := range output.CommonPrefixes {
files = append(files, (*subDir.Prefix)[len(dir):])
}
return files, nil, nil return files, nil, nil
} else { } else {
dir = storage.storageDir + dir dir = storage.storageDir + dir
marker := "" input := s3.ListObjectsV2Input{
for { Bucket: aws.String(storage.bucket),
input := s3.ListObjectsInput{ Prefix: aws.String(dir),
Bucket: aws.String(storage.bucket), MaxKeys: aws.Int64(1000),
Prefix: aws.String(dir), }
MaxKeys: aws.Int64(1000),
Marker: aws.String(marker),
}
output, err := storage.client.ListObjects(&input) err := storage.client.ListObjectsV2Pages(&input, func(page *s3.ListObjectsV2Output, lastPage bool) bool {
if err != nil { for _, object := range page.Contents {
return nil, nil, err
}
for _, object := range output.Contents {
files = append(files, (*object.Key)[len(dir):]) files = append(files, (*object.Key)[len(dir):])
sizes = append(sizes, *object.Size) sizes = append(sizes, *object.Size)
} }
return true
if !*output.IsTruncated { })
break if err != nil {
} return nil, nil, err
marker = *output.Contents[len(output.Contents)-1].Key
} }
return files, sizes, nil return files, sizes, nil
} }