Compare commits

..

1 Commits

Author SHA1 Message Date
f48630d8cc Use S3 ListObjectsV2 for listing files
ListObjects has been deprecated since 2016 and ListObjectsV2 with use of
explicit pagination tokens is more performant for large listings as well.

This also mitigates an issue with iDrive E2 where the StartAfter/Marker
is included in the output, leading to duplicate entries. Right now this
causes an exhaustive prune to delete chunks erroneously flagged as
duplicate, destroying the storage.
2023-09-29 20:33:17 -05:00
13 changed files with 65 additions and 210 deletions

View File

@@ -2262,7 +2262,7 @@ func main() {
app.Name = "duplicacy"
app.HelpName = "duplicacy"
app.Usage = "A new generation cloud backup tool based on lock-free deduplication"
app.Version = "3.2.1" + " (" + GitCommit + ")"
app.Version = "3.2.0" + " (" + GitCommit + ")"
// Exit with code 2 if an invalid command is provided
app.CommandNotFound = func(context *cli.Context, command string) {

1
go.mod
View File

@@ -15,6 +15,7 @@ require (
github.com/gilbertchen/gopass v0.0.0-20170109162249-bf9dde6d0d2c
github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01
github.com/hirochachacha/go-smb2 v1.1.0
github.com/klauspost/compress v1.16.3
github.com/klauspost/reedsolomon v1.9.9

2
go.sum
View File

@@ -47,6 +47,8 @@ github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8 h1:ijgl4Y+
github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8/go.mod h1:0lQcVva56+L1PuUFXLOsJ6arJQaU0baIH8q+IegeBhg=
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508 h1:SqTyk5KkNXp7zTdTttIZSDcTrL5uau4K/2OpKvgBZVI=
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508/go.mod h1:w/pisxUZezf2XzU9Ewjphcf6q1mZtOzKPHhJiuc8cag=
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01 h1:LqwS9qL6SrDkp0g0iwUkETrDdtB9gTKaIbSn9imUq5o=
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01/go.mod h1:TMlibuxKfkdtHyltooAw7+DHqRpaXs9nxaffk00Sh1Q=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/goamz/goamz v0.0.0-20180131231218-8b901b531db8 h1:G1U0vew/vA/1/hBmf1XNeyIzJJbPFVv+kb+HPl6rj6c=

View File

@@ -396,7 +396,7 @@ type B2ListFileNamesOutput struct {
func (client *B2Client) ListFileNames(threadIndex int, startFileName string, singleFile bool, includeVersions bool) (files []*B2Entry, err error) {
maxFileCount := 10_000
maxFileCount := 1000
if singleFile {
if includeVersions {
maxFileCount = 4

View File

@@ -304,24 +304,7 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
remoteEntry = nil
}
if localEntry.IsHardlinkedFrom() {
// FIXME: Sanity check?
// FIXME: perhaps we can make size = 0 an initial invariant of the link?
//
// Note that if the initial size was 0 then this original logic doesn't change!
localEntry.Size = 0
// targetEntry, ok := localEntryList.HardLinkTable[localEntry.Link]
// if !ok {
// LOG_ERROR("BACKUP_CREATE", "Hard link %s not found in entry cache for path %s", localEntry.Link, localEntry.Path)
// }
// localEntry.Size = targetEntry.Size
// localEntry.Hash = targetEntry.Hash
// localEntry.StartChunk = targetEntry.StartChunk
// localEntry.StartOffset = targetEntry.StartOffset
// localEntry.EndChunk = targetEntry.EndChunk
// localEntry.EndOffset = targetEntry.EndOffset
// LOG_DEBUG("BACKUP_CREATE", "Hard link %s to %s in initial listing", localEntry.Link, targetEntry.Path)
} else if compareResult == 0 {
if compareResult == 0 {
// No need to check if it is in hash mode -- in that case remote listing is nil
if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() {
if localEntry.Size > 0 {
@@ -639,11 +622,6 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
return true
}
type hardLinkEntry struct {
entry *Entry
willDownload bool
}
// Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads
// files that are different. 'base' is a directory that contains files at a different revision which can
// serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be
@@ -725,16 +703,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var localEntry *Entry
localListingOK := true
hardLinkTable := make(map[string]hardLinkEntry)
//hardLinks := make([]*Entry, 0)
for remoteEntry := range remoteListingChannel {
if remoteEntry.IsFile() && remoteEntry.Link == "/" {
LOG_INFO("RESTORE_LINK", "Noting hardlinked source file %s", remoteEntry.Path)
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, false}
}
if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) {
continue
}
@@ -743,8 +713,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var compareResult int
for {
// TODO: We likely need to check if a local listing file exists in the hardLinkTable for the case where one is restoring a hardlink
// to an existing disk file. Right now, we'll just end up downloading the file new.
if localEntry == nil && localListingOK {
localEntry, localListingOK = <- localListingChannel
}
@@ -762,28 +730,12 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
}
if compareResult == 0 {
// if quickMode && localEntry.IsFile() {
if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size
skippedFileCount++
localEntry = nil
continue
// checkEntry := remoteEntry
// if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
// if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
// LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
// } else {
// checkEntry = e.entry
// }
// }
// if localEntry.IsSameAs(checkEntry) {
// LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
// skippedFileSize += localEntry.Size
// skippedFileCount++
// localEntry = nil
// continue
// }
}
localEntry = nil
}
@@ -830,21 +782,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
}
directoryEntries = append(directoryEntries, remoteEntry)
} else {
// if remoteEntry.Link == "/" {
// hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true}
// } else if len(remoteEntry.Link) > 0 {
// if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
// LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
// } else if !e.willDownload {
// origSourcePath := e.entry.Path
// e.entry.Path = remoteEntry.Path
// remoteEntry = e.entry
// hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true}
// } else {
// hardLinks = append(hardLinks, remoteEntry)
// continue
// }
// }
// We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size
@@ -900,10 +837,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
stat, _ := os.Stat(fullPath)
if stat != nil {
if quickMode {
// cmpFile := file
// if file.IsFile() && len(file.Link) > 0 && file.Link != "/" {
// cmpFile = hardLinkTable[file.Link].entry
// }
if file.IsSameAsFileInfo(stat) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path)
skippedFileSize += file.Size
@@ -941,6 +874,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
downloadedFileSize += file.Size
downloadedFiles = append(downloadedFiles, file)
}
continue
}
@@ -967,15 +901,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner)
}
// for _, linkEntry := range hardLinks {
// sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path)
// fullPath := joinPath(top, linkEntry.Path)
// LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath)
// if err := os.Link(sourcePath, fullPath); err != nil {
// LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath)
// }
// }
if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles {
@@ -1128,9 +1053,6 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
lastEndChunk := 0
uploadEntryInfoFunc := func(entry *Entry) error {
if entry.IsHardlinkRoot() {
entryList.HardLinkTable[entry.Path] = entry
}
if entry.IsFile() && entry.Size > 0 {
delta := entry.StartChunk - len(chunkHashes) + 1
@@ -1153,14 +1075,6 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
entry.StartChunk -= lastEndChunk
lastEndChunk = entry.EndChunk
entry.EndChunk = delta
} else if entry.IsHardlinkedFrom() {
targetEntry, ok := entryList.HardLinkTable[entry.Link]
if !ok {
LOG_ERROR("SNAPSHOT_UPLOAD", "Unable to find hardlink target for %s to %s", entry.Path, entry.Link)
}
// FIXME: We will use a copy, so it is probably sufficient to skip rereading xattrs and such in the initial code
entry = entry.LinkTo(targetEntry)
LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink entry for %s to %s", entry.Path, entry.Link)
}
buffer.Reset()

View File

@@ -4,8 +4,6 @@
package duplicacy
import (
"bytes"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
@@ -17,10 +15,12 @@ import (
"sort"
"strconv"
"strings"
"syscall"
"time"
"bytes"
"crypto/sha256"
"github.com/vmihailenco/msgpack"
)
// This is the hidden directory in the repository for storing various files.
@@ -119,27 +119,6 @@ func (entry *Entry) Copy() *Entry {
}
}
func (entry *Entry) LinkTo(target *Entry) *Entry {
return &Entry{
Path: entry.Path,
Size: target.Size,
Time: target.Time,
Mode: target.Mode,
Link: entry.Link,
Hash: target.Hash,
UID: target.UID,
GID: target.GID,
StartChunk: target.StartChunk,
StartOffset: target.StartOffset,
EndChunk: target.EndChunk,
EndOffset: target.EndOffset,
Attributes: target.Attributes,
}
}
// CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -513,18 +492,6 @@ func (entry *Entry) IsComplete() bool {
return entry.Size >= 0
}
func (entry *Entry) IsFileNotHardlink() bool {
return entry.IsFile() && (len(entry.Link) == 0 || entry.Link == "/")
}
func (entry *Entry) IsHardlinkedFrom() bool {
return entry.IsFile() && len(entry.Link) > 0 && entry.Link != "/"
}
func (entry *Entry) IsHardlinkRoot() bool {
return entry.IsFile() && entry.Link == "/"
}
func (entry *Entry) GetPermissions() os.FileMode {
return os.FileMode(entry.Mode) & fileModeMask
}
@@ -727,26 +694,10 @@ func (files FileInfoCompare) Less(i, j int) bool {
}
}
type listEntryLinkKey struct {
dev uint64
ino uint64
}
type ListingState struct {
linkTable map[listEntryLinkKey]string // map unique inode details to initially found path
}
func NewListingState() *ListingState {
return &ListingState{
linkTable: make(map[listEntryLinkKey]string),
}
}
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool,
listingState *ListingState,
listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) {
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -833,20 +784,6 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue
}
if entry.IsFile() {
stat, ok := f.Sys().(*syscall.Stat_t)
if ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if path, ok := listingState.linkTable[k]; ok {
LOG_DEBUG("LIST_HARDLINK", "Detected hardlink %s to %s", entry.Path, path)
entry.Link = path
} else {
entry.Link = "/"
listingState.linkTable[k] = entry.Path
}
}
}
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {

View File

@@ -5,8 +5,6 @@
package duplicacy
import (
"bytes"
"encoding/json"
"io/ioutil"
"math/rand"
"os"
@@ -15,9 +13,10 @@ import (
"sort"
"strings"
"testing"
"bytes"
"encoding/json"
"github.com/pkg/xattr"
"github.com/gilbertchen/xattr"
"github.com/vmihailenco/msgpack"
)
@@ -234,16 +233,8 @@ func TestEntryOrder(t *testing.T) {
// TestEntryExcludeByAttribute tests the excludeByAttribute parameter to the ListEntries function
func TestEntryExcludeByAttribute(t *testing.T) {
var excludeAttrName string
var excludeAttrValue []byte
if runtime.GOOS == "darwin" {
excludeAttrName = "com.apple.metadata:com_apple_backup_excludeItem"
excludeAttrValue = []byte("com.apple.backupd")
} else if runtime.GOOS == "linux" || runtime.GOOS == "freebsd" || runtime.GOOS == "netbsd" || runtime.GOOS == "solaris" {
excludeAttrName = "user.duplicacy_exclude"
} else {
t.Skip("skipping test, not darwin, linux, freebsd, netbsd, or solaris")
if !(runtime.GOOS == "darwin" || runtime.GOOS == "linux") {
t.Skip("skipping test not darwin or linux")
}
testDir := filepath.Join(os.TempDir(), "duplicacy_test")
@@ -282,7 +273,7 @@ func TestEntryExcludeByAttribute(t *testing.T) {
for _, file := range DATA {
fullPath := filepath.Join(testDir, file)
if strings.Contains(file, "exclude") {
xattr.Set(fullPath, excludeAttrName, excludeAttrValue)
xattr.Setxattr(fullPath, "com.apple.metadata:com_apple_backup_excludeItem", []byte("com.apple.backupd"))
}
}

View File

@@ -62,7 +62,6 @@ type EntryList struct {
uploadedChunkIndex int // counter for upload chunks
uploadedChunkOffset int // the start offset for the current modified entry
HardLinkTable map[string]*Entry
}
// Create a new entry list
@@ -79,7 +78,6 @@ func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries
maximumInMemoryEntries: maximumInMemoryEntries,
cachePath: cachePath,
Token: string(token),
HardLinkTable: make(map[string]*Entry),
}
return entryList, nil
@@ -120,7 +118,7 @@ func (entryList *EntryList)AddEntry(entry *Entry) error {
if !entry.IsComplete() {
if entry.IsDir() || entry.IsLink() {
entry.Size = 0
} else if !entry.IsHardlinkedFrom() {
} else {
modifiedEntry := ModifiedEntry {
Path: entry.Path,
Size: -1,

View File

@@ -68,7 +68,6 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string
var listingState = NewListingState()
if filtersFile == "" {
filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
@@ -82,7 +81,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
if err != nil {
if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)

View File

@@ -8,7 +8,7 @@ import (
"strings"
)
func excludedByAttribute(attributes map[string][]byte) bool {
value, ok := attributes["com.apple.metadata:com_apple_backup_excludeItem"]
func excludedByAttribute(attirbutes map[string][]byte) bool {
value, ok := attirbutes["com.apple.metadata:com_apple_backup_excludeItem"]
return ok && strings.Contains(string(value), "com.apple.backupd")
}

View File

@@ -0,0 +1,13 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
)
func excludedByAttribute(attirbutes map[string][]byte) bool {
_, ok := attirbutes["duplicacy_exclude"]
return ok
}

View File

@@ -2,12 +2,12 @@
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
//go:build freebsd || netbsd || linux || solaris
// +build freebsd netbsd linux solaris
package duplicacy
func excludedByAttribute(attributes map[string][]byte) bool {
_, ok := attributes["user.duplicacy_exclude"]
import (
)
func excludedByAttribute(attirbutes map[string][]byte) bool {
_, ok := attirbutes["duplicacy_exclude"]
return ok
}

View File

@@ -132,6 +132,6 @@ func SplitDir(fullPath string) (dir string, file string) {
return fullPath[:i+1], fullPath[i+1:]
}
func excludedByAttribute(attributes map[string][]byte) bool {
func excludedByAttribute(attirbutes map[string][]byte) bool {
return false
}