Compare commits

...

3 Commits

Author SHA1 Message Date
2bb70595c5 Initial hardlink in snapshot support.
- Create a new snapshot version number as this method is not backwards
compatible.
- This has some breakages with restoring. Namely if the root file
is not marked for download any hardlinked files that need to be restored
will not be linked, they will be restored as a regular file
2023-09-29 23:39:02 -05:00
e3bf370e35 Fix exclude_by_attribute feature on POSIX
The exclude by attribute function is broken on non-Darwin POSIX: linux and freebsd.
This is because those xattrs must be prefixed by a legal namespace. The old xattr
library implicitly appended the user namespace to the xattr, but the current
official go pkg does not (which is just as well).

Also fix the test to remove the discordant old xattr dependency and provide
test cases for both darwin and non-darwin POSIX.
2023-09-29 23:38:32 -05:00
f48630d8cc Use S3 ListObjectsV2 for listing files
ListObjects has been deprecated since 2016 and ListObjectsV2 with use of
explicit pagination tokens is more performant for large listings as well.

This also mitigates an issue with iDrive E2 where the StartAfter/Marker
is included in the output, leading to duplicate entries. Right now this
causes an exhaustive prune to delete chunks erroneously flagged as
duplicate, destroying the storage.
2023-09-29 20:33:17 -05:00
11 changed files with 141 additions and 69 deletions

1
go.mod
View File

@@ -15,7 +15,6 @@ require (
github.com/gilbertchen/gopass v0.0.0-20170109162249-bf9dde6d0d2c github.com/gilbertchen/gopass v0.0.0-20170109162249-bf9dde6d0d2c
github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8 github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508 github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01
github.com/hirochachacha/go-smb2 v1.1.0 github.com/hirochachacha/go-smb2 v1.1.0
github.com/klauspost/compress v1.16.3 github.com/klauspost/compress v1.16.3
github.com/klauspost/reedsolomon v1.9.9 github.com/klauspost/reedsolomon v1.9.9

2
go.sum
View File

@@ -47,8 +47,6 @@ github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8 h1:ijgl4Y+
github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8/go.mod h1:0lQcVva56+L1PuUFXLOsJ6arJQaU0baIH8q+IegeBhg= github.com/gilbertchen/highwayhash v0.0.0-20221109044721-eeab1f4799d8/go.mod h1:0lQcVva56+L1PuUFXLOsJ6arJQaU0baIH8q+IegeBhg=
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508 h1:SqTyk5KkNXp7zTdTttIZSDcTrL5uau4K/2OpKvgBZVI= github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508 h1:SqTyk5KkNXp7zTdTttIZSDcTrL5uau4K/2OpKvgBZVI=
github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508/go.mod h1:w/pisxUZezf2XzU9Ewjphcf6q1mZtOzKPHhJiuc8cag= github.com/gilbertchen/keyring v0.0.0-20221004152639-1661cbebc508/go.mod h1:w/pisxUZezf2XzU9Ewjphcf6q1mZtOzKPHhJiuc8cag=
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01 h1:LqwS9qL6SrDkp0g0iwUkETrDdtB9gTKaIbSn9imUq5o=
github.com/gilbertchen/xattr v0.0.0-20160926155429-68e7a6806b01/go.mod h1:TMlibuxKfkdtHyltooAw7+DHqRpaXs9nxaffk00Sh1Q=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/goamz/goamz v0.0.0-20180131231218-8b901b531db8 h1:G1U0vew/vA/1/hBmf1XNeyIzJJbPFVv+kb+HPl6rj6c= github.com/goamz/goamz v0.0.0-20180131231218-8b901b531db8 h1:G1U0vew/vA/1/hBmf1XNeyIzJJbPFVv+kb+HPl6rj6c=

View File

@@ -622,6 +622,11 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
return true return true
} }
type hardLinkEntry struct {
entry *Entry
willDownload bool
}
// Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads // Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads
// files that are different. 'base' is a directory that contains files at a different revision which can // files that are different. 'base' is a directory that contains files at a different revision which can
// serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be // serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be
@@ -703,8 +708,16 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var localEntry *Entry var localEntry *Entry
localListingOK := true localListingOK := true
hardLinkTable := make(map[string]hardLinkEntry)
hardLinks := make([]*Entry, 0)
for remoteEntry := range remoteListingChannel { for remoteEntry := range remoteListingChannel {
if remoteEntry.IsFile() && remoteEntry.Link == "/" {
LOG_INFO("RESTORE_LINK", "Noting hardlinked source file %s", remoteEntry.Path)
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, false}
}
if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) {
continue continue
} }
@@ -713,6 +726,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var compareResult int var compareResult int
for { for {
// TODO: We likely need to check if a local listing file exists in the hardLinkTable for the case where one is restoring a hardlink
// to an existing disk file. Right now, we'll just end up downloading the file new.
if localEntry == nil && localListingOK { if localEntry == nil && localListingOK {
localEntry, localListingOK = <- localListingChannel localEntry, localListingOK = <- localListingChannel
} }
@@ -730,13 +745,23 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
if compareResult == 0 { if compareResult == 0 {
if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) { if quickMode && localEntry.IsFile() {
checkEntry := remoteEntry
if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else {
checkEntry = e.entry
}
}
if localEntry.IsSameAs(checkEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size skippedFileSize += localEntry.Size
skippedFileCount++ skippedFileCount++
localEntry = nil localEntry = nil
continue continue
} }
}
localEntry = nil localEntry = nil
} }
@@ -782,6 +807,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
directoryEntries = append(directoryEntries, remoteEntry) directoryEntries = append(directoryEntries, remoteEntry)
} else { } else {
if remoteEntry.Link == "/" {
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true}
} else if len(remoteEntry.Link) > 0 {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else if !e.willDownload {
origSourcePath := e.entry.Path
e.entry.Path = remoteEntry.Path
remoteEntry = e.entry
hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true}
} else {
hardLinks = append(hardLinks, remoteEntry)
continue
}
}
// We can't download files here since fileEntries needs to be sorted // We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry) fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size totalFileSize += remoteEntry.Size
@@ -837,7 +877,11 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
stat, _ := os.Stat(fullPath) stat, _ := os.Stat(fullPath)
if stat != nil { if stat != nil {
if quickMode { if quickMode {
if file.IsSameAsFileInfo(stat) { cmpFile := file
if file.IsFile() && len(file.Link) > 0 && file.Link != "/" {
cmpFile = hardLinkTable[file.Link].entry
}
if cmpFile.IsSameAsFileInfo(stat) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path)
skippedFileSize += file.Size skippedFileSize += file.Size
skippedFileCount++ skippedFileCount++
@@ -874,7 +918,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
downloadedFileSize += file.Size downloadedFileSize += file.Size
downloadedFiles = append(downloadedFiles, file) downloadedFiles = append(downloadedFiles, file)
} }
continue continue
} }
@@ -901,6 +944,15 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner) file.RestoreMetadata(fullPath, nil, setOwner)
} }
for _, linkEntry := range hardLinks {
sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path)
fullPath := joinPath(top, linkEntry.Path)
LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath)
if err := os.Link(sourcePath, fullPath); err != nil {
LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath)
}
}
if deleteMode && len(patterns) == 0 { if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted // Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles { for i := range extraFiles {

View File

@@ -18,6 +18,7 @@ import (
"time" "time"
"bytes" "bytes"
"crypto/sha256" "crypto/sha256"
"syscall"
"github.com/vmihailenco/msgpack" "github.com/vmihailenco/msgpack"
@@ -694,10 +695,26 @@ func (files FileInfoCompare) Less(i, j int) bool {
} }
} }
type listEntryLinkKey struct {
dev uint64
ino uint64
}
type ListingState struct {
linkTable map[listEntryLinkKey]string // map unique inode details to initially found path
}
func NewListingState() *ListingState {
return &ListingState{
linkTable: make(map[listEntryLinkKey]string),
}
}
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool,
skippedFiles []string, err error) { listingState *ListingState,
listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -784,6 +801,21 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue continue
} }
if entry.IsFile() {
stat, ok := f.Sys().(*syscall.Stat_t)
if ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if path, ok := listingState.linkTable[k]; ok {
LOG_WARN("LIST_LINK", "Linking %s to %s", entry.Path, path)
entry.Link = path
entry.Size = 0
} else {
entry.Link = "/"
listingState.linkTable[k] = entry.Path
}
}
}
if entry.IsDir() { if entry.IsDir() {
directoryList = append(directoryList, entry) directoryList = append(directoryList, entry)
} else { } else {

View File

@@ -5,6 +5,8 @@
package duplicacy package duplicacy
import ( import (
"bytes"
"encoding/json"
"io/ioutil" "io/ioutil"
"math/rand" "math/rand"
"os" "os"
@@ -13,10 +15,9 @@ import (
"sort" "sort"
"strings" "strings"
"testing" "testing"
"bytes"
"encoding/json"
"github.com/gilbertchen/xattr" "github.com/pkg/xattr"
"github.com/vmihailenco/msgpack" "github.com/vmihailenco/msgpack"
) )
@@ -233,8 +234,16 @@ func TestEntryOrder(t *testing.T) {
// TestEntryExcludeByAttribute tests the excludeByAttribute parameter to the ListEntries function // TestEntryExcludeByAttribute tests the excludeByAttribute parameter to the ListEntries function
func TestEntryExcludeByAttribute(t *testing.T) { func TestEntryExcludeByAttribute(t *testing.T) {
if !(runtime.GOOS == "darwin" || runtime.GOOS == "linux") { var excludeAttrName string
t.Skip("skipping test not darwin or linux") var excludeAttrValue []byte
if runtime.GOOS == "darwin" {
excludeAttrName = "com.apple.metadata:com_apple_backup_excludeItem"
excludeAttrValue = []byte("com.apple.backupd")
} else if runtime.GOOS == "linux" || runtime.GOOS == "freebsd" || runtime.GOOS == "netbsd" || runtime.GOOS == "solaris" {
excludeAttrName = "user.duplicacy_exclude"
} else {
t.Skip("skipping test, not darwin, linux, freebsd, netbsd, or solaris")
} }
testDir := filepath.Join(os.TempDir(), "duplicacy_test") testDir := filepath.Join(os.TempDir(), "duplicacy_test")
@@ -273,7 +282,7 @@ func TestEntryExcludeByAttribute(t *testing.T) {
for _, file := range DATA { for _, file := range DATA {
fullPath := filepath.Join(testDir, file) fullPath := filepath.Join(testDir, file)
if strings.Contains(file, "exclude") { if strings.Contains(file, "exclude") {
xattr.Setxattr(fullPath, "com.apple.metadata:com_apple_backup_excludeItem", []byte("com.apple.backupd")) xattr.Set(fullPath, excludeAttrName, excludeAttrValue)
} }
} }

View File

@@ -90,48 +90,40 @@ func (storage *S3Storage) ListFiles(threadIndex int, dir string) (files []string
if dir == "snapshots/" { if dir == "snapshots/" {
dir = storage.storageDir + dir dir = storage.storageDir + dir
input := s3.ListObjectsInput{ input := s3.ListObjectsV2Input{
Bucket: aws.String(storage.bucket), Bucket: aws.String(storage.bucket),
Prefix: aws.String(dir), Prefix: aws.String(dir),
Delimiter: aws.String("/"), Delimiter: aws.String("/"),
MaxKeys: aws.Int64(1000),
} }
output, err := storage.client.ListObjects(&input) err := storage.client.ListObjectsV2Pages(&input, func(page *s3.ListObjectsV2Output, lastPage bool) bool {
for _, subDir := range page.CommonPrefixes {
files = append(files, (*subDir.Prefix)[len(dir):])
}
return true
})
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
for _, subDir := range output.CommonPrefixes {
files = append(files, (*subDir.Prefix)[len(dir):])
}
return files, nil, nil return files, nil, nil
} else { } else {
dir = storage.storageDir + dir dir = storage.storageDir + dir
marker := "" input := s3.ListObjectsV2Input{
for {
input := s3.ListObjectsInput{
Bucket: aws.String(storage.bucket), Bucket: aws.String(storage.bucket),
Prefix: aws.String(dir), Prefix: aws.String(dir),
MaxKeys: aws.Int64(1000), MaxKeys: aws.Int64(1000),
Marker: aws.String(marker),
} }
output, err := storage.client.ListObjects(&input) err := storage.client.ListObjectsV2Pages(&input, func(page *s3.ListObjectsV2Output, lastPage bool) bool {
if err != nil { for _, object := range page.Contents {
return nil, nil, err
}
for _, object := range output.Contents {
files = append(files, (*object.Key)[len(dir):]) files = append(files, (*object.Key)[len(dir):])
sizes = append(sizes, *object.Size) sizes = append(sizes, *object.Size)
} }
return true
if !*output.IsTruncated { })
break if err != nil {
} return nil, nil, err
marker = *output.Contents[len(output.Contents)-1].Key
} }
return files, sizes, nil return files, sizes, nil
} }

View File

@@ -51,7 +51,7 @@ type Snapshot struct {
// CreateEmptySnapshot creates an empty snapshot. // CreateEmptySnapshot creates an empty snapshot.
func CreateEmptySnapshot(id string) (snapshto *Snapshot) { func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
return &Snapshot{ return &Snapshot{
Version: 1, Version: 0x6a6c01,
ID: id, ID: id,
Revision: 0, Revision: 0,
StartTime: time.Now().Unix(), StartTime: time.Now().Unix(),
@@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
skippedDirectories *[]string, skippedFiles *[]string) { skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string var patterns []string
var listingState = NewListingState()
if filtersFile == "" { if filtersFile == "" {
filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters") filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
@@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
directory := directories[len(directories)-1] directory := directories[len(directories)-1]
directories = directories[:len(directories)-1] directories = directories[:len(directories)-1]
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
if err != nil { if err != nil {
if directory.Path == "" { if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err) LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
@@ -160,7 +161,7 @@ func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOpe
return return
} }
} }
} else if snapshot.Version == 1 { } else if snapshot.Version == 1 || snapshot.Version == 0x6a6c01 {
decoder := msgpack.NewDecoder(reader) decoder := msgpack.NewDecoder(reader)
lastEndChunk := 0 lastEndChunk := 0

View File

@@ -8,7 +8,7 @@ import (
"strings" "strings"
) )
func excludedByAttribute(attirbutes map[string][]byte) bool { func excludedByAttribute(attributes map[string][]byte) bool {
value, ok := attirbutes["com.apple.metadata:com_apple_backup_excludeItem"] value, ok := attributes["com.apple.metadata:com_apple_backup_excludeItem"]
return ok && strings.Contains(string(value), "com.apple.backupd") return ok && strings.Contains(string(value), "com.apple.backupd")
} }

View File

@@ -1,13 +0,0 @@
// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
)
func excludedByAttribute(attirbutes map[string][]byte) bool {
_, ok := attirbutes["duplicacy_exclude"]
return ok
}

View File

@@ -2,12 +2,14 @@
// Free for personal use and commercial trial // Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com // Commercial use requires per-user licenses available from https://duplicacy.com
// +build freebsd netbsd linux solaris
package duplicacy package duplicacy
import ( import (
) )
func excludedByAttribute(attirbutes map[string][]byte) bool { func excludedByAttribute(attributes map[string][]byte) bool {
_, ok := attirbutes["duplicacy_exclude"] _, ok := attributes["user.duplicacy_exclude"]
return ok return ok
} }

View File

@@ -132,6 +132,6 @@ func SplitDir(fullPath string) (dir string, file string) {
return fullPath[:i+1], fullPath[i+1:] return fullPath[:i+1], fullPath[i+1:]
} }
func excludedByAttribute(attirbutes map[string][]byte) bool { func excludedByAttribute(attributes map[string][]byte) bool {
return false return false
} }