Compare commits

..

6 Commits

Author SHA1 Message Date
bc49f73d5a Check the good stuff 2023-10-02 12:49:58 -05:00
9be4e2b9c9 Initial hardlink in snapshot support.
- Create a new snapshot version number as this method is not backwards
compatible.
- This has some breakages with restoring. Namely, if the root file
is not marked for download, any hardlinked files that need to be restored
will not be linked; they will be restored as regular files.
2023-10-02 12:49:58 -05:00
c07eef5063 Increase b2 client max file listing count to 10000
Considerable speed improvement with listing large storage.
2023-10-02 12:46:02 -05:00
2fdedcb9dd Fix exclude_by_attribute feature on POSIX
The exclude-by-attribute function is broken on non-Darwin POSIX systems: Linux and FreeBSD.
This is because xattrs on those systems must be prefixed by a legal namespace. The old xattr
library implicitly prepended the user namespace to the xattr name, but the current
official Go package does not (which is just as well).

Also fix the test to remove the discordant old xattr dependency and provide
test cases for both darwin and non-darwin POSIX.
2023-10-02 12:41:50 -05:00
7bdd1cabd3 Use S3 ListObjectsV2 for listing files
ListObjects has been deprecated since 2016 and ListObjectsV2 with use of
explicit pagination tokens is more performant for large listings as well.

This also mitigates an issue with iDrive E2 where the StartAfter/Marker
is included in the output, leading to duplicate entries. Right now this
causes an exhaustive prune to delete chunks erroneously flagged as
duplicate, destroying the storage.
2023-10-02 12:41:50 -05:00
Gilbert Chen
fd3bceae19 Bump version to 3.2.1 2023-10-02 12:30:23 -04:00
7 changed files with 135 additions and 70 deletions

View File

@@ -2262,7 +2262,7 @@ func main() {
app.Name = "duplicacy" app.Name = "duplicacy"
app.HelpName = "duplicacy" app.HelpName = "duplicacy"
app.Usage = "A new generation cloud backup tool based on lock-free deduplication" app.Usage = "A new generation cloud backup tool based on lock-free deduplication"
app.Version = "3.2.0" + " (" + GitCommit + ")" app.Version = "3.2.1" + " (" + GitCommit + ")"
// Exit with code 2 if an invalid command is provided // Exit with code 2 if an invalid command is provided
app.CommandNotFound = func(context *cli.Context, command string) { app.CommandNotFound = func(context *cli.Context, command string) {

View File

@@ -396,7 +396,7 @@ type B2ListFileNamesOutput struct {
func (client *B2Client) ListFileNames(threadIndex int, startFileName string, singleFile bool, includeVersions bool) (files []*B2Entry, err error) { func (client *B2Client) ListFileNames(threadIndex int, startFileName string, singleFile bool, includeVersions bool) (files []*B2Entry, err error) {
maxFileCount := 1000 maxFileCount := 10_000
if singleFile { if singleFile {
if includeVersions { if includeVersions {
maxFileCount = 4 maxFileCount = 4

View File

@@ -304,7 +304,24 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
remoteEntry = nil remoteEntry = nil
} }
if compareResult == 0 { if localEntry.IsHardlinkedFrom() {
// FIXME: Sanity check?
// FIXME: perhaps we can make size = 0 an initial invariant of the link?
//
// Note that if the initial size was 0 then this original logic doesn't change!
localEntry.Size = 0
// targetEntry, ok := localEntryList.HardLinkTable[localEntry.Link]
// if !ok {
// LOG_ERROR("BACKUP_CREATE", "Hard link %s not found in entry cache for path %s", localEntry.Link, localEntry.Path)
// }
// localEntry.Size = targetEntry.Size
// localEntry.Hash = targetEntry.Hash
// localEntry.StartChunk = targetEntry.StartChunk
// localEntry.StartOffset = targetEntry.StartOffset
// localEntry.EndChunk = targetEntry.EndChunk
// localEntry.EndOffset = targetEntry.EndOffset
// LOG_DEBUG("BACKUP_CREATE", "Hard link %s to %s in initial listing", localEntry.Link, targetEntry.Path)
} else if compareResult == 0 {
// No need to check if it is in hash mode -- in that case remote listing is nil // No need to check if it is in hash mode -- in that case remote listing is nil
if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() { if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() {
if localEntry.Size > 0 { if localEntry.Size > 0 {
@@ -709,7 +726,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
localListingOK := true localListingOK := true
hardLinkTable := make(map[string]hardLinkEntry) hardLinkTable := make(map[string]hardLinkEntry)
hardLinks := make([]*Entry, 0) //hardLinks := make([]*Entry, 0)
for remoteEntry := range remoteListingChannel { for remoteEntry := range remoteListingChannel {
@@ -745,22 +762,28 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
if compareResult == 0 { if compareResult == 0 {
if quickMode && localEntry.IsFile() { // if quickMode && localEntry.IsFile() {
checkEntry := remoteEntry if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) {
if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else {
checkEntry = e.entry
}
}
if localEntry.IsSameAs(checkEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size skippedFileSize += localEntry.Size
skippedFileCount++ skippedFileCount++
localEntry = nil localEntry = nil
continue continue
} // checkEntry := remoteEntry
// if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
// if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
// LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
// } else {
// checkEntry = e.entry
// }
// }
// if localEntry.IsSameAs(checkEntry) {
// LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
// skippedFileSize += localEntry.Size
// skippedFileCount++
// localEntry = nil
// continue
// }
} }
localEntry = nil localEntry = nil
} }
@@ -807,21 +830,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
directoryEntries = append(directoryEntries, remoteEntry) directoryEntries = append(directoryEntries, remoteEntry)
} else { } else {
if remoteEntry.Link == "/" { // if remoteEntry.Link == "/" {
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true} // hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true}
} else if len(remoteEntry.Link) > 0 { // } else if len(remoteEntry.Link) > 0 {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok { // if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) // LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else if !e.willDownload { // } else if !e.willDownload {
origSourcePath := e.entry.Path // origSourcePath := e.entry.Path
e.entry.Path = remoteEntry.Path // e.entry.Path = remoteEntry.Path
remoteEntry = e.entry // remoteEntry = e.entry
hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true} // hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true}
} else { // } else {
hardLinks = append(hardLinks, remoteEntry) // hardLinks = append(hardLinks, remoteEntry)
continue // continue
} // }
} // }
// We can't download files here since fileEntries needs to be sorted // We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry) fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size totalFileSize += remoteEntry.Size
@@ -877,11 +900,11 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
stat, _ := os.Stat(fullPath) stat, _ := os.Stat(fullPath)
if stat != nil { if stat != nil {
if quickMode { if quickMode {
cmpFile := file // cmpFile := file
if file.IsFile() && len(file.Link) > 0 && file.Link != "/" { // if file.IsFile() && len(file.Link) > 0 && file.Link != "/" {
cmpFile = hardLinkTable[file.Link].entry // cmpFile = hardLinkTable[file.Link].entry
} // }
if cmpFile.IsSameAsFileInfo(stat) { if file.IsSameAsFileInfo(stat) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path)
skippedFileSize += file.Size skippedFileSize += file.Size
skippedFileCount++ skippedFileCount++
@@ -944,14 +967,14 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner) file.RestoreMetadata(fullPath, nil, setOwner)
} }
for _, linkEntry := range hardLinks { // for _, linkEntry := range hardLinks {
sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path) // sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path)
fullPath := joinPath(top, linkEntry.Path) // fullPath := joinPath(top, linkEntry.Path)
LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath) // LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath)
if err := os.Link(sourcePath, fullPath); err != nil { // if err := os.Link(sourcePath, fullPath); err != nil {
LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath) // LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath)
} // }
} // }
if deleteMode && len(patterns) == 0 { if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted // Reverse the order to make sure directories are empty before being deleted
@@ -1105,6 +1128,9 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
lastEndChunk := 0 lastEndChunk := 0
uploadEntryInfoFunc := func(entry *Entry) error { uploadEntryInfoFunc := func(entry *Entry) error {
if entry.IsHardlinkRoot() {
entryList.HardLinkTable[entry.Path] = entry
}
if entry.IsFile() && entry.Size > 0 { if entry.IsFile() && entry.Size > 0 {
delta := entry.StartChunk - len(chunkHashes) + 1 delta := entry.StartChunk - len(chunkHashes) + 1
@@ -1127,6 +1153,14 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
entry.StartChunk -= lastEndChunk entry.StartChunk -= lastEndChunk
lastEndChunk = entry.EndChunk lastEndChunk = entry.EndChunk
entry.EndChunk = delta entry.EndChunk = delta
} else if entry.IsHardlinkedFrom() {
targetEntry, ok := entryList.HardLinkTable[entry.Link]
if !ok {
LOG_ERROR("SNAPSHOT_UPLOAD", "Unable to find hardlink target for %s to %s", entry.Path, entry.Link)
}
// FIXME: We will use a copy, so it is probably sufficient to skip rereading xattrs and such in the initial code
entry = entry.LinkTo(targetEntry)
LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink entry for %s to %s", entry.Path, entry.Link)
} }
buffer.Reset() buffer.Reset()

View File

@@ -4,6 +4,8 @@
package duplicacy package duplicacy
import ( import (
"bytes"
"crypto/sha256"
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
@@ -15,13 +17,10 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"time"
"bytes"
"crypto/sha256"
"syscall" "syscall"
"time"
"github.com/vmihailenco/msgpack" "github.com/vmihailenco/msgpack"
) )
// This is the hidden directory in the repository for storing various files. // This is the hidden directory in the repository for storing various files.
@@ -120,6 +119,27 @@ func (entry *Entry) Copy() *Entry {
} }
} }
// LinkTo returns a newly allocated Entry that describes the receiver's path
// using the data recorded for target.
//
// The result keeps the receiver's Path and Link; Size, Time, Mode, Hash, UID,
// GID, the chunk range (StartChunk/StartOffset/EndChunk/EndOffset) and
// Attributes are taken from target. Attributes is assigned as-is from target;
// no copy of it is made here. Neither the receiver nor target is modified.
// Any Entry field not listed above is left at its zero value.
func (entry *Entry) LinkTo(target *Entry) *Entry {
	linked := new(Entry)

	// Identity of the link itself comes from the receiver.
	linked.Path = entry.Path
	linked.Link = entry.Link

	// Metadata comes from the link target.
	linked.Size = target.Size
	linked.Time = target.Time
	linked.Mode = target.Mode
	linked.Hash = target.Hash
	linked.UID = target.UID
	linked.GID = target.GID
	linked.Attributes = target.Attributes

	// Chunk location of the content comes from the link target as well.
	linked.StartChunk = target.StartChunk
	linked.StartOffset = target.StartOffset
	linked.EndChunk = target.EndChunk
	linked.EndOffset = target.EndOffset

	return linked
}
// CreateEntryFromJSON creates an entry from a json description. // CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) { func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -493,14 +513,26 @@ func (entry *Entry) IsComplete() bool {
return entry.Size >= 0 return entry.Size >= 0
} }
// IsFileNotHardlink reports whether the entry is a file (per IsFile) whose
// Link field is either empty or exactly "/"; that is, a file that does not
// name another path in its Link field.
func (entry *Entry) IsFileNotHardlink() bool {
	if !entry.IsFile() {
		return false
	}
	return entry.Link == "" || entry.Link == "/"
}
// IsHardlinkedFrom reports whether the entry is a file (per IsFile) whose
// Link field is non-empty and is not the "/" marker, i.e. Link names some
// other path.
func (entry *Entry) IsHardlinkedFrom() bool {
	if !entry.IsFile() {
		return false
	}
	switch entry.Link {
	case "", "/":
		return false
	default:
		return true
	}
}
// IsHardlinkRoot reports whether the entry is a file (per IsFile) whose Link
// field is exactly the "/" marker.
func (entry *Entry) IsHardlinkRoot() bool {
	if !entry.IsFile() {
		return false
	}
	return entry.Link == "/"
}
func (entry *Entry) GetPermissions() os.FileMode { func (entry *Entry) GetPermissions() os.FileMode {
return os.FileMode(entry.Mode) & fileModeMask return os.FileMode(entry.Mode) & fileModeMask
} }
func (entry *Entry) GetParent() string { func (entry *Entry) GetParent() string {
path := entry.Path path := entry.Path
if path != "" && path[len(path) - 1] == '/' { if path != "" && path[len(path)-1] == '/' {
path = path[:len(path) - 1] path = path[:len(path)-1]
} }
i := strings.LastIndex(path, "/") i := strings.LastIndex(path, "/")
if i == -1 { if i == -1 {
@@ -597,7 +629,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(left); i++ { for i := p; i < len(left); i++ {
c3 = left[i] c3 = left[i]
if c3 == '/' { if c3 == '/' {
last1 = i == len(left) - 1 last1 = i == len(left)-1
break break
} }
} }
@@ -607,7 +639,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(right); i++ { for i := p; i < len(right); i++ {
c4 = right[i] c4 = right[i]
if c4 == '/' { if c4 == '/' {
last2 = i == len(right) - 1 last2 = i == len(right)-1
break break
} }
} }
@@ -806,9 +838,8 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
if ok && stat != nil && stat.Nlink > 1 { if ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)} k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if path, ok := listingState.linkTable[k]; ok { if path, ok := listingState.linkTable[k]; ok {
LOG_WARN("LIST_LINK", "Linking %s to %s", entry.Path, path) LOG_DEBUG("LIST_HARDLINK", "Detected hardlink %s to %s", entry.Path, path)
entry.Link = path entry.Link = path
entry.Size = 0
} else { } else {
entry.Link = "/" entry.Link = "/"
listingState.linkTable[k] = entry.Path listingState.linkTable[k] = entry.Path

View File

@@ -62,6 +62,7 @@ type EntryList struct {
uploadedChunkIndex int // counter for upload chunks uploadedChunkIndex int // counter for upload chunks
uploadedChunkOffset int // the start offset for the current modified entry uploadedChunkOffset int // the start offset for the current modified entry
HardLinkTable map[string]*Entry
} }
// Create a new entry list // Create a new entry list
@@ -78,6 +79,7 @@ func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries
maximumInMemoryEntries: maximumInMemoryEntries, maximumInMemoryEntries: maximumInMemoryEntries,
cachePath: cachePath, cachePath: cachePath,
Token: string(token), Token: string(token),
HardLinkTable: make(map[string]*Entry),
} }
return entryList, nil return entryList, nil
@@ -118,7 +120,7 @@ func (entryList *EntryList)AddEntry(entry *Entry) error {
if !entry.IsComplete() { if !entry.IsComplete() {
if entry.IsDir() || entry.IsLink() { if entry.IsDir() || entry.IsLink() {
entry.Size = 0 entry.Size = 0
} else { } else if !entry.IsHardlinkedFrom() {
modifiedEntry := ModifiedEntry { modifiedEntry := ModifiedEntry {
Path: entry.Path, Path: entry.Path,
Size: -1, Size: -1,

View File

@@ -51,7 +51,7 @@ type Snapshot struct {
// CreateEmptySnapshot creates an empty snapshot. // CreateEmptySnapshot creates an empty snapshot.
func CreateEmptySnapshot(id string) (snapshto *Snapshot) { func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
return &Snapshot{ return &Snapshot{
Version: 0x6a6c01, Version: 1,
ID: id, ID: id,
Revision: 0, Revision: 0,
StartTime: time.Now().Unix(), StartTime: time.Now().Unix(),
@@ -161,7 +161,7 @@ func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOpe
return return
} }
} }
} else if snapshot.Version == 1 || snapshot.Version == 0x6a6c01 { } else if snapshot.Version == 1 {
decoder := msgpack.NewDecoder(reader) decoder := msgpack.NewDecoder(reader)
lastEndChunk := 0 lastEndChunk := 0

View File

@@ -2,13 +2,11 @@
// Free for personal use and commercial trial // Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com // Commercial use requires per-user licenses available from https://duplicacy.com
//go:build freebsd || netbsd || linux || solaris
// +build freebsd netbsd linux solaris // +build freebsd netbsd linux solaris
package duplicacy package duplicacy
import (
)
func excludedByAttribute(attributes map[string][]byte) bool { func excludedByAttribute(attributes map[string][]byte) bool {
_, ok := attributes["user.duplicacy_exclude"] _, ok := attributes["user.duplicacy_exclude"]
return ok return ok