From 2bb70595c561af85e9caee823fb1474ac2d9fb70 Mon Sep 17 00:00:00 2001 From: "John K. Luebs" Date: Fri, 29 Sep 2023 16:28:47 -0500 Subject: [PATCH] Initial hardlink in snapshot support. - Create a new snapshot version number, as this method is not backwards compatible. - This has some breakages with restoring. Namely, if the root file is not marked for download, any hardlinked files that need to be restored will not be linked; they will be restored as regular files. --- src/duplicacy_backupmanager.go | 68 ++++++++++++++++++++++++++++++---- src/duplicacy_entry.go | 36 +++++++++++++++++- src/duplicacy_snapshot.go | 7 ++-- 3 files changed, 98 insertions(+), 13 deletions(-) diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index 69fc59d..db0d385 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -622,6 +622,11 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta return true } +type hardLinkEntry struct { + entry *Entry + willDownload bool +} + // Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads // files that are different. 'base' is a directory that contains files at a different revision which can // serve as a local cache to avoid download chunks available locally. 
It is perfectly ok for 'base' to be @@ -703,8 +708,16 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu var localEntry *Entry localListingOK := true + hardLinkTable := make(map[string]hardLinkEntry) + hardLinks := make([]*Entry, 0) + for remoteEntry := range remoteListingChannel { + if remoteEntry.IsFile() && remoteEntry.Link == "/" { + LOG_INFO("RESTORE_LINK", "Noting hardlinked source file %s", remoteEntry.Path) + hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, false} + } + if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { continue } @@ -713,6 +726,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu var compareResult int for { + // TODO: We likely need to check if a local listing file exists in the hardLinkTable for the case where one is restoring a hardlink + // to an existing disk file. Right now, we'll just end up downloading the file new. if localEntry == nil && localListingOK { localEntry, localListingOK = <- localListingChannel } @@ -730,12 +745,22 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } if compareResult == 0 { - if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) { - LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) - skippedFileSize += localEntry.Size - skippedFileCount++ - localEntry = nil - continue + if quickMode && localEntry.IsFile() { + checkEntry := remoteEntry + if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" { + if e, ok := hardLinkTable[remoteEntry.Link]; !ok { + LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) + } else { + checkEntry = e.entry + } + } + if localEntry.IsSameAs(checkEntry) { + LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) + skippedFileSize += localEntry.Size + skippedFileCount++ + localEntry = nil + continue + } } localEntry = nil 
} @@ -782,6 +807,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } directoryEntries = append(directoryEntries, remoteEntry) } else { + if remoteEntry.Link == "/" { + hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true} + } else if len(remoteEntry.Link) > 0 { + if e, ok := hardLinkTable[remoteEntry.Link]; !ok { + LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) + } else if !e.willDownload { + origSourcePath := e.entry.Path + e.entry.Path = remoteEntry.Path + remoteEntry = e.entry + hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true} + } else { + hardLinks = append(hardLinks, remoteEntry) + continue + } + } // We can't download files here since fileEntries needs to be sorted fileEntries = append(fileEntries, remoteEntry) totalFileSize += remoteEntry.Size @@ -837,7 +877,11 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu stat, _ := os.Stat(fullPath) if stat != nil { if quickMode { - if file.IsSameAsFileInfo(stat) { + cmpFile := file + if file.IsFile() && len(file.Link) > 0 && file.Link != "/" { + cmpFile = hardLinkTable[file.Link].entry + } + if cmpFile.IsSameAsFileInfo(stat) { LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) skippedFileSize += file.Size skippedFileCount++ @@ -874,7 +918,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu downloadedFileSize += file.Size downloadedFiles = append(downloadedFiles, file) } - continue } @@ -901,6 +944,15 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu file.RestoreMetadata(fullPath, nil, setOwner) } + for _, linkEntry := range hardLinks { + sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path) + fullPath := joinPath(top, linkEntry.Path) + LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath) + if err := os.Link(sourcePath, fullPath); 
err != nil { + LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath) + } + } + if deleteMode && len(patterns) == 0 { // Reverse the order to make sure directories are empty before being deleted for i := range extraFiles { diff --git a/src/duplicacy_entry.go b/src/duplicacy_entry.go index fa56b86..1239b54 100644 --- a/src/duplicacy_entry.go +++ b/src/duplicacy_entry.go @@ -18,6 +18,7 @@ import ( "time" "bytes" "crypto/sha256" + "syscall" "github.com/vmihailenco/msgpack" @@ -694,10 +695,26 @@ func (files FileInfoCompare) Less(i, j int) bool { } } +type listEntryLinkKey struct { + dev uint64 + ino uint64 +} + +type ListingState struct { + linkTable map[listEntryLinkKey]string // map unique inode details to initially found path +} + +func NewListingState() *ListingState { + return &ListingState{ + linkTable: make(map[listEntryLinkKey]string), + } +} + // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. 
-func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, - skippedFiles []string, err error) { +func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, + listingState *ListingState, + listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) { LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) @@ -784,6 +801,21 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string continue } + if entry.IsFile() { + stat, ok := f.Sys().(*syscall.Stat_t) + if ok && stat != nil && stat.Nlink > 1 { + k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)} + if path, ok := listingState.linkTable[k]; ok { + LOG_WARN("LIST_LINK", "Linking %s to %s", entry.Path, path) + entry.Link = path + entry.Size = 0 + } else { + entry.Link = "/" + listingState.linkTable[k] = entry.Path + } + } + } + if entry.IsDir() { directoryList = append(directoryList, entry) } else { diff --git a/src/duplicacy_snapshot.go b/src/duplicacy_snapshot.go index efac81d..076f216 100644 --- a/src/duplicacy_snapshot.go +++ b/src/duplicacy_snapshot.go @@ -51,7 +51,7 @@ type Snapshot struct { // CreateEmptySnapshot creates an empty snapshot. 
func CreateEmptySnapshot(id string) (snapshto *Snapshot) { return &Snapshot{ - Version: 1, + Version: 0x6a6c01, ID: id, Revision: 0, StartTime: time.Now().Unix(), @@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string, skippedDirectories *[]string, skippedFiles *[]string) { var patterns []string + var listingState = NewListingState() if filtersFile == "" { filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters") @@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string, directory := directories[len(directories)-1] directories = directories[:len(directories)-1] - subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) + subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel) if err != nil { if directory.Path == "" { LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err) @@ -160,7 +161,7 @@ func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOpe return } } - } else if snapshot.Version == 1 { + } else if snapshot.Version == 1 || snapshot.Version == 0x6a6c01 { decoder := msgpack.NewDecoder(reader) lastEndChunk := 0