From 16885eaa613ad6969a00f0177d84c2401004494b Mon Sep 17 00:00:00 2001
From: "John K. Luebs"
Date: Tue, 3 Oct 2023 00:09:04 -0500
Subject: [PATCH] Support backup and restore of hardlinks

This tracks the inode/device from the stat info and creates backward-compatible snapshots that allow hardlinks to be preserved. Backward compatibility is maintained by saving a virtual inode number index in the Link field of the file entry. Since this field was previously used only for symlinks, this won't break old versions. Additionally, the entry data is cloned so that restoration with an old version still works.

Current limitations are primarily with restore. They include:
- there is no command line option to prevent hard link restore
- if a file has the immutable or append-only flag, the flag will be set before hardlinks are restored, so hardlinking will fail.
- if a partial restore includes a hardlink but not the parent directories, the hardlink will fail.

These will be solved by grouping the restore of hardlinks together with the files, prior to applying final metadata.

- if a file has changed and is being rewritten by a restore, hardlinks are not preserved.
--- src/duplicacy_backupmanager.go | 72 +++++++++++++++++++++- src/duplicacy_entry.go | 109 +++++++++++++++++++++++++++------ src/duplicacy_snapshot.go | 3 +- 3 files changed, 162 insertions(+), 22 deletions(-) diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index c979030..51c109c 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -703,8 +703,19 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu var localEntry *Entry localListingOK := true + type hardLinkEntry struct { + entry *Entry + willDownload bool + } + var hardLinkTable []hardLinkEntry + var hardLinks []*Entry + for remoteEntry := range remoteListingChannel { + if remoteEntry.IsHardlinkRoot() { + hardLinkTable = append(hardLinkTable, hardLinkEntry{remoteEntry, false}) + } + if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { continue } @@ -783,6 +794,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu remoteEntry.RestoreEarlyDirFlags(fullPath) directoryEntries = append(directoryEntries, remoteEntry) } else { + if remoteEntry.IsHardlinkRoot() { + hardLinkTable[len(hardLinkTable)-1] = hardLinkEntry{remoteEntry, true} + } else if remoteEntry.IsHardlinkedFrom() { + i, err := strconv.ParseUint(remoteEntry.Link, 16, 64) + if err != nil { + LOG_ERROR("RESTORE_HARDLINK", "Decode error in hardlink entry, expected hex int, got %s", remoteEntry.Link) + return 0 + } + if !hardLinkTable[i].willDownload { + hardLinkTable[i] = hardLinkEntry{remoteEntry, true} + } else { + hardLinks = append(hardLinks, remoteEntry) + continue + } + } // We can't download files here since fileEntries needs to be sorted fileEntries = append(fileEntries, remoteEntry) totalFileSize += remoteEntry.Size @@ -902,6 +928,17 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu file.RestoreMetadata(fullPath, nil, setOwner) } + for _, linkEntry := range hardLinks { + i, _ := 
strconv.ParseUint(linkEntry.Link, 16, 64) + sourcePath := joinPath(top, hardLinkTable[i].entry.Path) + fullPath := joinPath(top, linkEntry.Path) + LOG_INFO("RESTORE_HARDLINK", "Hard linking %s to %s", fullPath, sourcePath) + if err := os.Link(sourcePath, fullPath); err != nil { + LOG_ERROR("RESTORE_HARDLINK", "Failed to create hard link %s to %s", fullPath, sourcePath) + return 0 + } + } + if deleteMode && len(patterns) == 0 { // Reverse the order to make sure directories are empty before being deleted for i := range extraFiles { @@ -1053,8 +1090,13 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s lastEndChunk := 0 - uploadEntryInfoFunc := func(entry *Entry) error { + type hardLinkEntry struct { + entry *Entry + startChunk int + } + var hardLinkTable []hardLinkEntry + uploadEntryInfoFunc := func(entry *Entry) error { if entry.IsFile() && entry.Size > 0 { delta := entry.StartChunk - len(chunkHashes) + 1 if entry.StartChunk != lastChunk { @@ -1072,10 +1114,38 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s entry.StartChunk -= delta entry.EndChunk -= delta + if entry.IsHardlinkRoot() { + LOG_DEBUG("SNAPSHOT_UPLOAD", "Hard link root %s %v %v", entry.Path, entry.StartChunk, entry.EndChunk) + hardLinkTable = append(hardLinkTable, hardLinkEntry{entry, entry.StartChunk}) + } + delta = entry.EndChunk - entry.StartChunk entry.StartChunk -= lastEndChunk lastEndChunk = entry.EndChunk entry.EndChunk = delta + } else if entry.IsHardlinkedFrom() { + i, err := strconv.ParseUint(entry.Link, 16, 64) + if err != nil { + LOG_ERROR("SNAPSHOT_UPLOAD", "Decode error in hardlink entry, expected hex int, got %s", entry.Link) + return err + } + + targetEntry := hardLinkTable[i].entry + var startChunk, endChunk int + + if targetEntry.Size > 0 { + startChunk = hardLinkTable[i].startChunk - lastEndChunk + endChunk = targetEntry.EndChunk + } + entry = entry.HardLinkTo(targetEntry, startChunk, endChunk) + + if 
targetEntry.Size > 0 { + lastEndChunk = hardLinkTable[i].startChunk + endChunk + } + + LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink for %s to %s (%v %v)", entry.Path, targetEntry.Path, startChunk, endChunk) + } else if entry.IsHardlinkRoot() { + hardLinkTable = append(hardLinkTable, hardLinkEntry{entry, 0}) } buffer.Reset() diff --git a/src/duplicacy_entry.go b/src/duplicacy_entry.go index 31cbad1..b33b53b 100644 --- a/src/duplicacy_entry.go +++ b/src/duplicacy_entry.go @@ -4,6 +4,8 @@ package duplicacy import ( + "bytes" + "crypto/sha256" "encoding/base64" "encoding/json" "fmt" @@ -15,12 +17,10 @@ import ( "sort" "strconv" "strings" + "syscall" "time" - "bytes" - "crypto/sha256" - - "github.com/vmihailenco/msgpack" + "github.com/vmihailenco/msgpack" ) // This is the hidden directory in the repository for storing various files. @@ -110,15 +110,36 @@ func (entry *Entry) Copy() *Entry { UID: entry.UID, GID: entry.GID, - StartChunk: entry.StartChunk, + StartChunk: entry.StartChunk, StartOffset: entry.StartOffset, - EndChunk: entry.EndChunk, - EndOffset: entry.EndOffset, + EndChunk: entry.EndChunk, + EndOffset: entry.EndOffset, Attributes: entry.Attributes, } } +func (entry *Entry) HardLinkTo(target *Entry, startChunk int, endChunk int) *Entry { + return &Entry{ + Path: entry.Path, + Size: target.Size, + Time: target.Time, + Mode: target.Mode, + Link: entry.Link, + Hash: target.Hash, + + UID: target.UID, + GID: target.GID, + + StartChunk: startChunk, + StartOffset: target.StartOffset, + EndChunk: endChunk, + EndOffset: target.EndOffset, + + Attributes: target.Attributes, + } +} + // CreateEntryFromJSON creates an entry from a json description. 
func (entry *Entry) UnmarshalJSON(description []byte) (err error) { @@ -362,12 +383,12 @@ func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error { if entry.Attributes != nil { attributes := make([]string, numberOfAttributes) - i := 0 - for attribute := range *entry.Attributes { - attributes[i] = attribute - i++ - } - sort.Strings(attributes) + i := 0 + for attribute := range *entry.Attributes { + attributes[i] = attribute + i++ + } + sort.Strings(attributes) for _, attribute := range attributes { err = encoder.EncodeString(attribute) if err != nil { @@ -380,7 +401,7 @@ func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error { } } - return nil + return nil } func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error { @@ -492,14 +513,22 @@ func (entry *Entry) IsComplete() bool { return entry.Size >= 0 } +func (entry *Entry) IsHardlinkedFrom() bool { + return entry.IsFile() && len(entry.Link) > 0 && entry.Link != "/" +} + +func (entry *Entry) IsHardlinkRoot() bool { + return entry.IsFile() && entry.Link == "/" +} + func (entry *Entry) GetPermissions() os.FileMode { return os.FileMode(entry.Mode) & fileModeMask } func (entry *Entry) GetParent() string { path := entry.Path - if path != "" && path[len(path) - 1] == '/' { - path = path[:len(path) - 1] + if path != "" && path[len(path)-1] == '/' { + path = path[:len(path)-1] } i := strings.LastIndex(path, "/") if i == -1 { @@ -596,7 +625,7 @@ func ComparePaths(left string, right string) int { for i := p; i < len(left); i++ { c3 = left[i] if c3 == '/' { - last1 = i == len(left) - 1 + last1 = i == len(left)-1 break } } @@ -606,7 +635,7 @@ func ComparePaths(left string, right string) int { for i := p; i < len(right); i++ { c4 = right[i] if c4 == '/' { - last2 = i == len(right) - 1 + last2 = i == len(right)-1 break } } @@ -694,10 +723,27 @@ func (files FileInfoCompare) Less(i, j int) bool { } } +type listEntryLinkKey struct { + dev uint64 + ino uint64 +} + +type ListingState struct { + linkIndex 
int + linkTable map[listEntryLinkKey]int // map unique inode details to initially found path +} + +func NewListingState() *ListingState { + return &ListingState{ + linkTable: make(map[listEntryLinkKey]int), + } +} + // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. -func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, - skippedFiles []string, err error) { +func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, + listingState *ListingState, + listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) { LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) @@ -777,6 +823,24 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string continue } + var linkKey *listEntryLinkKey + + if stat, ok := f.Sys().(*syscall.Stat_t); entry.IsFile() && ok && stat != nil && stat.Nlink > 1 { + k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)} + if linkIndex, seen := listingState.linkTable[k]; seen { + if linkIndex == -1 { + LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute (hardlink)", entry.Path) + continue + } + entry.Size = 0 + entry.Link = strconv.FormatInt(int64(linkIndex), 16) + } else { + entry.Link = "/" + listingState.linkTable[k] = -1 + linkKey = &k + } + } + entry.ReadAttributes(top) if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) { @@ -784,6 +848,11 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string continue } + if linkKey != nil { + listingState.linkTable[*linkKey] = listingState.linkIndex + listingState.linkIndex++ + } + if entry.IsDir() { directoryList = append(directoryList, entry) } else { diff --git 
a/src/duplicacy_snapshot.go b/src/duplicacy_snapshot.go index efac81d..de94878 100644 --- a/src/duplicacy_snapshot.go +++ b/src/duplicacy_snapshot.go @@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string, skippedDirectories *[]string, skippedFiles *[]string) { var patterns []string + listingState := NewListingState() if filtersFile == "" { filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters") @@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string, directory := directories[len(directories)-1] directories = directories[:len(directories)-1] - subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) + subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel) if err != nil { if directory.Path == "" { LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)