From bc49f73d5a3dd060374935b899b74b0e0dfcb111 Mon Sep 17 00:00:00 2001 From: "John K. Luebs" Date: Sun, 1 Oct 2023 18:14:14 -0500 Subject: [PATCH] Check the good stuff --- src/duplicacy_backupmanager.go | 116 +++++++++++++++++++++------------ src/duplicacy_entry.go | 73 +++++++++++++++------ src/duplicacy_entrylist.go | 4 +- src/duplicacy_snapshot.go | 4 +- 4 files changed, 132 insertions(+), 65 deletions(-) diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index db0d385..67ac190 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -304,7 +304,24 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta remoteEntry = nil } - if compareResult == 0 { + if localEntry.IsHardlinkedFrom() { + // FIXME: Sanity check? + // FIXME: perhaps we can make size = 0 an initial invariant of the link? + // + // Note that if the initial size was 0 then this original logic doesn't change! + localEntry.Size = 0 + // targetEntry, ok := localEntryList.HardLinkTable[localEntry.Link] + // if !ok { + // LOG_ERROR("BACKUP_CREATE", "Hard link %s not found in entry cache for path %s", localEntry.Link, localEntry.Path) + // } + // localEntry.Size = targetEntry.Size + // localEntry.Hash = targetEntry.Hash + // localEntry.StartChunk = targetEntry.StartChunk + // localEntry.StartOffset = targetEntry.StartOffset + // localEntry.EndChunk = targetEntry.EndChunk + // localEntry.EndOffset = targetEntry.EndOffset + // LOG_DEBUG("BACKUP_CREATE", "Hard link %s to %s in initial listing", localEntry.Link, targetEntry.Path) + } else if compareResult == 0 { // No need to check if it is in hash mode -- in that case remote listing is nil if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() { if localEntry.Size > 0 { @@ -709,7 +726,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu localListingOK := true hardLinkTable := make(map[string]hardLinkEntry) - hardLinks := make([]*Entry, 0) + //hardLinks := make([]*Entry, 0) for remoteEntry := range remoteListingChannel { @@ -745,22 +762,28 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } if compareResult == 0 { - if quickMode && localEntry.IsFile() { - checkEntry := remoteEntry - if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" { - if e, ok := hardLinkTable[remoteEntry.Link]; !ok { - LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) - } else { - checkEntry = e.entry - } - } - if localEntry.IsSameAs(checkEntry) { + // if quickMode && localEntry.IsFile() { + if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) { LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) skippedFileSize += localEntry.Size skippedFileCount++ localEntry = nil continue - } + // checkEntry := remoteEntry + // if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" { + // if e, ok := hardLinkTable[remoteEntry.Link]; !ok { + // LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) + // } else { + // checkEntry = e.entry + // } + // } + // if localEntry.IsSameAs(checkEntry) { + // LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) + // skippedFileSize += localEntry.Size + // skippedFileCount++ + // localEntry = nil + // continue + // } } localEntry = nil } @@ -807,21 +830,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } directoryEntries = append(directoryEntries, remoteEntry) } else { - if remoteEntry.Link == "/" { - hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true} - } else if len(remoteEntry.Link) > 0 { - if e, ok := hardLinkTable[remoteEntry.Link]; !ok { - LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) - } else if !e.willDownload { - origSourcePath := e.entry.Path - e.entry.Path = remoteEntry.Path - remoteEntry = e.entry - hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true} - } else { - hardLinks = append(hardLinks, remoteEntry) - continue - } - } + // if remoteEntry.Link == "/" { + // hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true} + // } else if len(remoteEntry.Link) > 0 { + // if e, ok := hardLinkTable[remoteEntry.Link]; !ok { + // LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path) + // } else if !e.willDownload { + // origSourcePath := e.entry.Path + // e.entry.Path = remoteEntry.Path + // remoteEntry = e.entry + // hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true} + // } else { + // hardLinks = append(hardLinks, remoteEntry) + // continue + // } + // } // We can't download files here since fileEntries needs to be sorted fileEntries = append(fileEntries, remoteEntry) totalFileSize += remoteEntry.Size @@ -877,11 +900,11 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu stat, _ := os.Stat(fullPath) if stat != nil { if quickMode { - cmpFile := file - if file.IsFile() && len(file.Link) > 0 && file.Link != "/" { - cmpFile = hardLinkTable[file.Link].entry - } - if cmpFile.IsSameAsFileInfo(stat) { + // cmpFile := file + // if file.IsFile() && len(file.Link) > 0 && file.Link != "/" { + // cmpFile = hardLinkTable[file.Link].entry + // } + if file.IsSameAsFileInfo(stat) { LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) skippedFileSize += file.Size skippedFileCount++ @@ -944,14 +967,14 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu file.RestoreMetadata(fullPath, nil, setOwner) } - for _, linkEntry := range hardLinks { - sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path) - fullPath := joinPath(top, linkEntry.Path) - LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath) - if err := os.Link(sourcePath, fullPath); err != nil { - LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath) - } - } + // for _, linkEntry := range hardLinks { + // sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path) + // fullPath := joinPath(top, linkEntry.Path) + // LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath) + // if err := os.Link(sourcePath, fullPath); err != nil { + // LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath) + // } + // } if deleteMode && len(patterns) == 0 { // Reverse the order to make sure directories are empty before being deleted @@ -1105,6 +1128,9 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s lastEndChunk := 0 uploadEntryInfoFunc := func(entry *Entry) error { + if entry.IsHardlinkRoot() { + entryList.HardLinkTable[entry.Path] = entry + } if entry.IsFile() && entry.Size > 0 { delta := entry.StartChunk - len(chunkHashes) + 1 @@ -1127,6 +1153,14 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s entry.StartChunk -= lastEndChunk lastEndChunk = entry.EndChunk entry.EndChunk = delta + } else if entry.IsHardlinkedFrom() { + targetEntry, ok := entryList.HardLinkTable[entry.Link] + if !ok { + LOG_ERROR("SNAPSHOT_UPLOAD", "Unable to find hardlink target for %s to %s", entry.Path, entry.Link) + } + // FIXME: We will use a copy, so it is probably sufficient to skip rereading xattrs and such in the initial code + entry = entry.LinkTo(targetEntry) + LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink entry for %s to %s", entry.Path, entry.Link) } buffer.Reset() diff --git a/src/duplicacy_entry.go b/src/duplicacy_entry.go index 1239b54..dbacb6d 100644 --- a/src/duplicacy_entry.go +++ b/src/duplicacy_entry.go @@ -4,6 +4,8 @@ package duplicacy import ( + "bytes" + "crypto/sha256" "encoding/base64" "encoding/json" "fmt" @@ -15,13 +17,10 @@ import ( "sort" "strconv" "strings" - "time" - "bytes" - "crypto/sha256" "syscall" + "time" - "github.com/vmihailenco/msgpack" - + "github.com/vmihailenco/msgpack" ) // This is the hidden directory in the repository for storing various files. @@ -111,15 +110,36 @@ func (entry *Entry) Copy() *Entry { UID: entry.UID, GID: entry.GID, - StartChunk: entry.StartChunk, + StartChunk: entry.StartChunk, StartOffset: entry.StartOffset, - EndChunk: entry.EndChunk, - EndOffset: entry.EndOffset, + EndChunk: entry.EndChunk, + EndOffset: entry.EndOffset, Attributes: entry.Attributes, } } +func (entry *Entry) LinkTo(target *Entry) *Entry { + return &Entry{ + Path: entry.Path, + Size: target.Size, + Time: target.Time, + Mode: target.Mode, + Link: entry.Link, + Hash: target.Hash, + + UID: target.UID, + GID: target.GID, + + StartChunk: target.StartChunk, + StartOffset: target.StartOffset, + EndChunk: target.EndChunk, + EndOffset: target.EndOffset, + + Attributes: target.Attributes, + } +} + // CreateEntryFromJSON creates an entry from a json description. func (entry *Entry) UnmarshalJSON(description []byte) (err error) { @@ -363,12 +383,12 @@ func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error { if entry.Attributes != nil { attributes := make([]string, numberOfAttributes) - i := 0 - for attribute := range *entry.Attributes { - attributes[i] = attribute - i++ - } - sort.Strings(attributes) + i := 0 + for attribute := range *entry.Attributes { + attributes[i] = attribute + i++ + } + sort.Strings(attributes) for _, attribute := range attributes { err = encoder.EncodeString(attribute) if err != nil { @@ -381,7 +401,7 @@ func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error { } } - return nil + return nil } func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error { @@ -493,14 +513,26 @@ func (entry *Entry) IsComplete() bool { return entry.Size >= 0 } +func (entry *Entry) IsFileNotHardlink() bool { + return entry.IsFile() && (len(entry.Link) == 0 || entry.Link == "/") +} + +func (entry *Entry) IsHardlinkedFrom() bool { + return entry.IsFile() && len(entry.Link) > 0 && entry.Link != "/" +} + +func (entry *Entry) IsHardlinkRoot() bool { + return entry.IsFile() && entry.Link == "/" +} + func (entry *Entry) GetPermissions() os.FileMode { return os.FileMode(entry.Mode) & fileModeMask } func (entry *Entry) GetParent() string { path := entry.Path - if path != "" && path[len(path) - 1] == '/' { - path = path[:len(path) - 1] + if path != "" && path[len(path)-1] == '/' { + path = path[:len(path)-1] } i := strings.LastIndex(path, "/") if i == -1 { @@ -597,7 +629,7 @@ func ComparePaths(left string, right string) int { for i := p; i < len(left); i++ { c3 = left[i] if c3 == '/' { - last1 = i == len(left) - 1 + last1 = i == len(left)-1 break } } @@ -607,7 +639,7 @@ func ComparePaths(left string, right string) int { for i := p; i < len(right); i++ { c4 = right[i] if c4 == '/' { - last2 = i == len(right) - 1 + last2 = i == len(right)-1 break } } @@ -806,9 +838,8 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string if ok && stat != nil && stat.Nlink > 1 { k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)} if path, ok := listingState.linkTable[k]; ok { - LOG_WARN("LIST_LINK", "Linking %s to %s", entry.Path, path) + LOG_DEBUG("LIST_HARDLINK", "Detected hardlink %s to %s", entry.Path, path) entry.Link = path - entry.Size = 0 } else { entry.Link = "/" listingState.linkTable[k] = entry.Path diff --git a/src/duplicacy_entrylist.go b/src/duplicacy_entrylist.go index 6f21a5f..b2da9c5 100644 --- a/src/duplicacy_entrylist.go +++ b/src/duplicacy_entrylist.go @@ -62,6 +62,7 @@ type EntryList struct { uploadedChunkIndex int // counter for upload chunks uploadedChunkOffset int // the start offset for the current modified entry + HardLinkTable map[string]*Entry } // Create a new entry list @@ -78,6 +79,7 @@ func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries maximumInMemoryEntries: maximumInMemoryEntries, cachePath: cachePath, Token: string(token), + HardLinkTable: make(map[string]*Entry), } return entryList, nil @@ -118,7 +120,7 @@ func (entryList *EntryList)AddEntry(entry *Entry) error { if !entry.IsComplete() { if entry.IsDir() || entry.IsLink() { entry.Size = 0 - } else { + } else if !entry.IsHardlinkedFrom() { modifiedEntry := ModifiedEntry { Path: entry.Path, Size: -1, diff --git a/src/duplicacy_snapshot.go b/src/duplicacy_snapshot.go index 076f216..f80e517 100644 --- a/src/duplicacy_snapshot.go +++ b/src/duplicacy_snapshot.go @@ -51,7 +51,7 @@ type Snapshot struct { // CreateEmptySnapshot creates an empty snapshot. func CreateEmptySnapshot(id string) (snapshto *Snapshot) { return &Snapshot{ - Version: 0x6a6c01, + Version: 1, ID: id, Revision: 0, StartTime: time.Now().Unix(), @@ -161,7 +161,7 @@ func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOpe return } } - } else if snapshot.Version == 1 || snapshot.Version == 0x6a6c01 { + } else if snapshot.Version == 1 { decoder := msgpack.NewDecoder(reader) lastEndChunk := 0