Compare commits

...

2 Commits

Author SHA1 Message Date
bc49f73d5a Check the good stuff 2023-10-02 12:49:58 -05:00
9be4e2b9c9 Initial hardlink in snapshot support.
- Create a new snapshot version number as this method is not backwards
compatible.
- This has some breakages with restoring. Namely if the root file
is not marked for download any hardlinked files that need to be restored
will not be linked, they will be restored as a regular file
2023-10-02 12:49:58 -05:00
4 changed files with 182 additions and 30 deletions

View File

@@ -304,7 +304,24 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
remoteEntry = nil remoteEntry = nil
} }
if compareResult == 0 { if localEntry.IsHardlinkedFrom() {
// FIXME: Sanity check?
// FIXME: perhaps we can make size = 0 an initial invariant of the link?
//
// Note that if the initial size was 0 then this original logic doesn't change!
localEntry.Size = 0
// targetEntry, ok := localEntryList.HardLinkTable[localEntry.Link]
// if !ok {
// LOG_ERROR("BACKUP_CREATE", "Hard link %s not found in entry cache for path %s", localEntry.Link, localEntry.Path)
// }
// localEntry.Size = targetEntry.Size
// localEntry.Hash = targetEntry.Hash
// localEntry.StartChunk = targetEntry.StartChunk
// localEntry.StartOffset = targetEntry.StartOffset
// localEntry.EndChunk = targetEntry.EndChunk
// localEntry.EndOffset = targetEntry.EndOffset
// LOG_DEBUG("BACKUP_CREATE", "Hard link %s to %s in initial listing", localEntry.Link, targetEntry.Path)
} else if compareResult == 0 {
// No need to check if it is in hash mode -- in that case remote listing is nil // No need to check if it is in hash mode -- in that case remote listing is nil
if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() { if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() {
if localEntry.Size > 0 { if localEntry.Size > 0 {
@@ -622,6 +639,11 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
return true return true
} }
type hardLinkEntry struct {
entry *Entry
willDownload bool
}
// Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads // Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads
// files that are different. 'base' is a directory that contains files at a different revision which can // files that are different. 'base' is a directory that contains files at a different revision which can
// serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be // serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be
@@ -703,8 +725,16 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var localEntry *Entry var localEntry *Entry
localListingOK := true localListingOK := true
hardLinkTable := make(map[string]hardLinkEntry)
//hardLinks := make([]*Entry, 0)
for remoteEntry := range remoteListingChannel { for remoteEntry := range remoteListingChannel {
if remoteEntry.IsFile() && remoteEntry.Link == "/" {
LOG_INFO("RESTORE_LINK", "Noting hardlinked source file %s", remoteEntry.Path)
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, false}
}
if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) {
continue continue
} }
@@ -713,6 +743,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var compareResult int var compareResult int
for { for {
// TODO: We likely need to check if a local listing file exists in the hardLinkTable for the case where one is restoring a hardlink
// to an existing disk file. Right now, we'll just end up downloading the file new.
if localEntry == nil && localListingOK { if localEntry == nil && localListingOK {
localEntry, localListingOK = <- localListingChannel localEntry, localListingOK = <- localListingChannel
} }
@@ -730,12 +762,28 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
if compareResult == 0 { if compareResult == 0 {
// if quickMode && localEntry.IsFile() {
if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) { if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size skippedFileSize += localEntry.Size
skippedFileCount++ skippedFileCount++
localEntry = nil localEntry = nil
continue continue
// checkEntry := remoteEntry
// if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
// if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
// LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
// } else {
// checkEntry = e.entry
// }
// }
// if localEntry.IsSameAs(checkEntry) {
// LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
// skippedFileSize += localEntry.Size
// skippedFileCount++
// localEntry = nil
// continue
// }
} }
localEntry = nil localEntry = nil
} }
@@ -782,6 +830,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
} }
directoryEntries = append(directoryEntries, remoteEntry) directoryEntries = append(directoryEntries, remoteEntry)
} else { } else {
// if remoteEntry.Link == "/" {
// hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true}
// } else if len(remoteEntry.Link) > 0 {
// if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
// LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
// } else if !e.willDownload {
// origSourcePath := e.entry.Path
// e.entry.Path = remoteEntry.Path
// remoteEntry = e.entry
// hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true}
// } else {
// hardLinks = append(hardLinks, remoteEntry)
// continue
// }
// }
// We can't download files here since fileEntries needs to be sorted // We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry) fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size totalFileSize += remoteEntry.Size
@@ -837,6 +900,10 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
stat, _ := os.Stat(fullPath) stat, _ := os.Stat(fullPath)
if stat != nil { if stat != nil {
if quickMode { if quickMode {
// cmpFile := file
// if file.IsFile() && len(file.Link) > 0 && file.Link != "/" {
// cmpFile = hardLinkTable[file.Link].entry
// }
if file.IsSameAsFileInfo(stat) { if file.IsSameAsFileInfo(stat) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path)
skippedFileSize += file.Size skippedFileSize += file.Size
@@ -874,7 +941,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
downloadedFileSize += file.Size downloadedFileSize += file.Size
downloadedFiles = append(downloadedFiles, file) downloadedFiles = append(downloadedFiles, file)
} }
continue continue
} }
@@ -901,6 +967,15 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner) file.RestoreMetadata(fullPath, nil, setOwner)
} }
// for _, linkEntry := range hardLinks {
// sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path)
// fullPath := joinPath(top, linkEntry.Path)
// LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath)
// if err := os.Link(sourcePath, fullPath); err != nil {
// LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath)
// }
// }
if deleteMode && len(patterns) == 0 { if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted // Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles { for i := range extraFiles {
@@ -1053,6 +1128,9 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
lastEndChunk := 0 lastEndChunk := 0
uploadEntryInfoFunc := func(entry *Entry) error { uploadEntryInfoFunc := func(entry *Entry) error {
if entry.IsHardlinkRoot() {
entryList.HardLinkTable[entry.Path] = entry
}
if entry.IsFile() && entry.Size > 0 { if entry.IsFile() && entry.Size > 0 {
delta := entry.StartChunk - len(chunkHashes) + 1 delta := entry.StartChunk - len(chunkHashes) + 1
@@ -1075,6 +1153,14 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
entry.StartChunk -= lastEndChunk entry.StartChunk -= lastEndChunk
lastEndChunk = entry.EndChunk lastEndChunk = entry.EndChunk
entry.EndChunk = delta entry.EndChunk = delta
} else if entry.IsHardlinkedFrom() {
targetEntry, ok := entryList.HardLinkTable[entry.Link]
if !ok {
LOG_ERROR("SNAPSHOT_UPLOAD", "Unable to find hardlink target for %s to %s", entry.Path, entry.Link)
}
// FIXME: We will use a copy, so it is probably sufficient to skip rereading xattrs and such in the initial code
entry = entry.LinkTo(targetEntry)
LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink entry for %s to %s", entry.Path, entry.Link)
} }
buffer.Reset() buffer.Reset()

View File

@@ -4,6 +4,8 @@
package duplicacy package duplicacy
import ( import (
"bytes"
"crypto/sha256"
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
@@ -15,12 +17,10 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"syscall"
"time" "time"
"bytes"
"crypto/sha256"
"github.com/vmihailenco/msgpack" "github.com/vmihailenco/msgpack"
) )
// This is the hidden directory in the repository for storing various files. // This is the hidden directory in the repository for storing various files.
@@ -119,6 +119,27 @@ func (entry *Entry) Copy() *Entry {
} }
} }
func (entry *Entry) LinkTo(target *Entry) *Entry {
return &Entry{
Path: entry.Path,
Size: target.Size,
Time: target.Time,
Mode: target.Mode,
Link: entry.Link,
Hash: target.Hash,
UID: target.UID,
GID: target.GID,
StartChunk: target.StartChunk,
StartOffset: target.StartOffset,
EndChunk: target.EndChunk,
EndOffset: target.EndOffset,
Attributes: target.Attributes,
}
}
// CreateEntryFromJSON creates an entry from a json description. // CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) { func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -492,14 +513,26 @@ func (entry *Entry) IsComplete() bool {
return entry.Size >= 0 return entry.Size >= 0
} }
func (entry *Entry) IsFileNotHardlink() bool {
return entry.IsFile() && (len(entry.Link) == 0 || entry.Link == "/")
}
func (entry *Entry) IsHardlinkedFrom() bool {
return entry.IsFile() && len(entry.Link) > 0 && entry.Link != "/"
}
func (entry *Entry) IsHardlinkRoot() bool {
return entry.IsFile() && entry.Link == "/"
}
func (entry *Entry) GetPermissions() os.FileMode { func (entry *Entry) GetPermissions() os.FileMode {
return os.FileMode(entry.Mode) & fileModeMask return os.FileMode(entry.Mode) & fileModeMask
} }
func (entry *Entry) GetParent() string { func (entry *Entry) GetParent() string {
path := entry.Path path := entry.Path
if path != "" && path[len(path) - 1] == '/' { if path != "" && path[len(path)-1] == '/' {
path = path[:len(path) - 1] path = path[:len(path)-1]
} }
i := strings.LastIndex(path, "/") i := strings.LastIndex(path, "/")
if i == -1 { if i == -1 {
@@ -596,7 +629,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(left); i++ { for i := p; i < len(left); i++ {
c3 = left[i] c3 = left[i]
if c3 == '/' { if c3 == '/' {
last1 = i == len(left) - 1 last1 = i == len(left)-1
break break
} }
} }
@@ -606,7 +639,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(right); i++ { for i := p; i < len(right); i++ {
c4 = right[i] c4 = right[i]
if c4 == '/' { if c4 == '/' {
last2 = i == len(right) - 1 last2 = i == len(right)-1
break break
} }
} }
@@ -694,10 +727,26 @@ func (files FileInfoCompare) Less(i, j int) bool {
} }
} }
type listEntryLinkKey struct {
dev uint64
ino uint64
}
type ListingState struct {
linkTable map[listEntryLinkKey]string // map unique inode details to initially found path
}
func NewListingState() *ListingState {
return &ListingState{
linkTable: make(map[listEntryLinkKey]string),
}
}
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool,
skippedFiles []string, err error) { listingState *ListingState,
listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -784,6 +833,20 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue continue
} }
if entry.IsFile() {
stat, ok := f.Sys().(*syscall.Stat_t)
if ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if path, ok := listingState.linkTable[k]; ok {
LOG_DEBUG("LIST_HARDLINK", "Detected hardlink %s to %s", entry.Path, path)
entry.Link = path
} else {
entry.Link = "/"
listingState.linkTable[k] = entry.Path
}
}
}
if entry.IsDir() { if entry.IsDir() {
directoryList = append(directoryList, entry) directoryList = append(directoryList, entry)
} else { } else {

View File

@@ -62,6 +62,7 @@ type EntryList struct {
uploadedChunkIndex int // counter for upload chunks uploadedChunkIndex int // counter for upload chunks
uploadedChunkOffset int // the start offset for the current modified entry uploadedChunkOffset int // the start offset for the current modified entry
HardLinkTable map[string]*Entry
} }
// Create a new entry list // Create a new entry list
@@ -78,6 +79,7 @@ func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries
maximumInMemoryEntries: maximumInMemoryEntries, maximumInMemoryEntries: maximumInMemoryEntries,
cachePath: cachePath, cachePath: cachePath,
Token: string(token), Token: string(token),
HardLinkTable: make(map[string]*Entry),
} }
return entryList, nil return entryList, nil
@@ -118,7 +120,7 @@ func (entryList *EntryList)AddEntry(entry *Entry) error {
if !entry.IsComplete() { if !entry.IsComplete() {
if entry.IsDir() || entry.IsLink() { if entry.IsDir() || entry.IsLink() {
entry.Size = 0 entry.Size = 0
} else { } else if !entry.IsHardlinkedFrom() {
modifiedEntry := ModifiedEntry { modifiedEntry := ModifiedEntry {
Path: entry.Path, Path: entry.Path,
Size: -1, Size: -1,

View File

@@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
skippedDirectories *[]string, skippedFiles *[]string) { skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string var patterns []string
var listingState = NewListingState()
if filtersFile == "" { if filtersFile == "" {
filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters") filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
@@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
directory := directories[len(directories)-1] directory := directories[len(directories)-1]
directories = directories[:len(directories)-1] directories = directories[:len(directories)-1]
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
if err != nil { if err != nil {
if directory.Path == "" { if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err) LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)