Initial support for hardlinks in snapshots.

- Create a new snapshot version number as this method is not backwards
compatible.
- This has some breakage when restoring. Namely, if the root file
is not marked for download, any hardlinked files that need to be restored
will not be linked; they will be restored as regular files.
This commit is contained in:
2023-09-29 16:28:47 -05:00
parent e3bf370e35
commit 2bb70595c5
3 changed files with 98 additions and 13 deletions

View File

@@ -622,6 +622,11 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
return true
}
// hardLinkEntry is the value type of the hardLinkTable map built during Restore. The map is keyed by the
// path of a hardlink source file, i.e. a file entry whose Link field is "/".
type hardLinkEntry struct {
// entry is the snapshot entry recorded for the hardlink source file.
entry *Entry
// willDownload is false when the source file is first noted and is set to true once an entry for it
// has been added to the list of files to restore.
willDownload bool
}
// Restore downloads the specified snapshot, compares it with what's on the repository, and then downloads
// files that are different. 'base' is a directory that contains files at a different revision which can
// serve as a local cache to avoid download chunks available locally. It is perfectly ok for 'base' to be
@@ -703,8 +708,16 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var localEntry *Entry
localListingOK := true
hardLinkTable := make(map[string]hardLinkEntry)
hardLinks := make([]*Entry, 0)
for remoteEntry := range remoteListingChannel {
if remoteEntry.IsFile() && remoteEntry.Link == "/" {
LOG_INFO("RESTORE_LINK", "Noting hardlinked source file %s", remoteEntry.Path)
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, false}
}
if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) {
continue
}
@@ -713,6 +726,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var compareResult int
for {
// TODO: We likely need to check whether a local listing entry exists in the hardLinkTable for the case where one is restoring a hardlink
// to a file that already exists on disk. Right now, we'll just end up downloading a fresh copy of the file.
if localEntry == nil && localListingOK {
localEntry, localListingOK = <- localListingChannel
}
@@ -730,12 +745,22 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
}
if compareResult == 0 {
if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size
skippedFileCount++
localEntry = nil
continue
if quickMode && localEntry.IsFile() {
checkEntry := remoteEntry
if len(remoteEntry.Link) > 0 && remoteEntry.Link != "/" {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else {
checkEntry = e.entry
}
}
if localEntry.IsSameAs(checkEntry) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path)
skippedFileSize += localEntry.Size
skippedFileCount++
localEntry = nil
continue
}
}
localEntry = nil
}
@@ -782,6 +807,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
}
directoryEntries = append(directoryEntries, remoteEntry)
} else {
if remoteEntry.Link == "/" {
hardLinkTable[remoteEntry.Path] = hardLinkEntry{remoteEntry, true}
} else if len(remoteEntry.Link) > 0 {
if e, ok := hardLinkTable[remoteEntry.Link]; !ok {
LOG_ERROR("RESTORE_LINK", "Source file %s for hardlink %s missing", remoteEntry.Link, remoteEntry.Path)
} else if !e.willDownload {
origSourcePath := e.entry.Path
e.entry.Path = remoteEntry.Path
remoteEntry = e.entry
hardLinkTable[origSourcePath] = hardLinkEntry{remoteEntry, true}
} else {
hardLinks = append(hardLinks, remoteEntry)
continue
}
}
// We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size
@@ -837,7 +877,11 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
stat, _ := os.Stat(fullPath)
if stat != nil {
if quickMode {
if file.IsSameAsFileInfo(stat) {
cmpFile := file
if file.IsFile() && len(file.Link) > 0 && file.Link != "/" {
cmpFile = hardLinkTable[file.Link].entry
}
if cmpFile.IsSameAsFileInfo(stat) {
LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path)
skippedFileSize += file.Size
skippedFileCount++
@@ -874,7 +918,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
downloadedFileSize += file.Size
downloadedFiles = append(downloadedFiles, file)
}
continue
}
@@ -901,6 +944,15 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner)
}
for _, linkEntry := range hardLinks {
sourcePath := joinPath(top, hardLinkTable[linkEntry.Link].entry.Path)
fullPath := joinPath(top, linkEntry.Path)
LOG_INFO("DOWNLOAD_LINK", "Hard linking %s -> %s", fullPath, sourcePath)
if err := os.Link(sourcePath, fullPath); err != nil {
LOG_ERROR("DOWNLOAD_LINK", "Failed to create hard link %s -> %s", fullPath, sourcePath)
}
}
if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles {

View File

@@ -18,6 +18,7 @@ import (
"time"
"bytes"
"crypto/sha256"
"syscall"
"github.com/vmihailenco/msgpack"
@@ -694,10 +695,26 @@ func (files FileInfoCompare) Less(i, j int) bool {
}
}
// listEntryLinkKey identifies a file by its device and inode numbers. It is the key type of
// ListingState.linkTable, which ListEntries consults for files whose link count is greater than one.
type listEntryLinkKey struct {
dev uint64 // device number, taken from syscall.Stat_t.Dev
ino uint64 // inode number, taken from syscall.Stat_t.Ino
}
// ListingState carries state that is shared across successive ListEntries calls made while listing
// local files. ListEntries records the first path seen for each inode and marks later entries with the
// same inode as links to that path.
type ListingState struct {
linkTable map[listEntryLinkKey]string // map unique inode details to initially found path
}
// NewListingState returns a ListingState whose link table is allocated and empty, so that it can be
// written to without a nil-map check.
func NewListingState() *ListingState {
	state := new(ListingState)
	state.linkTable = map[listEntryLinkKey]string{}
	return state
}
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry,
skippedFiles []string, err error) {
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool,
listingState *ListingState,
listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -784,6 +801,21 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue
}
if entry.IsFile() {
stat, ok := f.Sys().(*syscall.Stat_t)
if ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if path, ok := listingState.linkTable[k]; ok {
LOG_WARN("LIST_LINK", "Linking %s to %s", entry.Path, path)
entry.Link = path
entry.Size = 0
} else {
entry.Link = "/"
listingState.linkTable[k] = entry.Path
}
}
}
if entry.IsDir() {
directoryList = append(directoryList, entry)
} else {

View File

@@ -51,7 +51,7 @@ type Snapshot struct {
// CreateEmptySnapshot creates an empty snapshot.
func CreateEmptySnapshot(id string) (snapshto *Snapshot) {
return &Snapshot{
Version: 1,
Version: 0x6a6c01,
ID: id,
Revision: 0,
StartTime: time.Now().Unix(),
@@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string
var listingState = NewListingState()
if filtersFile == "" {
filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
@@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
directory := directories[len(directories)-1]
directories = directories[:len(directories)-1]
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel)
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
if err != nil {
if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
@@ -160,7 +161,7 @@ func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOpe
return
}
}
} else if snapshot.Version == 1 {
} else if snapshot.Version == 1 || snapshot.Version == 0x6a6c01 {
decoder := msgpack.NewDecoder(reader)
lastEndChunk := 0