Support backup and restore of hardlinks

This tracks inode/device from the stat info and creates backward
compatible snapshots that allow preserving hardlinks. Backwards
compatibility is preserved by saving a virtual inode number index in the
Link field of the file entry. Since this field was previously only used
for symlinks, this won't break old versions. Additionally, the entry
data is cloned so restoration with an old version works.

Current limitations are primarility with restore. They include:
- no command line option to prevent hard link restore
- if a file has the immutable or append only flag it will be set before
hardlinks are restored, so hardlinking will fail.
- if a partial restore includes a hardlink but not the parent
directories the hardlink will fail.

These will be solved by grouping restore of hardlinks together
with file, prior to applying final metadata.

- if a file is changed and is being rewritten by a restore hardlinks are
not preserved.
This commit is contained in:
2023-10-03 00:09:04 -05:00
parent bf2565b5c3
commit 16885eaa61
3 changed files with 162 additions and 22 deletions

View File

@@ -703,8 +703,19 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
var localEntry *Entry var localEntry *Entry
localListingOK := true localListingOK := true
type hardLinkEntry struct {
entry *Entry
willDownload bool
}
var hardLinkTable []hardLinkEntry
var hardLinks []*Entry
for remoteEntry := range remoteListingChannel { for remoteEntry := range remoteListingChannel {
if remoteEntry.IsHardlinkRoot() {
hardLinkTable = append(hardLinkTable, hardLinkEntry{remoteEntry, false})
}
if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) {
continue continue
} }
@@ -783,6 +794,21 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
remoteEntry.RestoreEarlyDirFlags(fullPath) remoteEntry.RestoreEarlyDirFlags(fullPath)
directoryEntries = append(directoryEntries, remoteEntry) directoryEntries = append(directoryEntries, remoteEntry)
} else { } else {
if remoteEntry.IsHardlinkRoot() {
hardLinkTable[len(hardLinkTable)-1] = hardLinkEntry{remoteEntry, true}
} else if remoteEntry.IsHardlinkedFrom() {
i, err := strconv.ParseUint(remoteEntry.Link, 16, 64)
if err != nil {
LOG_ERROR("RESTORE_HARDLINK", "Decode error in hardlink entry, expected hex int, got %s", remoteEntry.Link)
return 0
}
if !hardLinkTable[i].willDownload {
hardLinkTable[i] = hardLinkEntry{remoteEntry, true}
} else {
hardLinks = append(hardLinks, remoteEntry)
continue
}
}
// We can't download files here since fileEntries needs to be sorted // We can't download files here since fileEntries needs to be sorted
fileEntries = append(fileEntries, remoteEntry) fileEntries = append(fileEntries, remoteEntry)
totalFileSize += remoteEntry.Size totalFileSize += remoteEntry.Size
@@ -902,6 +928,17 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
file.RestoreMetadata(fullPath, nil, setOwner) file.RestoreMetadata(fullPath, nil, setOwner)
} }
for _, linkEntry := range hardLinks {
i, _ := strconv.ParseUint(linkEntry.Link, 16, 64)
sourcePath := joinPath(top, hardLinkTable[i].entry.Path)
fullPath := joinPath(top, linkEntry.Path)
LOG_INFO("RESTORE_HARDLINK", "Hard linking %s to %s", fullPath, sourcePath)
if err := os.Link(sourcePath, fullPath); err != nil {
LOG_ERROR("RESTORE_HARDLINK", "Failed to create hard link %s to %s", fullPath, sourcePath)
return 0
}
}
if deleteMode && len(patterns) == 0 { if deleteMode && len(patterns) == 0 {
// Reverse the order to make sure directories are empty before being deleted // Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles { for i := range extraFiles {
@@ -1053,8 +1090,13 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
lastEndChunk := 0 lastEndChunk := 0
uploadEntryInfoFunc := func(entry *Entry) error { type hardLinkEntry struct {
entry *Entry
startChunk int
}
var hardLinkTable []hardLinkEntry
uploadEntryInfoFunc := func(entry *Entry) error {
if entry.IsFile() && entry.Size > 0 { if entry.IsFile() && entry.Size > 0 {
delta := entry.StartChunk - len(chunkHashes) + 1 delta := entry.StartChunk - len(chunkHashes) + 1
if entry.StartChunk != lastChunk { if entry.StartChunk != lastChunk {
@@ -1072,10 +1114,38 @@ func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top s
entry.StartChunk -= delta entry.StartChunk -= delta
entry.EndChunk -= delta entry.EndChunk -= delta
if entry.IsHardlinkRoot() {
LOG_DEBUG("SNAPSHOT_UPLOAD", "Hard link root %s %v %v", entry.Path, entry.StartChunk, entry.EndChunk)
hardLinkTable = append(hardLinkTable, hardLinkEntry{entry, entry.StartChunk})
}
delta = entry.EndChunk - entry.StartChunk delta = entry.EndChunk - entry.StartChunk
entry.StartChunk -= lastEndChunk entry.StartChunk -= lastEndChunk
lastEndChunk = entry.EndChunk lastEndChunk = entry.EndChunk
entry.EndChunk = delta entry.EndChunk = delta
} else if entry.IsHardlinkedFrom() {
i, err := strconv.ParseUint(entry.Link, 16, 64)
if err != nil {
LOG_ERROR("SNAPSHOT_UPLOAD", "Decode error in hardlink entry, expected hex int, got %s", entry.Link)
return err
}
targetEntry := hardLinkTable[i].entry
var startChunk, endChunk int
if targetEntry.Size > 0 {
startChunk = hardLinkTable[i].startChunk - lastEndChunk
endChunk = targetEntry.EndChunk
}
entry = entry.HardLinkTo(targetEntry, startChunk, endChunk)
if targetEntry.Size > 0 {
lastEndChunk = hardLinkTable[i].startChunk + endChunk
}
LOG_DEBUG("SNAPSHOT_UPLOAD", "Uploading cloned hardlink for %s to %s (%v %v)", entry.Path, targetEntry.Path, startChunk, endChunk)
} else if entry.IsHardlinkRoot() {
hardLinkTable = append(hardLinkTable, hardLinkEntry{entry, 0})
} }
buffer.Reset() buffer.Reset()

View File

@@ -4,6 +4,8 @@
package duplicacy package duplicacy
import ( import (
"bytes"
"crypto/sha256"
"encoding/base64" "encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
@@ -15,12 +17,10 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"syscall"
"time" "time"
"bytes"
"crypto/sha256"
"github.com/vmihailenco/msgpack" "github.com/vmihailenco/msgpack"
) )
// This is the hidden directory in the repository for storing various files. // This is the hidden directory in the repository for storing various files.
@@ -119,6 +119,27 @@ func (entry *Entry) Copy() *Entry {
} }
} }
func (entry *Entry) HardLinkTo(target *Entry, startChunk int, endChunk int) *Entry {
return &Entry{
Path: entry.Path,
Size: target.Size,
Time: target.Time,
Mode: target.Mode,
Link: entry.Link,
Hash: target.Hash,
UID: target.UID,
GID: target.GID,
StartChunk: startChunk,
StartOffset: target.StartOffset,
EndChunk: endChunk,
EndOffset: target.EndOffset,
Attributes: target.Attributes,
}
}
// CreateEntryFromJSON creates an entry from a json description. // CreateEntryFromJSON creates an entry from a json description.
func (entry *Entry) UnmarshalJSON(description []byte) (err error) { func (entry *Entry) UnmarshalJSON(description []byte) (err error) {
@@ -492,14 +513,22 @@ func (entry *Entry) IsComplete() bool {
return entry.Size >= 0 return entry.Size >= 0
} }
func (entry *Entry) IsHardlinkedFrom() bool {
return entry.IsFile() && len(entry.Link) > 0 && entry.Link != "/"
}
func (entry *Entry) IsHardlinkRoot() bool {
return entry.IsFile() && entry.Link == "/"
}
func (entry *Entry) GetPermissions() os.FileMode { func (entry *Entry) GetPermissions() os.FileMode {
return os.FileMode(entry.Mode) & fileModeMask return os.FileMode(entry.Mode) & fileModeMask
} }
func (entry *Entry) GetParent() string { func (entry *Entry) GetParent() string {
path := entry.Path path := entry.Path
if path != "" && path[len(path) - 1] == '/' { if path != "" && path[len(path)-1] == '/' {
path = path[:len(path) - 1] path = path[:len(path)-1]
} }
i := strings.LastIndex(path, "/") i := strings.LastIndex(path, "/")
if i == -1 { if i == -1 {
@@ -596,7 +625,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(left); i++ { for i := p; i < len(left); i++ {
c3 = left[i] c3 = left[i]
if c3 == '/' { if c3 == '/' {
last1 = i == len(left) - 1 last1 = i == len(left)-1
break break
} }
} }
@@ -606,7 +635,7 @@ func ComparePaths(left string, right string) int {
for i := p; i < len(right); i++ { for i := p; i < len(right); i++ {
c4 = right[i] c4 = right[i]
if c4 == '/' { if c4 == '/' {
last2 = i == len(right) - 1 last2 = i == len(right)-1
break break
} }
} }
@@ -694,10 +723,27 @@ func (files FileInfoCompare) Less(i, j int) bool {
} }
} }
type listEntryLinkKey struct {
dev uint64
ino uint64
}
type ListingState struct {
linkIndex int
linkTable map[listEntryLinkKey]int // map unique inode details to initially found path
}
func NewListingState() *ListingState {
return &ListingState{
linkTable: make(map[listEntryLinkKey]int),
}
}
// ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths
// are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files.
func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool,
skippedFiles []string, err error) { listingState *ListingState,
listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) {
LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) LOG_DEBUG("LIST_ENTRIES", "Listing %s", path)
@@ -777,6 +823,24 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue continue
} }
var linkKey *listEntryLinkKey
if stat, ok := f.Sys().(*syscall.Stat_t); entry.IsFile() && ok && stat != nil && stat.Nlink > 1 {
k := listEntryLinkKey{dev: uint64(stat.Dev), ino: uint64(stat.Ino)}
if linkIndex, seen := listingState.linkTable[k]; seen {
if linkIndex == -1 {
LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute (hardlink)", entry.Path)
continue
}
entry.Size = 0
entry.Link = strconv.FormatInt(int64(linkIndex), 16)
} else {
entry.Link = "/"
listingState.linkTable[k] = -1
linkKey = &k
}
}
entry.ReadAttributes(top) entry.ReadAttributes(top)
if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) { if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) {
@@ -784,6 +848,11 @@ func ListEntries(top string, path string, patterns []string, nobackupFile string
continue continue
} }
if linkKey != nil {
listingState.linkTable[*linkKey] = listingState.linkIndex
listingState.linkIndex++
}
if entry.IsDir() { if entry.IsDir() {
directoryList = append(directoryList, entry) directoryList = append(directoryList, entry)
} else { } else {

View File

@@ -68,6 +68,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
skippedDirectories *[]string, skippedFiles *[]string) { skippedDirectories *[]string, skippedFiles *[]string) {
var patterns []string var patterns []string
listingState := NewListingState()
if filtersFile == "" { if filtersFile == "" {
filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters") filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
@@ -81,7 +82,7 @@ func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
directory := directories[len(directories)-1] directory := directories[len(directories)-1]
directories = directories[:len(directories)-1] directories = directories[:len(directories)-1]
subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
if err != nil { if err != nil {
if directory.Path == "" { if directory.Path == "" {
LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err) LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)