Files
duplicacy/src/duplicacy_snapshot.go
John K. Luebs 16885eaa61 Support backup and restore of hardlinks
This tracks inode/device from the stat info and creates backward
compatible snapshots that allow preserving hardlinks. Backwards
compatibility is preserved by saving a virtual inode number index in the
Link field of the file entry. Since this field was previously only used
for symlinks, this won't break old versions. Additionally, the entry
data is cloned so restoration with an old version works.

Current limitations are primarily with restore. They include:
- no command line option to prevent hard link restore
- if a file has the immutable or append only flag it will be set before
hardlinks are restored, so hardlinking will fail.
- if a partial restore includes a hardlink but not the parent
directories the hardlink will fail.

These will be solved by grouping restore of hardlinks together
with files, prior to applying final metadata.

- if a file is changed and is being rewritten by a restore hardlinks are
not preserved.
2023-10-03 12:21:46 -05:00

495 lines
14 KiB
Go

// Copyright (c) Acrosync LLC. All rights reserved.
// Free for personal use and commercial trial
// Commercial use requires per-user licenses available from https://duplicacy.com
package duplicacy
import (
"encoding/hex"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"sort"
"github.com/vmihailenco/msgpack"
)
// Snapshot represents a backup of the repository.
type Snapshot struct {
	// Snapshot format version: 0 snapshots store the file list as a json array,
	// 1 as a msgpack stream (see ListRemoteFiles below).
	Version int

	ID       string // the snapshot id; must be different for different repositories
	Revision int    // the revision number
	Options  string // options used to create this snapshot (some not included)
	Tag      string // user-assigned tag

	StartTime int64 // at what time the snapshot was created
	EndTime   int64 // at what time the snapshot was done

	FileSize      int64 // total file size
	NumberOfFiles int64 // number of files

	// A sequence of chunks whose aggregated content is the json representation of 'Files'.
	FileSequence []string
	// A sequence of chunks whose aggregated content is the json representation of 'ChunkHashes'.
	ChunkSequence []string
	// A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'.
	LengthSequence []string

	ChunkHashes  []string // a sequence of chunks representing the file content
	ChunkLengths []int    // the length of each chunk

	Flag bool // used to mark certain snapshots for deletion or copy
}
// CreateEmptySnapshot creates an empty version-1 snapshot with the given id,
// revision 0, and the start time set to the current time.
func CreateEmptySnapshot(id string) *Snapshot {
	// The named result in the original was misspelled and never used, so it has
	// been dropped in favor of returning the literal directly.
	return &Snapshot{
		Version:   1,
		ID:        id,
		Revision:  0,
		StartTime: time.Now().Unix(),
	}
}
// DirectoryListing associates a directory with the entries listed under it.
// NOTE(review): this type is not referenced anywhere in this file; presumably
// it is consumed by backup/listing code elsewhere -- confirm before removing.
type DirectoryListing struct {
	directory string   // repository-relative directory path -- TODO confirm
	files     *[]Entry // entries contained in the directory
}
// ListLocalFiles walks the repository rooted at 'top', applying the
// include/exclude patterns from 'filtersFile' (or the default preference-path
// filters file when empty), and sends every discovered entry to
// 'listingChannel', closing it when the walk finishes.  Directories and files
// that could not be listed are appended to 'skippedDirectories' and
// 'skippedFiles' respectively when those pointers are non-nil.  A failure to
// list the repository root aborts the walk with an error.
func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string,
	filtersFile string, excludeByAttribute bool, listingChannel chan *Entry,
	skippedDirectories *[]string, skippedFiles *[]string) {

	listingState := NewListingState()

	if filtersFile == "" {
		filtersFile = joinPath(GetDuplicacyPreferencePath(), "filters")
	}
	patterns := ProcessFilters(filtersFile)

	// Depth-first traversal with an explicit stack, seeded with the root entry.
	stack := make([]*Entry, 0, 256)
	stack = append(stack, CreateEntry("", 0, 0, 0))

	for len(stack) > 0 {
		last := len(stack) - 1
		directory := stack[last]
		stack = stack[:last]

		subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingState, listingChannel)
		if err != nil {
			// A root failure is fatal; a subdirectory failure is recorded and skipped.
			if directory.Path == "" {
				LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err)
				return
			}
			LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err)
			if skippedDirectories != nil {
				*skippedDirectories = append(*skippedDirectories, directory.Path)
			}
			continue
		}

		stack = append(stack, subdirectories...)
		if skippedFiles != nil {
			*skippedFiles = append(*skippedFiles, skipped...)
		}
	}

	close(listingChannel)
}
// ListRemoteFiles downloads the chunks named in snapshot.FileSequence, decodes
// the file entries stored in them, and passes each entry to 'entryOut' in
// turn; iteration stops as soon as 'entryOut' returns false.  Two encodings
// are supported: version 0 snapshots store a json array of entries, version 1
// snapshots store a stream of msgpack-encoded entries with delta-encoded chunk
// indices.
func (snapshot *Snapshot) ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) {

	// NOTE(review): 'chunks' is built here but never read afterwards -- this
	// looks like dead code unless GetChunkIDFromHash is relied on for a side
	// effect; confirm before removing.
	var chunks []string
	for _, chunkHash := range snapshot.FileSequence {
		chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash))
	}

	// The sequence reader pulls chunk contents on demand.  Only one downloaded
	// chunk is held at a time; the previous one is returned to the config's
	// chunk pool before the next download.
	var chunk *Chunk
	reader := NewSequenceReader(snapshot.FileSequence, func(chunkHash string) []byte {
		if chunk != nil {
			config.PutChunk(chunk)
		}
		chunk = chunkOperator.Download(chunkHash, 0, true)
		return chunk.GetBytes()
	})
	defer func() {
		// Return the last downloaded chunk to the pool when done.
		if chunk != nil {
			config.PutChunk(chunk)
		}
	}()

	// Normally if Version is 0 then the snapshot is created by CLI v2 but unfortunately CLI 3.0.1 does not set the
	// version bit correctly when copying old backups. So we need to check the first byte -- if it is '[' then it is
	// the old format. The new format starts with a string encoded in msgpack and the first byte can't be '['.
	if snapshot.Version == 0 || reader.GetFirstByte() == '[' {
		LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision)

		files := make([]*Entry, 0)
		decoder := json.NewDecoder(reader)
		// read open bracket
		_, err := decoder.Token()
		if err != nil {
			LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries",
				snapshot.ID, snapshot.Revision)
			return
		}

		// Decode every entry in the json array.
		for decoder.More() {
			var entry Entry
			err = decoder.Decode(&entry)
			if err != nil {
				LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v",
					snapshot.ID, snapshot.Revision, err)
				return
			}
			files = append(files, &entry)
		}

		// Old-format entries are sorted by name before being handed to entryOut.
		sort.Sort(ByName(files))
		for _, file := range files {
			if !entryOut(file) {
				return
			}
		}
	} else if snapshot.Version == 1 {
		decoder := msgpack.NewDecoder(reader)
		lastEndChunk := 0
		// while the array contains values
		for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() {
			if err != nil {
				LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v",
					snapshot.ID, snapshot.Revision, err)
				return
			}
			var entry Entry
			err = decoder.Decode(&entry)
			if err != nil {
				LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot %s at revision %d: %v",
					snapshot.ID, snapshot.Revision, err)
				return
			}
			// Chunk indices are delta-encoded: a file's StartChunk is relative to
			// the previous file's EndChunk, and its EndChunk is relative to its
			// own StartChunk; convert both to absolute indices here.
			if entry.IsFile() {
				entry.StartChunk += lastEndChunk
				entry.EndChunk += entry.StartChunk
				lastEndChunk = entry.EndChunk
			}
			// Validate the entry against the chunk lengths before emitting it.
			err = entry.check(snapshot.ChunkLengths)
			if err != nil {
				LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v",
					snapshot.ID, snapshot.Revision, err)
				return
			}
			if !entryOut(&entry) {
				return
			}
		}
	} else {
		LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format",
			snapshot.ID, snapshot.Revision, snapshot.Version)
		return
	}
}
// AppendPattern appends new_pattern to patterns unless an identical pattern is
// already present, in which case the duplicate is logged and the list is
// returned unchanged.
func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) {
	for _, existing := range patterns {
		if existing == new_pattern {
			LOG_INFO("SNAPSHOT_FILTER", "Ignoring duplicate pattern: %s ...", new_pattern)
			return patterns
		}
	}
	return append(patterns, new_pattern)
}
// ProcessFilters loads the include/exclude patterns from the given filters
// file, logging how many patterns (and compiled regular expressions) were
// loaded, plus each individual pattern when tracing is enabled.
func ProcessFilters(filtersFile string) (patterns []string) {
	patterns = ProcessFilterFile(filtersFile, make([]string, 0))

	LOG_DEBUG("REGEX_DEBUG", "There are %d compiled regular expressions stored", len(RegexMap))
	LOG_INFO("SNAPSHOT_FILTER", "Loaded %d include/exclude pattern(s)", len(patterns))

	if IsTracing() {
		for _, loaded := range patterns {
			LOG_TRACE("SNAPSHOT_PATTERN", "Pattern: %s", loaded)
		}
	}
	return patterns
}
// ProcessFilterFile reads a filter file and parses its lines into patterns.
// includedFiles is the chain of filter files currently being processed; it is
// used to detect and reject include cycles.  A file that cannot be read
// yields no patterns (best effort).
func ProcessFilterFile(patternFile string, includedFiles []string) (patterns []string) {
	// Reject a file that is already on the include chain (a cycle).
	for _, ancestor := range includedFiles {
		if ancestor == patternFile {
			LOG_ERROR("SNAPSHOT_FILTER", "The filter file %s has already been included", patternFile)
			return patterns
		}
	}
	includedFiles = append(includedFiles, patternFile)

	LOG_INFO("SNAPSHOT_FILTER", "Parsing filter file %s", patternFile)
	content, err := ioutil.ReadFile(patternFile)
	if err != nil {
		return patterns
	}
	return ProcessFilterLines(strings.Split(string(content), "\n"), includedFiles)
}
// ProcessFilterLines parses a list of filter-file lines into include/exclude
// patterns.  Blank lines and '#' comments are skipped, "@path" lines pull in
// another filter file, prefix-less patterns are treated as includes, and
// regular-expression filters ("i:" / "e:") are validated as they are read.
func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patterns []string) {
	for _, line := range patternFileLines {
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			continue
		}

		// An "@path" line includes another filter file.  Relative paths are
		// resolved against the including file's directory, or the current
		// working directory at the top level.
		if strings.HasPrefix(line, "@") {
			includeFile := strings.TrimSpace(line[1:])
			if includeFile == "" {
				continue
			}
			if !filepath.IsAbs(includeFile) {
				var basePath string
				if len(includedFiles) == 0 {
					basePath, _ = os.Getwd()
				} else {
					basePath = filepath.Dir(includedFiles[len(includedFiles)-1])
				}
				includeFile = joinPath(basePath, includeFile)
			}
			for _, included := range ProcessFilterFile(includeFile, includedFiles) {
				patterns = AppendPattern(patterns, included)
			}
			continue
		}

		// '#' introduces a comment line.
		if line[0] == '#' {
			continue
		}

		// A pattern with no explicit prefix defaults to an include.
		if IsUnspecifiedFilter(line) {
			line = "+" + line
		}
		if IsEmptyFilter(line) {
			continue
		}

		// Validate regular-expression filters before storing them.
		if strings.HasPrefix(line, "i:") || strings.HasPrefix(line, "e:") {
			if valid, err := IsValidRegex(line[2:]); !valid || err != nil {
				LOG_ERROR("SNAPSHOT_FILTER", "Invalid regular expression encountered for filter: \"%s\", error: %v", line, err)
			}
		}

		patterns = AppendPattern(patterns, line)
	}
	return patterns
}
// CreateSnapshotFromDescription creates a snapshot from its json description.
// Required fields (id, revision, start_time, end_time, and the three chunk
// sequences) are validated for presence and type; version, tag, options,
// file_size, and number_of_files are optional.
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) {

	var root map[string]interface{}
	err = json.Unmarshal(description, &root)
	if err != nil {
		return nil, err
	}

	snapshot = &Snapshot{}

	// "version" is optional; snapshots created by old CLI versions omit it.
	if value, ok := root["version"]; !ok {
		snapshot.Version = 0
	} else if version, ok := value.(float64); !ok {
		return nil, fmt.Errorf("Invalid version is specified in the snapshot")
	} else {
		snapshot.Version = int(version)
	}

	if value, ok := root["id"]; !ok {
		return nil, fmt.Errorf("No id is specified in the snapshot")
	} else if snapshot.ID, ok = value.(string); !ok {
		return nil, fmt.Errorf("Invalid id is specified in the snapshot")
	}

	if value, ok := root["revision"]; !ok {
		return nil, fmt.Errorf("No revision is specified in the snapshot")
	} else if _, ok = value.(float64); !ok {
		return nil, fmt.Errorf("Invalid revision is specified in the snapshot")
	} else {
		snapshot.Revision = int(value.(float64))
	}

	// "tag" and "options" are optional but must be strings when present.
	if value, ok := root["tag"]; !ok {
	} else if snapshot.Tag, ok = value.(string); !ok {
		return nil, fmt.Errorf("Invalid tag is specified in the snapshot")
	}

	if value, ok := root["options"]; !ok {
	} else if snapshot.Options, ok = value.(string); !ok {
		return nil, fmt.Errorf("Invalid options is specified in the snapshot")
	}

	if value, ok := root["start_time"]; !ok {
		return nil, fmt.Errorf("No creation time is specified in the snapshot")
	} else if _, ok = value.(float64); !ok {
		return nil, fmt.Errorf("Invalid creation time is specified in the snapshot")
	} else {
		snapshot.StartTime = int64(value.(float64))
	}

	// Bug fix: these two messages previously said "creation time" (copied from
	// the start_time case above), making the failures indistinguishable.
	if value, ok := root["end_time"]; !ok {
		return nil, fmt.Errorf("No end time is specified in the snapshot")
	} else if _, ok = value.(float64); !ok {
		return nil, fmt.Errorf("Invalid end time is specified in the snapshot")
	} else {
		snapshot.EndTime = int64(value.(float64))
	}

	// "file_size" and "number_of_files" are optional; non-numeric values are
	// silently ignored (deliberate best effort, as in the original).
	if value, ok := root["file_size"]; ok {
		if _, ok = value.(float64); ok {
			snapshot.FileSize = int64(value.(float64))
		}
	}

	if value, ok := root["number_of_files"]; ok {
		if _, ok = value.(float64); ok {
			snapshot.NumberOfFiles = int64(value.(float64))
		}
	}

	// Each sequence is an array of hex-encoded chunk hashes; decode them to the
	// raw binary hashes used internally.
	for _, sequenceType := range []string{"files", "chunks", "lengths"} {
		if value, ok := root[sequenceType]; !ok {
			return nil, fmt.Errorf("No %s are specified in the snapshot", sequenceType)
		} else if _, ok = value.([]interface{}); !ok {
			return nil, fmt.Errorf("Invalid %s are specified in the snapshot", sequenceType)
		} else {
			array := value.([]interface{})
			sequence := make([]string, len(array))
			for i := 0; i < len(array); i++ {
				if hashInHex, ok := array[i].(string); !ok {
					// Bug fix: this previously said "file sequence" regardless of
					// which sequence was malformed.
					return nil, fmt.Errorf("Invalid %s sequence is specified in the snapshot", sequenceType)
				} else if hash, err := hex.DecodeString(hashInHex); err != nil {
					return nil, fmt.Errorf("Hash %s is not a valid hex string in the snapshot", hashInHex)
				} else {
					sequence[i] = string(hash)
				}
			}
			snapshot.SetSequence(sequenceType, sequence)
		}
	}

	return snapshot, nil
}
// LoadChunks constructs 'ChunkHashes' from the json description, which must be
// an array of hex-encoded chunk hashes; the decoded binary hashes are stored.
func (snapshot *Snapshot) LoadChunks(description []byte) (err error) {
	var hashesInHex []interface{}
	if err = json.Unmarshal(description, &hashesInHex); err != nil {
		return err
	}

	snapshot.ChunkHashes = make([]string, len(hashesInHex))
	for i, object := range hashesInHex {
		hexString, ok := object.(string)
		if !ok {
			return fmt.Errorf("Invalid chunk hash is specified in the snapshot")
		}
		hash, decodeErr := hex.DecodeString(hexString)
		if decodeErr != nil {
			return fmt.Errorf("The chunk hash %s is not a valid hex string", hexString)
		}
		snapshot.ChunkHashes[i] = string(hash)
	}
	return nil
}
// ClearChunks removes loaded chunks from memory by dropping the decoded chunk
// hashes so they can be garbage collected.
func (snapshot *Snapshot) ClearChunks() {
	snapshot.ChunkHashes = nil
}
// LoadLengths constructs 'ChunkLengths' from the json description, which is a
// plain json array of integers.
func (snapshot *Snapshot) LoadLengths(description []byte) (err error) {
	return json.Unmarshal(description, &snapshot.ChunkLengths)
}
// MarshalJSON creates the json representation of the snapshot.  Binary chunk
// hashes in the three sequences are hex-encoded, and file_size/number_of_files
// are only emitted when both are non-zero.
func (snapshot *Snapshot) MarshalJSON() ([]byte, error) {
	fields := map[string]interface{}{
		"version":    snapshot.Version,
		"id":         snapshot.ID,
		"revision":   snapshot.Revision,
		"options":    snapshot.Options,
		"tag":        snapshot.Tag,
		"start_time": snapshot.StartTime,
		"end_time":   snapshot.EndTime,
		"files":      encodeSequence(snapshot.FileSequence),
		"chunks":     encodeSequence(snapshot.ChunkSequence),
		"lengths":    encodeSequence(snapshot.LengthSequence),
	}
	if snapshot.FileSize != 0 && snapshot.NumberOfFiles != 0 {
		fields["file_size"] = snapshot.FileSize
		fields["number_of_files"] = snapshot.NumberOfFiles
	}
	return json.Marshal(fields)
}
// MarshalSequence creates a json representation for the specified chunk
// sequence: "chunks" marshals the hex-encoded ChunkHashes; any other value
// marshals ChunkLengths.
func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) {
	if sequenceType != "chunks" {
		return json.Marshal(snapshot.ChunkLengths)
	}
	return json.Marshal(encodeSequence(snapshot.ChunkHashes))
}
// SetSequence assigns a chunk sequence to the field selected by sequenceType:
// "files" sets FileSequence, "chunks" sets ChunkSequence, and any other value
// sets LengthSequence.
func (snapshot *Snapshot) SetSequence(sequenceType string, sequence []string) {
	switch sequenceType {
	case "files":
		snapshot.FileSequence = sequence
	case "chunks":
		snapshot.ChunkSequence = sequence
	default:
		snapshot.LengthSequence = sequence
	}
}
// encodeSequence converts a sequence of binary hashes into the equivalent
// sequence of hex-encoded hashes.
func encodeSequence(sequence []string) []string {
	hexSequence := make([]string, len(sequence))
	for i := range sequence {
		hexSequence[i] = hex.EncodeToString([]byte(sequence[i]))
	}
	return hexSequence
}