mirror of
https://github.com/jkl1337/duplicacy.git
synced 2026-01-04 20:54:44 -06:00
Implement zstd compression
Zstd compression can be enabled by passing `-zstd` or `-zstd-level <level>` to `init`, `add`, or `backup`. With `-zstd` the compression level is `default`; with `-zstd-level` the level must be one of `fastest`, `default`, `better`, or `best`.
This commit is contained in:
@@ -47,6 +47,10 @@ func (manager *BackupManager) SetDryRun(dryRun bool) {
|
||||
manager.config.dryRun = dryRun
|
||||
}
|
||||
|
||||
// SetCompressionLevel stores 'level' as the compression level in the manager's config.
func (manager *BackupManager) SetCompressionLevel(level int) {
|
||||
manager.config.CompressionLevel = level
|
||||
}
|
||||
|
||||
// CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to
|
||||
// identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the
|
||||
// master key which can be nil if encryption is not enabled.
|
||||
@@ -138,6 +142,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
|
||||
|
||||
LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag)
|
||||
|
||||
manager.config.PrintCompressionLevel()
|
||||
|
||||
if manager.config.DataShards != 0 && manager.config.ParityShards != 0 {
|
||||
LOG_INFO("BACKUP_ERASURECODING", "Erasure coding is enabled with %d data shards and %d parity shards",
|
||||
manager.config.DataShards, manager.config.ParityShards)
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/bkaradzic/go-lz4"
|
||||
"github.com/minio/highwayhash"
|
||||
"github.com/klauspost/reedsolomon"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
|
||||
// This is a fork of github.com/minio/highwayhash at 1.0.1 that computes incorrect hash on
|
||||
// arm64 machines. We need this fork to be able to read the chunks created by Duplicacy
|
||||
@@ -267,6 +268,38 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
|
||||
deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
|
||||
deflater.Write(chunk.buffer.Bytes())
|
||||
deflater.Close()
|
||||
} else if chunk.config.CompressionLevel >= ZSTD_COMPRESSION_LEVEL_FASTEST && chunk.config.CompressionLevel <= ZSTD_COMPRESSION_LEVEL_BEST {
|
||||
encryptedBuffer.Write([]byte("ZSTD"))
|
||||
|
||||
compressionLevel := zstd.SpeedDefault
|
||||
if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_FASTEST {
|
||||
compressionLevel = zstd.SpeedFastest
|
||||
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BETTER {
|
||||
compressionLevel = zstd.SpeedBetterCompression
|
||||
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BEST {
|
||||
compressionLevel = zstd.SpeedBestCompression
|
||||
}
|
||||
|
||||
deflater, err := zstd.NewWriter(encryptedBuffer, zstd.WithEncoderLevel(compressionLevel))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make sure we have enough space in encryptedBuffer
|
||||
availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes())
|
||||
maximumLength := deflater.MaxEncodedSize(chunk.buffer.Len())
|
||||
if availableLength < maximumLength {
|
||||
encryptedBuffer.Grow(maximumLength - availableLength)
|
||||
}
|
||||
_, err = deflater.Write(chunk.buffer.Bytes())
|
||||
if err != nil {
|
||||
return fmt.Errorf("ZSTD compression error: %v", err)
|
||||
}
|
||||
|
||||
err = deflater.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ZSTD compression error: %v", err)
|
||||
}
|
||||
} else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
|
||||
encryptedBuffer.Write([]byte("LZ4 "))
|
||||
// Make sure we have enough space in encryptedBuffer
|
||||
@@ -361,7 +394,6 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
|
||||
chunk.buffer.Write(header)
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
// This is to ensure compatibility with Vertical Backup, which still uses HMAC-SHA256 (instead of HMAC-BLAKE2) to
|
||||
@@ -633,6 +665,24 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
|
||||
chunk.hash = nil
|
||||
return nil, rewriteNeeded
|
||||
}
|
||||
|
||||
if len(compressed) > 4 && string(compressed[:4]) == "ZSTD" {
|
||||
chunk.buffer.Reset()
|
||||
chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
|
||||
chunk.hash = nil
|
||||
|
||||
encryptedBuffer.Read(encryptedBuffer.Bytes()[:4])
|
||||
inflater, err := zstd.NewReader(encryptedBuffer)
|
||||
if err != nil {
|
||||
return err, false
|
||||
}
|
||||
defer inflater.Close()
|
||||
if _, err = io.Copy(chunk, inflater); err != nil {
|
||||
return err, false
|
||||
}
|
||||
return nil, rewriteNeeded
|
||||
}
|
||||
|
||||
inflater, err := zlib.NewReader(encryptedBuffer)
|
||||
if err != nil {
|
||||
return err, false
|
||||
|
||||
@@ -35,6 +35,19 @@ var DEFAULT_KEY = []byte("duplicacy")
|
||||
// standard zlib levels of -1 to 9.
|
||||
var DEFAULT_COMPRESSION_LEVEL = 100
|
||||
|
||||
// zstd compression levels starting from 200. Chunk encryption selects zstd when the configured level
// lies between ZSTD_COMPRESSION_LEVEL_FASTEST and ZSTD_COMPRESSION_LEVEL_BEST inclusive, so these
// values must remain contiguous.
|
||||
var ZSTD_COMPRESSION_LEVEL_FASTEST = 200
|
||||
var ZSTD_COMPRESSION_LEVEL_DEFAULT = 201
|
||||
var ZSTD_COMPRESSION_LEVEL_BETTER = 202
|
||||
var ZSTD_COMPRESSION_LEVEL_BEST = 203
|
||||
|
||||
// ZSTD_COMPRESSION_LEVELS maps each zstd level name to its compression level value.
var ZSTD_COMPRESSION_LEVELS = map[string]int {
|
||||
"fastest": ZSTD_COMPRESSION_LEVEL_FASTEST,
|
||||
"default": ZSTD_COMPRESSION_LEVEL_DEFAULT,
|
||||
"better": ZSTD_COMPRESSION_LEVEL_BETTER,
|
||||
"best": ZSTD_COMPRESSION_LEVEL_BEST,
|
||||
}
|
||||
|
||||
// The new banner of the config file (to differentiate from the old format where the salt and iterations are fixed)
|
||||
var CONFIG_BANNER = "duplicacy\001"
|
||||
|
||||
@@ -202,6 +215,14 @@ func (config *Config) Print() {
|
||||
|
||||
}
|
||||
|
||||
// PrintCompressionLevel logs an info message naming the zstd level when the configured compression
// level equals one of the values in ZSTD_COMPRESSION_LEVELS. Nothing is logged for any other level.
func (config *Config) PrintCompressionLevel() {
|
||||
// The map is keyed by name, so it is searched by value to recover the name of the configured level.
for name, level := range ZSTD_COMPRESSION_LEVELS {
|
||||
if level == config.CompressionLevel {
|
||||
LOG_INFO("COMPRESSION_LEVEL", "Zstd compression is enabled (level: %s)", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int,
|
||||
isEncrypted bool, copyFrom *Config, bitCopy bool) (config *Config) {
|
||||
|
||||
@@ -294,7 +315,10 @@ func (config *Config) PutChunk(chunk *Chunk) {
|
||||
}
|
||||
|
||||
func (config *Config) NewKeyedHasher(key []byte) hash.Hash {
|
||||
if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
|
||||
// Early versions of Duplicacy used SHA256 as the hash function for chunk IDs at the time when
|
||||
// only zlib compression was supported. Later SHA256 was replaced by Blake2b and LZ4 was used
|
||||
// for compression (with compression level set to 100).
|
||||
if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
|
||||
hasher, err := blake2.New(&blake2.Config{Size: 32, Key: key})
|
||||
if err != nil {
|
||||
LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key)
|
||||
@@ -339,7 +363,7 @@ func (hasher *DummyHasher) BlockSize() int {
|
||||
func (config *Config) NewFileHasher() hash.Hash {
|
||||
if SkipFileHash {
|
||||
return &DummyHasher{}
|
||||
} else if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
|
||||
} else if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
|
||||
hasher, _ := blake2.New(&blake2.Config{Size: 32})
|
||||
return hasher
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user