Implement zstd compression

Zstd compression can be enabled by providing `-zstd` or `-zstd-level <level>`
to `init`, `add`, or `backup`. With `-zstd` the compression level will be
`default`, and with `-zstd-level` the level can be any of `fastest`, `default`,
`better`, or `best`.
This commit is contained in:
Gilbert Chen
2023-03-26 21:31:51 -04:00
parent 9f276047db
commit 53b0f3f7b6
6 changed files with 138 additions and 5 deletions

View File

@@ -47,6 +47,10 @@ func (manager *BackupManager) SetDryRun(dryRun bool) {
manager.config.dryRun = dryRun
}
func (manager *BackupManager) SetCompressionLevel(level int) {
manager.config.CompressionLevel = level
}
// CreateBackupManager creates a backup manager using the specified 'storage'. 'snapshotID' is a unique id to
// identify snapshots created for this repository. 'top' is the top directory of the repository. 'password' is the
// master key which can be nil if encryption is not enabled.
@@ -138,6 +142,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
LOG_DEBUG("BACKUP_PARAMETERS", "top: %s, quick: %t, tag: %s", top, quickMode, tag)
manager.config.PrintCompressionLevel()
if manager.config.DataShards != 0 && manager.config.ParityShards != 0 {
LOG_INFO("BACKUP_ERASURECODING", "Erasure coding is enabled with %d data shards and %d parity shards",
manager.config.DataShards, manager.config.ParityShards)

View File

@@ -24,6 +24,7 @@ import (
"github.com/bkaradzic/go-lz4"
"github.com/minio/highwayhash"
"github.com/klauspost/reedsolomon"
"github.com/klauspost/compress/zstd"
// This is a fork of github.com/minio/highwayhash at 1.0.1 that computes incorrect hash on
// arm64 machines. We need this fork to be able to read the chunks created by Duplicacy
@@ -267,6 +268,38 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
deflater.Write(chunk.buffer.Bytes())
deflater.Close()
} else if chunk.config.CompressionLevel >= ZSTD_COMPRESSION_LEVEL_FASTEST && chunk.config.CompressionLevel <= ZSTD_COMPRESSION_LEVEL_BEST {
encryptedBuffer.Write([]byte("ZSTD"))
compressionLevel := zstd.SpeedDefault
if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_FASTEST {
compressionLevel = zstd.SpeedFastest
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BETTER {
compressionLevel = zstd.SpeedBetterCompression
} else if chunk.config.CompressionLevel == ZSTD_COMPRESSION_LEVEL_BEST {
compressionLevel = zstd.SpeedBestCompression
}
deflater, err := zstd.NewWriter(encryptedBuffer, zstd.WithEncoderLevel(compressionLevel))
if err != nil {
return err
}
// Make sure we have enough space in encryptedBuffer
availableLength := encryptedBuffer.Cap() - len(encryptedBuffer.Bytes())
maximumLength := deflater.MaxEncodedSize(chunk.buffer.Len())
if availableLength < maximumLength {
encryptedBuffer.Grow(maximumLength - availableLength)
}
_, err = deflater.Write(chunk.buffer.Bytes())
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}
err = deflater.Close()
if err != nil {
return fmt.Errorf("ZSTD compression error: %v", err)
}
} else if chunk.config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
encryptedBuffer.Write([]byte("LZ4 "))
// Make sure we have enough space in encryptedBuffer
@@ -361,7 +394,6 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetada
chunk.buffer.Write(header)
return nil
}
// This is to ensure compatibility with Vertical Backup, which still uses HMAC-SHA256 (instead of HMAC-BLAKE2) to
@@ -633,6 +665,24 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
chunk.hash = nil
return nil, rewriteNeeded
}
if len(compressed) > 4 && string(compressed[:4]) == "ZSTD" {
chunk.buffer.Reset()
chunk.hasher = chunk.config.NewKeyedHasher(chunk.config.HashKey)
chunk.hash = nil
encryptedBuffer.Read(encryptedBuffer.Bytes()[:4])
inflater, err := zstd.NewReader(encryptedBuffer)
if err != nil {
return err, false
}
defer inflater.Close()
if _, err = io.Copy(chunk, inflater); err != nil {
return err, false
}
return nil, rewriteNeeded
}
inflater, err := zlib.NewReader(encryptedBuffer)
if err != nil {
return err, false

View File

@@ -35,6 +35,19 @@ var DEFAULT_KEY = []byte("duplicacy")
// standard zlib levels of -1 to 9.
var DEFAULT_COMPRESSION_LEVEL = 100
// zstd compression levels starting from 200
var ZSTD_COMPRESSION_LEVEL_FASTEST = 200
var ZSTD_COMPRESSION_LEVEL_DEFAULT = 201
var ZSTD_COMPRESSION_LEVEL_BETTER = 202
var ZSTD_COMPRESSION_LEVEL_BEST = 203
var ZSTD_COMPRESSION_LEVELS = map[string]int {
"fastest": ZSTD_COMPRESSION_LEVEL_FASTEST,
"default": ZSTD_COMPRESSION_LEVEL_DEFAULT,
"better": ZSTD_COMPRESSION_LEVEL_BETTER,
"best": ZSTD_COMPRESSION_LEVEL_BEST,
}
// The new banner of the config file (to differentiate from the old format where the salt and iterations are fixed)
var CONFIG_BANNER = "duplicacy\001"
@@ -202,6 +215,14 @@ func (config *Config) Print() {
}
func (config *Config) PrintCompressionLevel() {
for name, level := range ZSTD_COMPRESSION_LEVELS {
if level == config.CompressionLevel {
LOG_INFO("COMPRESSION_LEVEL", "Zstd compression is enabled (level: %s)", name)
}
}
}
func CreateConfigFromParameters(compressionLevel int, averageChunkSize int, maximumChunkSize int, mininumChunkSize int,
isEncrypted bool, copyFrom *Config, bitCopy bool) (config *Config) {
@@ -294,7 +315,10 @@ func (config *Config) PutChunk(chunk *Chunk) {
}
func (config *Config) NewKeyedHasher(key []byte) hash.Hash {
if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
// Early versions of Duplicacy used SHA256 as the hash function for chunk IDs at the time when
// only zlib compression was supported. Later SHA256 was replaced by Blake2b and LZ4 was used
// for compression (with compression level set to 100).
if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, err := blake2.New(&blake2.Config{Size: 32, Key: key})
if err != nil {
LOG_ERROR("HASH_KEY", "Invalid hash key: %x", key)
@@ -339,7 +363,7 @@ func (hasher *DummyHasher) BlockSize() int {
func (config *Config) NewFileHasher() hash.Hash {
if SkipFileHash {
return &DummyHasher{}
} else if config.CompressionLevel == DEFAULT_COMPRESSION_LEVEL {
} else if config.CompressionLevel >= DEFAULT_COMPRESSION_LEVEL {
hasher, _ := blake2.New(&blake2.Config{Size: 32})
return hasher
} else {