Implement Erasure Coding

This commit is contained in:
Gilbert Chen
2020-09-03 12:54:48 -04:00
parent 670cbcd776
commit 923a6fbc5b
6 changed files with 321 additions and 53 deletions

View File

@@ -22,6 +22,8 @@ import (
"runtime"
"github.com/bkaradzic/go-lz4"
"github.com/minio/highwayhash"
"github.com/klauspost/reedsolomon"
)
// A chunk needs to acquire a new buffer and return the old one for every encrypt/decrypt operation, therefore
@@ -68,11 +70,13 @@ type Chunk struct {
}
// Magic word to identify a duplicacy format encrypted file, plus a version number.
var ENCRYPTION_HEADER = "duplicacy\000"
var ENCRYPTION_BANNER = "duplicacy\000"
// RSA encrypted chunks start with "duplicacy\002"
var ENCRYPTION_VERSION_RSA byte = 2
var ERASURE_CODING_BANNER = "duplicacy\003"
// CreateChunk creates a new chunk.
func CreateChunk(config *Config, bufferNeeded bool) *Chunk {
@@ -224,7 +228,7 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
// Start with the magic number and the version number.
if usingRSA {
// RSA encryption starts "duplicacy\002"
encryptedBuffer.Write([]byte(ENCRYPTION_HEADER)[:len(ENCRYPTION_HEADER) - 1])
encryptedBuffer.Write([]byte(ENCRYPTION_BANNER)[:len(ENCRYPTION_BANNER) - 1])
encryptedBuffer.Write([]byte{ENCRYPTION_VERSION_RSA})
// Then the encrypted key
@@ -235,7 +239,7 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
binary.Write(encryptedBuffer, binary.LittleEndian, uint16(len(encryptedKey)))
encryptedBuffer.Write(encryptedKey)
} else {
encryptedBuffer.Write([]byte(ENCRYPTION_HEADER))
encryptedBuffer.Write([]byte(ENCRYPTION_BANNER))
}
// Followed by the nonce
@@ -248,7 +252,7 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
offset = encryptedBuffer.Len()
}
// offset is either 0 or the length of header + nonce
// offset is either 0 or the length of banner + nonce
if chunk.config.CompressionLevel >= -1 && chunk.config.CompressionLevel <= 9 {
deflater, _ := zlib.NewWriterLevel(encryptedBuffer, chunk.config.CompressionLevel)
@@ -273,26 +277,79 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh
return fmt.Errorf("Invalid compression level: %d", chunk.config.CompressionLevel)
}
if len(encryptionKey) == 0 {
chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer
return nil
if len(encryptionKey) > 0 {
// PKCS7 is used. The sizes of compressed chunks leak information about the original chunks so we want the padding sizes
// to be the maximum allowed by PKCS7
dataLength := encryptedBuffer.Len() - offset
paddingLength := 256 - dataLength%256
encryptedBuffer.Write(bytes.Repeat([]byte{byte(paddingLength)}, paddingLength))
encryptedBuffer.Write(bytes.Repeat([]byte{0}, gcm.Overhead()))
// The encrypted data will be appended to the duplicacy banner and the once.
encryptedBytes := gcm.Seal(encryptedBuffer.Bytes()[:offset], nonce,
encryptedBuffer.Bytes()[offset:offset+dataLength+paddingLength], nil)
encryptedBuffer.Truncate(len(encryptedBytes))
}
// PKCS7 is used. Compressed chunk sizes leaks information about the original chunks so we want the padding sizes
// to be the maximum allowed by PKCS7
dataLength := encryptedBuffer.Len() - offset
paddingLength := 256 - dataLength%256
if chunk.config.DataShards == 0 || chunk.config.ParityShards == 0 {
chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer
return
}
encryptedBuffer.Write(bytes.Repeat([]byte{byte(paddingLength)}, paddingLength))
encryptedBuffer.Write(bytes.Repeat([]byte{0}, gcm.Overhead()))
// Start erasure coding
encoder, err := reedsolomon.New(chunk.config.DataShards, chunk.config.ParityShards)
if err != nil {
return err
}
chunkSize := len(encryptedBuffer.Bytes())
shardSize := (chunkSize + chunk.config.DataShards - 1) / chunk.config.DataShards
// Append zeros to make the last shard to have the same size as other
encryptedBuffer.Write(make([]byte, shardSize * chunk.config.DataShards - chunkSize))
// Grow the buffer for parity shards
encryptedBuffer.Grow(shardSize * chunk.config.ParityShards)
// Now create one slice for each shard, reusing the data in the buffer
data := make([][]byte, chunk.config.DataShards + chunk.config.ParityShards)
for i := 0; i < chunk.config.DataShards + chunk.config.ParityShards; i++ {
data[i] = encryptedBuffer.Bytes()[i * shardSize: (i + 1) * shardSize]
}
// This populates the parity shard
encoder.Encode(data)
// The encrypted data will be appended to the duplicacy header and the once.
encryptedBytes := gcm.Seal(encryptedBuffer.Bytes()[:offset], nonce,
encryptedBuffer.Bytes()[offset:offset+dataLength+paddingLength], nil)
// Prepare the chunk to be uploaded
chunk.buffer.Reset()
// First the banner
chunk.buffer.Write([]byte(ERASURE_CODING_BANNER))
// Then the header which includes the chunk size, data/parity and a 2-byte checksum
header := make([]byte, 14)
binary.LittleEndian.PutUint64(header[0:], uint64(chunkSize))
binary.LittleEndian.PutUint16(header[8:], uint16(chunk.config.DataShards))
binary.LittleEndian.PutUint16(header[10:], uint16(chunk.config.ParityShards))
header[12] = header[0] ^ header[2] ^ header[4] ^ header[6] ^ header[8] ^ header[10]
header[13] = header[1] ^ header[3] ^ header[5] ^ header[7] ^ header[9] ^ header[11]
chunk.buffer.Write(header)
// Calculate the highway hash for each shard
hashKey := make([]byte, 32)
for _, part := range data {
hasher, err := highwayhash.New(hashKey)
if err != nil {
return err
}
_, err = hasher.Write(part)
if err != nil {
return err
}
chunk.buffer.Write(hasher.Sum(nil))
}
encryptedBuffer.Truncate(len(encryptedBytes))
chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer
// Copy the data
for _, part := range data {
chunk.buffer.Write(part)
}
// Append the header again for redundancy
chunk.buffer.Write(header)
return nil
@@ -322,7 +379,122 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
}()
chunk.buffer, encryptedBuffer = encryptedBuffer, chunk.buffer
headerLength := len(ENCRYPTION_HEADER)
bannerLength := len(ENCRYPTION_BANNER)
if len(encryptedBuffer.Bytes()) > bannerLength && string(encryptedBuffer.Bytes()[:bannerLength]) == ERASURE_CODING_BANNER {
// The chunk was encoded with erasure coding
if len(encryptedBuffer.Bytes()) < bannerLength + 14 {
return fmt.Errorf("Erasure coding header truncated (%d bytes)", len(encryptedBuffer.Bytes()))
}
// Check the header checksum
header := encryptedBuffer.Bytes()[bannerLength: bannerLength + 14]
if header[12] != header[0] ^ header[2] ^ header[4] ^ header[6] ^ header[8] ^ header[10] ||
header[13] != header[1] ^ header[3] ^ header[5] ^ header[7] ^ header[9] ^ header[11] {
return fmt.Errorf("Erasure coding header corrupted (%x)", header)
}
// Read the parameters
chunkSize := int(binary.LittleEndian.Uint64(header[0:8]))
dataShards := int(binary.LittleEndian.Uint16(header[8:10]))
parityShards := int(binary.LittleEndian.Uint16(header[10:12]))
shardSize := (chunkSize + chunk.config.DataShards - 1) / chunk.config.DataShards
// This is the length the chunk file should have
expectedLength := bannerLength + 2 * len(header) + (dataShards + parityShards) * (shardSize + 32)
// The minimum length that can be recovered from
minimumLength := bannerLength + len(header) + (dataShards + parityShards) * 32 + dataShards * shardSize
LOG_DEBUG("CHUNK_ERASURECODE", "Chunk size: %d bytes, data size: %d, parity: %d/%d", chunkSize, len(encryptedBuffer.Bytes()), dataShards, parityShards)
if len(encryptedBuffer.Bytes()) > expectedLength {
LOG_WARN("CHUNK_ERASURECODE", "Chunk has %d bytes (instead of %d)", len(encryptedBuffer.Bytes()), expectedLength)
} else if len(encryptedBuffer.Bytes()) == expectedLength {
// Correct size; fall through
} else if len(encryptedBuffer.Bytes()) > minimumLength {
LOG_WARN("CHUNK_ERASURECODE", "Chunk is truncated (%d out of %d bytes)", len(encryptedBuffer.Bytes()), expectedLength)
} else {
return fmt.Errorf("Not enough chunk data for recovery; chunk size: %d bytes, data size: %d, parity: %d/%d", chunkSize, len(encryptedBuffer.Bytes()), dataShards, parityShards)
}
// Where the hashes start
hashOffset := bannerLength + len(header)
// Where the data start
dataOffset := hashOffset + (dataShards + parityShards) * 32
data := make([][]byte, dataShards + parityShards)
recoveryNeeded := false
hashKey := make([]byte, 32)
availableShards := 0
for i := 0; i < dataShards + parityShards; i++ {
start := dataOffset + i * shardSize
if start + shardSize > len(encryptedBuffer.Bytes()) {
// the current shard is incomplete
break
}
// Now verify the hash
hasher, err := highwayhash.New(hashKey)
if err != nil {
return err
}
_, err = hasher.Write(encryptedBuffer.Bytes()[start: start + shardSize])
if err != nil {
return err
}
if bytes.Compare(hasher.Sum(nil), encryptedBuffer.Bytes()[hashOffset + i * 32: hashOffset + (i + 1) * 32]) != 0 {
if i < dataShards {
recoveryNeeded = true
}
} else {
// The shard is good
data[i] = encryptedBuffer.Bytes()[start: start + shardSize]
availableShards++
if availableShards >= dataShards {
// We have enough shards to recover; skip the remaining shards
break
}
}
}
if !recoveryNeeded {
// Remove the padding zeros from the last shard
encryptedBuffer.Truncate(dataOffset + chunkSize)
// Skip the header and hashes
encryptedBuffer.Read(encryptedBuffer.Bytes()[:dataOffset])
} else {
if availableShards < dataShards {
return fmt.Errorf("Not enough chunk data for recover; only %d out of %d shards are complete", availableShards, dataShards + parityShards)
}
// Show the validity of shards using a string of * and -
slots := ""
for _, part := range data {
if len(part) != 0 {
slots += "*"
} else {
slots += "-"
}
}
LOG_WARN("CHUNK_ERASURECODE", "Recovering a %d byte chunk from %d byte shards: %s", chunkSize, shardSize, slots)
encoder, err := reedsolomon.New(dataShards, parityShards)
if err != nil {
return err
}
err = encoder.Reconstruct(data)
if err != nil {
return err
}
LOG_DEBUG("CHUNK_ERASURECODE", "Chunk data successfully recovered")
buffer := AllocateChunkBuffer()
buffer.Reset()
for i := 0; i < dataShards; i++ {
buffer.Write(data[i])
}
buffer.Truncate(chunkSize)
ReleaseChunkBuffer(encryptedBuffer)
encryptedBuffer = buffer
}
}
if len(encryptionKey) > 0 {
@@ -340,15 +512,15 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
key = hasher.Sum(nil)
}
if len(encryptedBuffer.Bytes()) < headerLength + 12 {
if len(encryptedBuffer.Bytes()) < bannerLength + 12 {
return fmt.Errorf("No enough encrypted data (%d bytes) provided", len(encryptedBuffer.Bytes()))
}
if string(encryptedBuffer.Bytes()[:headerLength-1]) != ENCRYPTION_HEADER[:headerLength-1] {
if string(encryptedBuffer.Bytes()[:bannerLength-1]) != ENCRYPTION_BANNER[:bannerLength-1] {
return fmt.Errorf("The storage doesn't seem to be encrypted")
}
encryptionVersion := encryptedBuffer.Bytes()[headerLength-1]
encryptionVersion := encryptedBuffer.Bytes()[bannerLength-1]
if encryptionVersion != 0 && encryptionVersion != ENCRYPTION_VERSION_RSA {
return fmt.Errorf("Unsupported encryption version %d", encryptionVersion)
}
@@ -359,14 +531,14 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
return fmt.Errorf("An RSA private key is required to decrypt the chunk")
}
encryptedKeyLength := binary.LittleEndian.Uint16(encryptedBuffer.Bytes()[headerLength:headerLength+2])
encryptedKeyLength := binary.LittleEndian.Uint16(encryptedBuffer.Bytes()[bannerLength:bannerLength+2])
if len(encryptedBuffer.Bytes()) < headerLength + 14 + int(encryptedKeyLength) {
if len(encryptedBuffer.Bytes()) < bannerLength + 14 + int(encryptedKeyLength) {
return fmt.Errorf("No enough encrypted data (%d bytes) provided", len(encryptedBuffer.Bytes()))
}
encryptedKey := encryptedBuffer.Bytes()[headerLength + 2:headerLength + 2 + int(encryptedKeyLength)]
headerLength += 2 + int(encryptedKeyLength)
encryptedKey := encryptedBuffer.Bytes()[bannerLength + 2:bannerLength + 2 + int(encryptedKeyLength)]
bannerLength += 2 + int(encryptedKeyLength)
decryptedKey, err := rsa.DecryptOAEP(sha256.New(), rand.Reader, chunk.config.rsaPrivateKey, encryptedKey, nil)
if err != nil {
@@ -385,8 +557,8 @@ func (chunk *Chunk) Decrypt(encryptionKey []byte, derivationKey string) (err err
return err
}
offset = headerLength + gcm.NonceSize()
nonce := encryptedBuffer.Bytes()[headerLength:offset]
offset = bannerLength + gcm.NonceSize()
nonce := encryptedBuffer.Bytes()[bannerLength:offset]
decryptedBytes, err := gcm.Open(encryptedBuffer.Bytes()[:offset], nonce,
encryptedBuffer.Bytes()[offset:], nil)