Optimizing restore to avoid reading newly created sparse files

This commit is contained in:
Gilbert Chen
2018-10-21 22:43:24 -04:00
parent 22a0b222db
commit bfb4b44c0a

View File

@@ -1162,6 +1162,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
lengthMap := make(map[string]int) lengthMap := make(map[string]int)
var offset int64 var offset int64
// If the file is newly created (needed by sparse file optimization)
isNewFile := false
existingFile, err = os.Open(fullPath) existingFile, err = os.Open(fullPath)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
@@ -1196,6 +1199,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
LOG_ERROR("DOWNLOAD_OPEN", "Can't reopen the initial file just created: %v", err) LOG_ERROR("DOWNLOAD_OPEN", "Can't reopen the initial file just created: %v", err)
return false return false
} }
isNewFile = true
} }
} else { } else {
LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err) LOG_TRACE("DOWNLOAD_OPEN", "Can't open the existing file: %v", err)
@@ -1208,6 +1212,9 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
} }
} }
// The key in this map is the number of zeroes. The value is the corresponding hash.
knownHashes := make(map[int]string)
fileHash := "" fileHash := ""
if existingFile != nil { if existingFile != nil {
@@ -1217,6 +1224,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
fileHasher := manager.config.NewFileHasher() fileHasher := manager.config.NewFileHasher()
buffer := make([]byte, 64*1024) buffer := make([]byte, 64*1024)
err = nil err = nil
isSkipped := false
// We set to read one more byte so the file hash will be different if the file to be restored is a // We set to read one more byte so the file hash will be different if the file to be restored is a
// truncated portion of the existing file // truncated portion of the existing file
for i := entry.StartChunk; i <= entry.EndChunk+1; i++ { for i := entry.StartChunk; i <= entry.EndChunk+1; i++ {
@@ -1232,6 +1240,28 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
chunkSize = 1 // the size of extra chunk beyond EndChunk chunkSize = 1 // the size of extra chunk beyond EndChunk
} }
count := 0 count := 0
if isNewFile {
if hash, found := knownHashes[chunkSize]; found {
// We have read the same number of zeros before, so we just retrieve the hash from the map
existingChunks = append(existingChunks, hash)
existingLengths = append(existingLengths, chunkSize)
offsetMap[hash] = offset
lengthMap[hash] = chunkSize
offset += int64(chunkSize)
isSkipped = true
continue
}
}
if isSkipped {
_, err := existingFile.Seek(offset, 0)
if err != nil {
LOG_ERROR("DOWNLOAD_SEEK", "Failed to seek to offset %d: %v", offset, err)
}
isSkipped = false
}
for count < chunkSize { for count < chunkSize {
n := chunkSize - count n := chunkSize - count
if n > cap(buffer) { if n > cap(buffer) {
@@ -1258,12 +1288,16 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
offsetMap[hash] = offset offsetMap[hash] = offset
lengthMap[hash] = chunkSize lengthMap[hash] = chunkSize
offset += int64(chunkSize) offset += int64(chunkSize)
if isNewFile {
knownHashes[chunkSize] = hash
}
} }
if err == io.EOF { if err == io.EOF {
break break
} }
} }
fileHash = hex.EncodeToString(fileHasher.Sum(nil)) fileHash = hex.EncodeToString(fileHasher.Sum(nil))
} else { } else {
// If it is not inplace, we want to reuse any chunks in the existing file regardless of their offsets, so // If it is not inplace, we want to reuse any chunks in the existing file regardless of their offsets, so
@@ -1290,6 +1324,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
} }
} }
for i := entry.StartChunk; i <= entry.EndChunk; i++ { for i := entry.StartChunk; i <= entry.EndChunk; i++ {
if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found { if _, found := offsetMap[chunkDownloader.taskList[i].chunkHash]; !found {
chunkDownloader.taskList[i].needed = true chunkDownloader.taskList[i].needed = true