Rewrite the backup procedure to reduce memory usage

Main changes:

* Change the listing order of files/directories so that the local and remote
  snapshots can be compared on-the-fly.

* Introduce a new struct called EntryList that maintains a list of
  files/directories, which are kept in memory when the number is lower, and
  serialized into a file when there are too many.

* EntryList can also be turned into an on-disk incomplete snapshot quickly,
  to support fast-resume on next run.

* ChunkOperator can now download and upload chunks, thus replacing original
  ChunkDownloader and ChunkUploader.  The new ChunkDownloader is only used
  to prefetch chunks during the restore operation.
This commit is contained in:
Gilbert Chen
2021-10-24 23:34:49 -04:00
parent f83e4f3c44
commit d9f6545d63
20 changed files with 2762 additions and 1749 deletions

View File

@@ -147,6 +147,10 @@ func setGlobalOptions(context *cli.Context) {
duplicacy.SetLoggingLevel(duplicacy.DEBUG)
}
if context.GlobalBool("print-memory-usage") {
go duplicacy.PrintMemoryUsage()
}
ScriptEnabled = true
if context.GlobalBool("no-script") {
ScriptEnabled = false
@@ -781,7 +785,10 @@ func backupRepository(context *cli.Context) {
backupManager.SetupSnapshotCache(preference.Name)
backupManager.SetDryRun(dryRun)
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly)
metadataChunkSize := context.Int("metadata-chunk-size")
maximumInMemoryEntries := context.Int("max-in-memory-entries")
backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries)
runScript(context, preference.Name, "post")
}
@@ -1506,6 +1513,19 @@ func main() {
Name: "enum-only",
Usage: "enumerate the repository recursively and then exit",
},
cli.IntFlag{
Name: "metadata-chunk-size",
Value: 1024 * 1024,
Usage: "the average size of metadata chunks (defaults to 1M)",
Argument: "<size>",
},
cli.IntFlag{
Name: "max-in-memory-entries",
Value: 1024 * 1024,
Usage: "the maximum number of entries kept in memory (defaults to 1M)",
Argument: "<number>",
},
},
Usage: "Save a snapshot of the repository to the storage",
ArgsUsage: " ",
@@ -2180,6 +2200,10 @@ func main() {
Usage: "suppress logs with the specified id",
Argument: "<id>",
},
cli.BoolFlag{
Name: "print-memory-usage",
Usage: "print memory usage every second",
},
}
app.HideVersion = true