Compare commits

...

40 Commits

Author SHA1 Message Date
Gilbert Chen
d8573ca789 Bump version to 2.0.9 2017-09-11 11:13:28 -04:00
Gilbert Chen
6b2f50a1e8 Fixed OneDrive 503 errors by sending GET requests with a nil body 2017-09-11 11:12:05 -04:00
gilbertchen
81b8550232 Merge pull request #173 from jt70471/patch-2
change message when chunk is skipped at destination for copy
2017-09-11 10:19:55 -04:00
gilbertchen
f6e2877948 Merge pull request #170 from jt70471/patch-1
fix upload/download rate for copy described in issue #169
2017-09-11 08:16:03 -04:00
Jeff Thompson
3c1057a3c6 change message when chunk is optimized and skipped at destination for copy 2017-09-09 11:27:33 -05:00
Gilbert Chen
8808ad5c28 Retry on XAmzContentSHA256Mismatch 2017-09-08 19:46:27 -04:00
Jeff Thompson
707967e91b fix upload/download rate for copy described in issue #169 2017-09-08 16:39:41 -05:00
Gilbert Chen
3f83890859 Don't save passwords from env/pref to keyring 2017-09-08 16:51:05 -04:00
Gilbert Chen
68fb6d671e Fixed symbolic link handling on Windows 2017-09-08 15:31:45 -04:00
gilbertchen
b04ef67d26 Fixed a typo in GUIDE.md 2017-09-08 11:57:46 -04:00
gilbertchen
72ba2dfa87 Merge pull request #154 from jt70471/jt70471-patch-1
Skip chunks to copy if already on destination for issue #134
2017-09-07 20:20:29 -04:00
Jeff Thompson
b41e8a24a9 Skip chunks to copy if already on destination for issue #134 2017-09-07 16:24:11 -05:00
gilbertchen
a3aa575c68 Merge pull request #165 from jt70471/patch-2
Use number of threads specified on copy command
2017-09-07 16:18:13 -04:00
gilbertchen
e765575210 Merge pull request #155 from niknah/sftp_login
Use ssh key file first if we have it in preferences/environment
2017-09-07 16:13:19 -04:00
gilbertchen
044e1862e5 Merge pull request #161 from jt70471/patch-1
Fix doc bug for issue #151
2017-09-07 15:31:34 -04:00
Jeff Thompson
612c5b7746 Use number of threads specified on copy command 2017-09-06 17:01:27 -05:00
Jeff Thompson
34afc6f93c Update GUIDE.md
Fix doc bug referenced in issue #151.
2017-09-05 15:37:34 -05:00
niknah
030cd274c2 If we have a sftp key file in the environment/preferences, then don't attempt a password login to avoid bad login errors. 2017-09-04 19:40:08 +10:00
Gilbert Chen
197d20f0e0 Workaround a go bug to avoid seek offsets whose lower 32 bits are -1 2017-09-01 15:05:14 -04:00
gilbertchen
93cfbf27cb Merge pull request #147 from flamingm0e/master
cleanup markdown
2017-09-01 11:52:34 -04:00
m@
46ec852d4d cleanup markdown 2017-08-31 22:18:05 -05:00
Gilbert Chen
dfa6113279 Keep and restore attributes when no patterns provided to the restore command 2017-08-31 16:29:57 -04:00
Gilbert Chen
d7fdb5fe7f Add .bat to script names on Windows 2017-08-31 12:25:31 -04:00
Gilbert Chen
37ebbc4736 Add a test for copying snapshots between storages 2017-08-30 23:07:00 -04:00
Gilbert Chen
3ae2de241e For chunks already existing on the storage the skipped flag should be true 2017-08-30 15:40:38 -04:00
Gilbert Chen
4adb8dbf70 Convert samba drive paths to UNC paths 2017-08-29 14:56:13 -04:00
gilbertchen
41e3d267e5 Merge pull request #139 from countextreme/master
Fix typos: snpashot -> snapshot
2017-08-28 16:04:10 -04:00
gilbertchen
3e23b0c61c Merge pull request #138 from smt/patch-1
Fix typo
2017-08-28 16:03:21 -04:00
countextreme
b7f537de3c Update duplicacy_snapshotmanager_test.go 2017-08-28 13:17:07 -04:00
countextreme
0c8a88d15a Update duplicacy_snapshotmanager.go 2017-08-28 13:16:33 -04:00
countextreme
204f56e939 Update duplicacy_snapshot.go 2017-08-28 13:15:56 -04:00
countextreme
4a80d94b63 Update duplicacy_backupmanager.go 2017-08-28 13:15:22 -04:00
Stephen Tudor
3729de1c67 Fix typo
s/Subdirecotry/Subdirectory
2017-08-28 08:25:58 -04:00
Gilbert Chen
6f70b37d61 In GCD backend each thread should have its own backoff value 2017-08-25 23:53:02 -04:00
Gilbert Chen
7baf8702a3 The file .duplicacy/preferences should not be readable by group and others 2017-08-24 23:07:49 -04:00
Gilbert Chen
8fce6f5f83 FindPreference should return the address of the Preference object for setPreference to work 2017-08-24 23:02:39 -04:00
gilbertchen
fd362be54a Merge pull request #120 from thenickdude/sftp-path-docs
Add documentation for absolute SFTP paths
2017-08-24 11:28:33 -04:00
Nicholas Sherlock
0c13da9872 Add documentation for absolute SFTP paths 2017-08-24 16:29:44 +12:00
Gilbert Chen
4912911017 Bump version to 2.0.8 2017-08-23 22:34:49 -04:00
Gilbert Chen
f69550d0db Allow logging function to be customized 2017-08-23 22:33:45 -04:00
19 changed files with 405 additions and 266 deletions

View File

@@ -8,9 +8,9 @@ Duplicacy is based on the following open source projects:
|https://github.com/Azure/azure-sdk-for-go | Apache-2.0 |
|https://github.com/tj/go-dropbox | MIT |
|https://github.com/aws/aws-sdk-go | Apache-2.0 |
|https://github.com/goamz/goamz | LGPL with static link exception |
|https://github.com/goamz/goamz | LGPL with static link exception |
|https://github.com/howeyc/gopass | ISC |
|https://github.com/tmc/keyring | ISC |
|https://github.com/pcwizz/xattr | BSD-2-Clause |
|https://github.com/pcwizz/xattr | BSD-2-Clause |
|https://github.com/minio/blake2b-simd | Apache-2.0 |
|https://github.com/go-ole/go-ole | MIT |

View File

@@ -27,7 +27,7 @@ If exclusive access to a file storage by a single client can be guaranteed, the
chunks not referenced by any backup and delete them. However, if concurrent access is required, an unreferenced chunk
can't be trivially removed, because of the possibility that a backup procedure in progress may reference the same chunk.
The ongoing backup procedure, still unknown to the deletion procedure, may have already encountered that chunk during its
file scanning phase, but decided not to upload the chunk again since it already exists in the file storage.
file scanning phase, but decided not to upload the chunk again since it already exists in the file storage.
Fortunately, there is a solution to address the deletion problem and make lock-free deduplication practical. The solution is a *two-step fossil collection* algorithm that deletes unreferenced chunks in two steps: identify and collect them in the first step, and then permanently remove them once certain conditions are met.
@@ -47,7 +47,7 @@ In the first step of the deletion procedure, called the *fossil collection* step
be saved in a fossil collection file. The deletion procedure then exits without performing further actions. This step has not effectively changed any chunk references due to the first fossil access rule. If a backup procedure references a chunk after it is marked as a fossil, a new chunk will be uploaded because of the second fossil access rule, as shown in Figure 1.
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_1.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_1.png?raw=true"
alt="Reference after Rename"/>
</p>
@@ -64,7 +64,7 @@ Therefore, if a backup procedure references a chunk before the chunk is marked a
delete the chunk until it sees that backup procedure finishes (as indicated by the appearance of a new snapshot file uploaded to the storage). This ensures that scenarios depicted in Figure 2 will never happen.
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_2.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_2.png?raw=true"
alt="Reference before Rename"/>
</p>
@@ -128,25 +128,25 @@ and dir1/file3):
170593,
124309,
1734
]
]
}
```
When Duplicacy splits a file in chunks using the variable-size chunking algorithm, if the end of a file is reached and yet the boundary marker for terminating a chunk
hasn't been found, the next file, if there is one, will be read in and the chunking algorithm continues. It is as if all
hasn't been found, the next file, if there is one, will be read in and the chunking algorithm continues. It is as if all
files were packed into a big tar file which is then split into chunks.
The *content* field of a file indicates the indexes of starting and ending chunks and the corresponding offsets. For
instance, *file1* starts at chunk 0 offset 0 and ends at chunk 2 offset 6108, immediately followed by *file2*.
The backup procedure can run in one of two modes. In the default quick mode, only modified or new files are scanned. Chunks only
referenced by old files that have been modified are removed from the chunk sequence, and then chunks referenced by new
referenced by old files that have been modified are removed from the chunk sequence, and then chunks referenced by new
files are appended. Indices for unchanged files need to be updated too.
In the safe mode (enabled by the -hash option), all files are scanned and the chunk sequence is regenerated.
The length sequence stores the lengths for all chunks, which are needed when calculating some statistics such as the total
length of chunks. For a repository containing a large number of files, the size of the snapshot file can be tremendous.
length of chunks. For a repository containing a large number of files, the size of the snapshot file can be tremendous.
To make the situation worse, a big snapshot file would have to be uploaded every time, even if only a few files have changed since
the last backup. To save space, the variable-size chunking algorithm is also applied to the three dynamic fields of a snapshot
file, *files*, *chunks*, and *lengths*.
@@ -200,7 +200,7 @@ When encryption is enabled (by the -e option with the *init* or *add* command),
Here is a diagram showing how these keys are used:
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/duplicacy_encryption.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/duplicacy_encryption.png?raw=true"
alt="encryption"/>
</p>
@@ -210,6 +210,4 @@ Chunk content is encrypted by AES-GCM, with an encryption key that is the HMAC-S
The snapshot is encrypted by AES-GCM too, using an encryption key that is the HMAC-SHA256 of the file path with the *File Key* as the secret key.
These four random keys are saved in a file named 'config' in the storage, encrypted with a master key derived from the PBKDF2 function on
the storage password chosen by the user.
These four random keys are saved in a file named 'config' in the storage, encrypted with a master key derived from the PBKDF2 function on the storage password chosen by the user.

185
GUIDE.md
View File

@@ -16,25 +16,22 @@ OPTIONS:
-chunk-size, -c 4M the average size of chunks
-max-chunk-size, -max 16M the maximum size of chunks (defaults to chunk-size * 4)
-min-chunk-size, -min 1M the minimum size of chunks (defaults to chunk-size / 4)
-pref-dir <preference directory path> Specify alternate location for .duplicacy preferences directory
-pref-dir <preference directory path> Specify alternate location for .duplicacy preferences directory
```
The *init* command first connects to the storage specified by the storage URL. If the storage has been already been
initialized before, it will download the storage configuration (stored in the file named *config*) and ignore the options provided in the command line. Otherwise, it will create the configuration file from the options and upload the file.
The *init* command first connects to the storage specified by the storage URL. If the storage has already been initialized before, it will download the storage configuration (stored in the file named *config*) and ignore the options provided in the command line. Otherwise, it will create the configuration file from the options and upload the file.
The initialized storage will then become the default storage for other commands if the -storage option is not specified
for those commands. This default storage actually has a name, *default*.
The initialized storage will then become the default storage for other commands if the `-storage` option is not specified for those commands. This default storage actually has a name, *default*.
After that, it will prepare the the current working directory as the repository to be backed up. Under the hood, it will create a directory
named *.duplicacy* in the repository and put a file named *preferences* that stores the snapshot id and encryption and storage options.
After that, it will prepare the current working directory as the repository to be backed up. Under the hood, it will create a directory named *.duplicacy* in the repository and put a file named *preferences* that stores the snapshot id and encryption and storage options.
The snapshot id is an id used to distinguish different repositories connected to the same storage. Each repository must have a unique snapshot id. A snapshot id must contain only characters valid in Linux and Windows paths (alphabet, digits, underscore, dash, etc), but cannot include `/`, `\`, or `@`.
The -e option controls whether or not encryption will be enabled for the storage. If encryption is enabled, you will be prompted to enter a storage password.
The `-e` option controls whether or not encryption will be enabled for the storage. If encryption is enabled, you will be prompted to enter a storage password.
The three chunk size parameters are passed to the variable-size chunking algorithm. Their values are important to the overall performance, especially for cloud storages. If the chunk size is too small, a lot of overhead will be in sending requests and receiving responses. If the chunk size is too large, the effect of deduplication will be less obvious as more data will need to be transferred with each chunk.
The three chunk size parameters are passed to the variable-size chunking algorithm. Their values are important to the overall performance, especially for cloud storages. If the chunk size is too small, a lot of overhead will be in sending requests and receiving responses. If the chunk size is too large, the effect of de-duplication will be less obvious as more data will need to be transferred with each chunk.
The -pref-dir controls the location of the preferences directory. If not specified, a directory named .duplicacy is created in the repository. If specified, it must point to a non-existing directory. The directory is created and a .duplicacy file is created in the repository. The .duplicacy file contains the absolute path name to the preferences directory.
The `-pref-dir` controls the location of the preferences directory. If not specified, a directory named .duplicacy is created in the repository. If specified, it must point to a non-existing directory. The directory is created and a .duplicacy file is created in the repository. The .duplicacy file contains the absolute path name to the preferences directory.
Once a storage has been initialized with these parameters, these parameters cannot be modified any more.
@@ -52,29 +49,24 @@ OPTIONS:
-t <tag> assign a tag to the backup
-stats show statistics during and after backup
-threads <n> number of uploading threads
-limit-rate <kB/s> the maximum upload rate (in kilobytes/sec)
-limit-rate <kB/s> the maximum upload rate (in kilobytes/sec)
-vss enable the Volume Shadow Copy service (Windows only)
-storage <storage name> backup to the specified storage instead of the default one
```
The *backup* command creates a snapshot of the repository and uploads it to the storage. If -hash is not provided,
it will upload new or modified files since last backup by comparing file sizes and timestamps.
Otherwise, every file is scanned to detect changes.
The *backup* command creates a snapshot of the repository and uploads it to the storage. If `-hash` is not provided, it will upload new or modified files since the last backup by comparing file sizes and timestamps. Otherwise, every file is scanned to detect changes.
You can assign a tag to the snapshot so that later you can refer to it by tag in other commands.
If the -stats option is specified, statistical information such as transfer speed, the number of chunks will be displayed
throughout the backup procedure.
If the `-stats` option is specified, statistical information such as transfer speed, and the number of chunks will be displayed throughout the backup procedure.
The -threads option can be used to specify more than one thread to upload chunks.
The `-threads` option can be used to specify more than one thread to upload chunks.
The -limit-rate option sets a cape on the maximum upload rate.
The `-limit-rate` option sets a cap on the maximum upload rate.
The -vss option works on Windows only to turn on the Volume Shadow Copy service such that files opened by other
processes with exclusive locks can be read as usual.
The `-vss` option works on Windows only to turn on the Volume Shadow Copy service such that files opened by other processes with exclusive locks can be read as usual.
When the repository can have multiple storages (added by the *add* command), you can select the storage to back up to
by giving a storage name.
When the repository can have multiple storages (added by the *add* command), you can select the storage to back up to by giving a storage name.
You can specify patterns to include/exclude files by putting them in a file named *.duplicacy/filters*. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify the patterns.
@@ -93,29 +85,25 @@ OPTIONS:
-delete delete files not in the snapshot
-stats show statistics during and after restore
-threads <n> number of downloading threads
-limit-rate <kB/s> the maximum download rate (in kilobytes/sec)
-limit-rate <kB/s> the maximum download rate (in kilobytes/sec)
-storage <storage name> restore from the specified storage instead of the default one
```
The *restore* command restores the repository to a previous revision. By default the restore procedure will treat
files that have the same sizes and timestamps as those in the snapshot as unchanged files, but with the -hash option, every file will be fully scanned to make sure they are in fact unchanged.
The *restore* command restores the repository to a previous revision. By default the restore procedure will treat files that have the same sizes and timestamps as those in the snapshot as unchanged files, but with the `-hash` option, every file will be fully scanned to make sure they are in fact unchanged.
By default the restore procedure will not overwriting existing files, unless the -overwrite option is specified.
By default the restore procedure will not overwrite existing files, unless the `-overwrite` option is specified.
The -delete option indicates that files not in the snapshot will be removed.
The `-delete` option indicates that files not in the snapshot will be removed.
If the -stats option is specified, statistical information such as transfer speed, number of chunks will be displayed
throughout the restore procedure.
If the `-stats` option is specified, statistical information such as transfer speed, and number of chunks will be displayed throughout the restore procedure.
The -threads option can be used to specify more than one thread to download chunks.
The `-threads` option can be used to specify more than one thread to download chunks.
The -limit-rate option sets a cape on the maximum upload rate.
The `-limit-rate` option sets a cap on the maximum download rate.
When the repository can have multiple storages (added by the *add* command), you can select the storage to restore from by specifying the storage name.
Unlike the *backup* procedure that reading the include/exclude patterns from a file, the *restore* procedure reads them
from the command line. If the patterns can cause confusion to the command line argument parser, -- should be prepended to
the patterns. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify patterns.
Unlike the *backup* procedure that reads the include/exclude patterns from a file, the *restore* procedure reads them from the command line. If the patterns can cause confusion to the command line argument parser, `--` should be prepended to the patterns. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify patterns.
#### List
@@ -124,7 +112,7 @@ SYNOPSIS:
duplicacy list - List snapshots
USAGE:
duplicacy list [command options]
duplicacy list [command options]
OPTIONS:
-all, -a list snapshots with any id
@@ -137,24 +125,17 @@ OPTIONS:
-storage <storage name> retrieve snapshots from the specified storage
```
The *list* command lists information about specified snapshots. By default it will list snapshots created from the
current repository, but you can list all snapshots stored in the storage by specifying the -all option, or list snapshots
with a different snapshot id using the -id option, and/or snapshots with a particular tag with the -t option.
The *list* command lists information about specified snapshots. By default it will list snapshots created from the current repository, but you can list all snapshots stored in the storage by specifying the `-all` option, or list snapshots with a different snapshot id using the `-id` option, and/or snapshots with a particular tag with the `-t` option.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing
every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using
the -r option, which either takes a single revision number (-r 123) or a range (-r 123-456).
There can be multiple -r options.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using the `-r` option, which either takes a single revision number `-r 123` or a range `-r 123-456`. There can be multiple `-r` options.
If -files is specified, for each snapshot to be listed, this command will also print information about every file
contained in the snapshot.
If `-files` is specified, for each snapshot to be listed, this command will also print information about every file contained in the snapshot.
If -chunks is specified, the command will also print out every chunk the snapshot references.
If `-chunks` is specified, the command will also print out every chunk the snapshot references.
The -reset-password option is used to reset stored passwords and to allow passwords to be entered again. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more information.
The `-reset-password` option is used to reset stored passwords and to allow passwords to be entered again. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more information.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to list
by specifying the storage name.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to list by specifying the storage name.
#### Check
```
@@ -178,23 +159,15 @@ OPTIONS:
The *check* command checks, for each specified snapshot, that all referenced chunks exist in the storage.
By default the *check* command will check snapshots created from the
current repository, but you can check all snapshots stored in the storage at once by specifying the -all option, or
snapshots from a different repository using the -id option, and/or snapshots with a particular tag with the -t option.
current repository, but you can check all snapshots stored in the storage at once by specifying the `-all` option, or snapshots from a different repository using the `-id` option, and/or snapshots with a particular tag with the `-t` option.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing
every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using
the -r option, which either takes a single revision number (-r 123) or a range (-r 123-456).
There can be multiple -r options.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using the `-r` option, which either takes a single revision number `-r 123` or a range `-r 123-456`. There can be multiple `-r` options.
By default the *check* command only verifies the existence of chunks. To verify the full integrity of a snapshot,
you should specify the -files option, which will download chunks and compute file hashes in memory, to
make sure that all hashes match.
By default the *check* command only verifies the existence of chunks. To verify the full integrity of a snapshot, you should specify the `-files` option, which will download chunks and compute file hashes in memory, to make sure that all hashes match.
By default the *check* command does not find fossils. If the -fossils option is specified, it will find
the fossil if the referenced chunk does not exist. if the -resurrect option is specified, it will turn the fossil back into a chunk.
By default the *check* command does not find fossils. If the `-fossils` option is specified, it will find the fossil if the referenced chunk does not exist. If the `-resurrect` option is specified, it will turn the fossil back into a chunk.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to check
by specifying the storage name.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to check by specifying the storage name.
#### Cat
@@ -217,9 +190,9 @@ The file must be specified with a path relative to the repository.
You can specify a different snapshot id rather than the default id.
The -r option is optional. If not specified, the latest revision will be selected.
The `-r` option is optional. If not specified, the latest revision will be selected.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Diff
```
@@ -235,17 +208,15 @@ OPTIONS:
-hash compute the hashes of on-disk files
-storage <storage name> retrieve files from the specified storage
```
The *diff* command compares the same file in two different snapshots if a file is given, otherwise compares the
two snapshots.
The *diff* command compares the same file in two different snapshots if a file is given, otherwise compares the two snapshots.
The file must be specified with a path relative to the repository.
You can specify a different snapshot id rather than the default snapshot id.
If only one revision is given by -r, the right hand side of the comparison will be the on-disk file.
The -hash option can then instruct this command to compute the hash of the file.
If only one revision is given by `-r`, the right hand side of the comparison will be the on-disk file. The `-hash` option can then instruct this command to compute the hash of the file.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### History
```
@@ -264,13 +235,11 @@ OPTIONS:
The *history* command shows how the hash, size, and timestamp of a file change over the specified set of revisions.
You can specify a different snapshot id rather than the default snapshot id, and multiple -r options to specify the
set of revisions.
You can specify a different snapshot id rather than the default snapshot id, and multiple `-r` options to specify the set of revisions.
The -hash option is to compute the hash of the on-disk file. Otherwise, only the size and timestamp of the on-disk
file will be included.
The `-hash` option is to compute the hash of the on-disk file. Otherwise, only the size and timestamp of the on-disk file will be included.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Prune
```
@@ -295,16 +264,11 @@ OPTIONS:
-storage <storage name> prune snapshots from the specified storage
```
The *prune* command implements the two-step fossil collection algorithm. It will first find fossil collection files
from previous runs and check if contained fossils are eligible for permanent deletion (the fossil deletion step). Then it
will search for snapshots to be deleted, mark unreferenced chunks as fossils (by renaming) and save them in a new fossil
collection file stored locally (the fossil collection step).
The *prune* command implements the two-step fossil collection algorithm. It will first find fossil collection files from previous runs and check if contained fossils are eligible for permanent deletion (the fossil deletion step). Then it will search for snapshots to be deleted, mark unreferenced chunks as fossils (by renaming) and save them in a new fossil collection file stored locally (the fossil collection step).
If a snapshot id is specified, that snapshot id will be used instead of the default one. The -a option will find
snapshots with any id. Snapshots to be deleted can be specified by revision numbers, by a tag, by retention policies,
or by any combination of them.
If a snapshot id is specified, that snapshot id will be used instead of the default one. The `-a` option will find snapshots with any id. Snapshots to be deleted can be specified by revision numbers, by a tag, by retention policies, or by any combination of them.
The retention policies are specified by the -keep option, which accepts an argument in the form of two numbers *n:m*, where *n* indicates the number of days between two consecutive snapshots to keep, and *m* means that the policy only applies to snapshots at least *m* day old. If *n* is zero, any snapshots older than *m* days will be removed.
The retention policies are specified by the `-keep` option, which accepts an argument in the form of two numbers *n:m*, where *n* indicates the number of days between two consecutive snapshots to keep, and *m* means that the policy only applies to snapshots at least *m* days old. If *n* is zero, any snapshots older than *m* days will be removed.
Here are a few sample retention policies:
@@ -315,37 +279,28 @@ $ duplicacy prune -keep 30:180 # Keep 1 snapshot every 30 days for snapshots
$ duplicacy prune -keep 0:360 # Keep no snapshots older than 360 days
```
Multiple -keep options must be sorted by their *m* values in decreasing order. For instance, to combine the above policies into one line, it would become:
Multiple `-keep` options must be sorted by their *m* values in decreasing order. For instance, to combine the above policies into one line, it would become:
```sh
$ duplicacy prune -keep 0:360 -keep 30:180 -keep 7:30 -keep 1:7
```
The -exhaustive option will scan the list of all chunks in the storage, therefore it will find not only
unreferenced chunks from deleted snapshots, but also chunks that become unreferenced for other reasons, such as
those from an incomplete backup. It will also find any file that does not look like a chunk file.
In contrast, a default *prune* command will only identify
The `-exhaustive` option will scan the list of all chunks in the storage, therefore it will find not only unreferenced chunks from deleted snapshots, but also chunks that become unreferenced for other reasons, such as those from an incomplete backup. It will also find any file that does not look like a chunk file. In contrast, a default *prune* command will only identify
chunks referenced by deleted snapshots but not any other snapshots.
The -exclusive option will assume that no other clients are accessing the storage, effectively disabling the
*two-step fossil collection* algorithm. With this option, the *prune* command will immediately remove unreferenced chunks.
The `-exclusive` option will assume that no other clients are accessing the storage, effectively disabling the *two-step fossil collection* algorithm. With this option, the *prune* command will immediately remove unreferenced chunks.
The -dryrun option is used to test what changes the *prune* command would have done. It is guaranteed not to make
any changes on the storage, not even creating the local fossil collection file. The following command checks if the
chunk directory is clean (i.e., if there are any unreferenced chunks, temporary files, or anything else):
The `-dry-run` option is used to test what changes the *prune* command would have done. It is guaranteed not to make any changes on the storage, not even creating the local fossil collection file. The following command checks if the chunk directory is clean (i.e., if there are any unreferenced chunks, temporary files, or anything else):
```
$ duplicacy prune -d -exclusive -exhaustive # Prints out nothing if the chunk directory is clean
```
The -delete-only option will skip the fossil collection step, while the -collect-only option will skip the fossil deletion step.
The `-delete-only` option will skip the fossil collection step, while the `-collect-only` option will skip the fossil deletion step.
For fossils collected in the fossil collection step to be eligible for safe deletion in the fossil deletion step, at least
one new snapshot from *each* snapshot id must be created between two runs of the *prune* command. However, some repository
may not be set up to back up with a regular schedule, and thus literally blocking other repositories from deleting any fossils. Duplicacy by default will ignore repositories that have no new backup in the past 7 days. It also provide an
-ignore option that can be used to skip certain repositories when deciding the deletion criteria.
For fossils collected in the fossil collection step to be eligible for safe deletion in the fossil deletion step, at least one new snapshot from *each* snapshot id must be created between two runs of the *prune* command. However, some repositories may not be set up to back up on a regular schedule, and would thus block other repositories from deleting any fossils. Duplicacy by default will ignore repositories that have no new backup in the past 7 days. It also provides an `-ignore` option that can be used to skip certain repositories when deciding the deletion criteria.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Password
@@ -384,17 +339,11 @@ OPTIONS:
-copy <storage name> make the new storage copy-compatible with an existing one
```
The *add* command connects another storage to the current repository. Like the *init* command, if the storage has not
been initialized before, a storage configuration file derived from the command line options will be uploaded, but those
options will be ignored if the configuration file already exists in the storage.
The *add* command connects another storage to the current repository. Like the *init* command, if the storage has not been initialized before, a storage configuration file derived from the command line options will be uploaded, but those options will be ignored if the configuration file already exists in the storage.
A unique storage name must be given in order to distinguish it from other storages.
The -copy option is required if later you want to copy snapshots between this storage and another storage.
Two storages are copy-compatible if they have the same average chunk size, the same maximum chunk size,
the same minimum chunk size, the same chunk seed (used in calculating the rolling hash in the variable-size chunks
algorithm), and the same hash key. If the -copy option is specified, these parameters will be copied from
the existing storage rather than from the command line.
The `-copy` option is required if later you want to copy snapshots between this storage and another storage. Two storages are copy-compatible if they have the same average chunk size, the same maximum chunk size, the same minimum chunk size, the same chunk seed (used in calculating the rolling hash in the variable-size chunks algorithm), and the same hash key. If the `-copy` option is specified, these parameters will be copied from the existing storage rather than from the command line.
#### Set
```
@@ -416,16 +365,15 @@ OPTIONS:
The *set* command changes the options for the specified storage.
The -e option turns on the storage encryption. If specified as -e=false, it turns off the storage encryption.
The `-e` option turns on the storage encryption. If specified as `-e=false`, it turns off the storage encryption.
The -no-backup option will not allow backups from this repository to be created.
The `-no-backup` option will not allow backups from this repository to be created.
The -no-restore option will not allow restoring this repository to a different revision.
The `-no-restore` option will not allow restoring this repository to a different revision.
The -no-save-password option will require every password or token to be entered every time and not saved anywhere.
The `-no-save-password` option will require every password or token to be entered every time and not saved anywhere.
The -key and -value options are used to store (in plain text) access keys or tokens need by various storages. Please
refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more details.
The `-key` and `-value` options are used to store (in plain text) access keys or tokens needed by various storages. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more details.
You can select a storage to change options for by specifying a storage name.
@@ -445,14 +393,11 @@ OPTIONS:
-to <storage name> copy snapshots to the specified storage
```
The *copy* command copies snapshots from one storage to another storage. They must be copy-compatible, i.e., some
configuration parameters must be the same. One storage must be initialized with the -copy option provided by the *add* command.
The *copy* command copies snapshots from one storage to another storage. They must be copy-compatible, i.e., some configuration parameters must be the same. One storage must be initialized with the `-copy` option provided by the *add* command.
Instead of copying all snapshots, you can specify a set of snapshots to copy by giving the -r options. The *copy* command
preserves the revision numbers, so if a revision number already exists on the destination storage the command will fail.
Instead of copying all snapshots, you can specify a set of snapshots to copy by giving the `-r` options. The *copy* command preserves the revision numbers, so if a revision number already exists on the destination storage the command will fail.
If no -from option is given, the snapshots from the default storage will be copied. The -to option specified the
destination storage and is required.
If no `-from` option is given, the snapshots from the default storage will be copied. The `-to` option specifies the destination storage and is required.
## Include/Exclude Patterns
@@ -495,10 +440,10 @@ For the *restore* command, the include/exclude patterns are specified as the com
Duplicacy will attempt to retrieve in three ways the storage password and the storage-specific access tokens/keys.
* If a secret vault service is available, Duplicacy will store passwords/keys entered by the user in such a secret vault and later retrieve them when needed. On Mac OS X it is Keychain, and on Linux it is gnome-keyring. On Windows the passwords/keys are encrypted and decrypted by the Data Protection API, and encrypted passwords/keys are stored in the file *.duplicacy/keyring*. However, if the -no-save-password option is specified for the storage, then Duplicacy will not save passwords this way.
* If an environment variable for a password is provided, Duplicacy will always take it. The table below shows the name of the environment variable for each kind of password. Note that if the storage is not the default one, the storage name will be included in the name of the environment variable.
* If an environment variable for a password is provided, Duplicacy will always take it. The table below shows the name of the environment variable for each kind of password. Note that if the storage is not the default one, the storage name will be included in the name of the environment variable (in uppercase). For example, if your storage name is b2, then the environment variable should be named DUPLICACY_B2_PASSWORD.
* If a matching key and its value are saved to the preference file (.duplicacy/preferences) by the *set* command, the value will be used as the password. The last column in the table below lists the name of the preference key for each type of password.
| password type | environment variable (default storage) | environment variable (non-default storage) | key in preferences |
| password type | environment variable (default storage) | environment variable (non-default storage in uppercase) | key in preferences |
|:----------------:|:----------------:|:----------------:|:----------------:|
| storage password | DUPLICACY_PASSWORD | DUPLICACY_&lt;STORAGENAME&gt;_PASSWORD | password |
| sftp password | DUPLICACY_SSH_PASSWORD | DUPLICACY_&lt;STORAGENAME&gt;_SSH_PASSWORD | ssh_password |
@@ -521,7 +466,7 @@ Duplicacy maintains a local cache under the `.duplicacy/cache` folder in the rep
At the end of a backup operation, Duplicacy will clean up the local cache in such a way that only chunks composing the snapshot file from the last backup will stay in the cache. All other chunks will be removed from the cache. However, if the *prune* command has been run before (which will leave a `.duplicacy/collection` folder in the repository), then the *backup* command won't perform any cache cleanup and will instead defer that to the *prune* command.
At the end of a prune operation, Duplicacy will remove all chunks from the local cache except those composing the snapshot file from the last backup (those that would be kept by the *backup* command), as well as chunks that contain information about chunks referenced by *all* backups from *all* repositories connected to the same storage url.
At the end of a prune operation, Duplicacy will remove all chunks from the local cache except those composing the snapshot file from the last backup (those that would be kept by the *backup* command), as well as chunks that contain information about chunks referenced by *all* backups from *all* repositories connected to the same storage url.
Other commands, such as *list* and *check*, do not clean up the local cache at all, so the local cache may keep growing if many of these commands run consecutively. However, once a *backup* or a *prune* command is invoked, the local cache should shrink to its normal size.

View File

@@ -8,10 +8,10 @@ There is a special edition of Duplicacy developed for VMware vSphere (ESXi) name
## Features
Duplicacy currently supports major cloud storage providers (Amazon S3, Google Cloud Storage, Microsoft Azure, Dropbox, Backblaze, Google Drive, Microsoft OneDrive, and Hubic) and offers all essential features of a modern backup tool:
Duplicacy currently supports major cloud storage providers (Amazon S3, Google Cloud Storage, Microsoft Azure, Dropbox, Backblaze B2, Google Drive, Microsoft OneDrive, and Hubic) and offers all essential features of a modern backup tool:
* Incremental backup: only back up what has been changed
* Full snapshot : although each backup is incremental, it must behave like a full snapshot for easy restore and deletion
* Full snapshot: although each backup is incremental, it must behave like a full snapshot for easy restore and deletion
* Deduplication: identical files must be stored as one copy (file-level deduplication), and identical parts from different files must be stored as one copy (block-level deduplication)
* Encryption: encrypt not only file contents but also file paths, sizes, times, etc.
* Deletion: every backup can be deleted independently without affecting others
@@ -133,10 +133,11 @@ Storage URL: /path/to/storage (on Linux or Mac OS X)
```
</details>
<details> <summary>SFTP</summary>
<details> <summary>SFTP</summary>
```
Storage URL: sftp://username@server/path/to/storage
Storage URL: sftp://username@server/path/to/storage (path relative to the home directory)
sftp://username@server//path/to/storage (absolute path)
```
Login methods include password authentication and public key authentication. Due to a limitation of the underlying Go SSH library, the key pair for public key authentication must be generated without a passphrase. To work with a key that has a passphrase, you can set up SSH agent forwarding which is also supported by Duplicacy.
@@ -199,7 +200,7 @@ Storage URL: gcs://bucket/path/to/storage
```
Starting from version 2.0.0, a new Google Cloud Storage backend is added which is implemented using the [official Google client library](https://godoc.org/cloud.google.com/go/storage). You must first obtain a credential file by [authorizing](https://duplicacy.com/gcp_start) Duplicacy to access your Google Cloud Storage account or by [downloading](https://console.cloud.google.com/projectselector/iam-admin/serviceaccounts) a service account credential file.
You can also use the s3 protocol to access Google Cloud Storage. To do this, you must enable the [s3 interoperability](https://cloud.google.com/storage/docs/migrating#migration-simple) in your Google Cloud Storage settings and set the storage url as `s3://storage.googleapis.com/bucket/path/to/storage`.
</details>
@@ -232,8 +233,7 @@ Backblaze's B2 storage is one of the least expensive (at 0.5 cent per GB per mon
Storage URL: gcd://path/to/storage
```
To use Google Drive as the storage, you first need to download a token file from https://duplicacy.com/gcd_start by
authorizing Duplicacy to access your Google Drive, and then enter the path to this token file to Duplicacy when prompted.
To use Google Drive as the storage, you first need to download a token file from https://duplicacy.com/gcd_start by authorizing Duplicacy to access your Google Drive, and then enter the path to this token file to Duplicacy when prompted.
</details>
@@ -243,8 +243,7 @@ authorizing Duplicacy to access your Google Drive, and then enter the path to th
Storage URL: one://path/to/storage
```
To use Microsoft OneDrive as the storage, you first need to download a token file from https://duplicacy.com/one_start by
authorizing Duplicacy to access your OneDrive, and then enter the path to this token file to Duplicacy when prompted.
To use Microsoft OneDrive as the storage, you first need to download a token file from https://duplicacy.com/one_start by authorizing Duplicacy to access your OneDrive, and then enter the path to this token file to Duplicacy when prompted.
</details>
@@ -254,8 +253,7 @@ authorizing Duplicacy to access your OneDrive, and then enter the path to this t
Storage URL: hubic://path/to/storage
```
To use Hubic as the storage, you first need to download a token file from https://duplicacy.com/hubic_start by
authorizing Duplicacy to access your Hubic drive, and then enter the path to this token file to Duplicacy when prompted.
To use Hubic as the storage, you first need to download a token file from https://duplicacy.com/hubic_start by authorizing Duplicacy to access your Hubic drive, and then enter the path to this token file to Duplicacy when prompted.
Hubic offers the most free space (25GB) of all major cloud providers and there is no bandwidth charge (same as Google Drive and OneDrive), so it may be worth a try.
@@ -274,18 +272,18 @@ Deletion of old backups is possible, but no cloud storages are supported.
Multiple clients can back up to the same storage, but only sequential access is granted by the [locking on-disk data structures](http://obnam.org/locking/).
It is unclear if the lack of cloud backends is due to difficulties in porting the locking data structures to cloud storage APIs.
[Attic](https://attic-backup.org) has been acclaimed by some as the [Holy Grail of backups](https://www.stavros.io/posts/holy-grail-backups). It follows the same incremental backup model as Obnam, but embraces the variable-size chunk algorithm for better performance and better deduplication. Deletions of old backup is also supported. However, no cloud backends are implemented, as in Obnam. Although concurrent backups from multiple clients to the same storage is in theory possible by the use of locking, it is
[not recommended](http://librelist.com/browser//attic/2014/11/11/backing-up-multiple-servers-into-a-single-repository/#e96345aa5a3469a87786675d65da492b) by the developer due to chunk indices being kept in a local cache.
[Attic](https://attic-backup.org) has been acclaimed by some as the [Holy Grail of backups](https://www.stavros.io/posts/holy-grail-backups). It follows the same incremental backup model as Obnam, but embraces the variable-size chunk algorithm for better performance and better deduplication. Deletion of old backups is also supported. However, no cloud backends are implemented, as in Obnam. Although concurrent backups from multiple clients to the same storage are in theory possible by the use of locking, it is
[not recommended](http://librelist.com/browser//attic/2014/11/11/backing-up-multiple-servers-into-a-single-repository/#e96345aa5a3469a87786675d65da492b) by the developer due to chunk indices being kept in a local cache.
Concurrent access is not only a convenience; it is a necessity for better deduplication. For instance, if multiple machines with the same OS installed can back up their entire drives to the same storage, only one copy of the system files needs to be stored, greatly reducing the storage space regardless of the number of machines. Attic still adopts the traditional approach of using a centralized indexing database to manage chunks, and relies heavily on caching to improve performance. The presence of exclusive locking makes it hard to be adapted for cloud storage APIs and reduces the level of deduplication.
[restic](https://restic.github.io) is a more recent addition. It is worth mentioning here because, like Duplicacy, it is written in Go. It uses a format similar to the git packfile format. Multiple clients backing up to the same storage are still guarded by
[restic](https://restic.github.io) is a more recent addition. It is worth mentioning here because, like Duplicacy, it is written in Go. It uses a format similar to the git packfile format. Multiple clients backing up to the same storage are still guarded by
[locks](https://github.com/restic/restic/blob/master/doc/Design.md#locks). A prune operation will therefore completely block all other clients connected to the storage from doing their regular backups. Moreover, since most cloud storage services do not provide a locking service, the best effort is to use some basic file operations to simulate a lock, but distributed locking is known to be a hard problem and it is unclear how reliable restic's lock implementation is. A faulty implementation may cause a prune operation to accidentally delete data still in use, resulting in unrecoverable data loss. This is the exact problem that we avoided by taking the lock-free approach.
The following table compares the feature lists of all these backup tools:
| Feature/Tool | duplicity | bup | Obnam | Attic | restic | **Duplicacy** |
| Feature/Tool | duplicity | bup | Obnam | Attic | restic | **Duplicacy** |
|:------------------:|:---------:|:---:|:-----------------:|:---------------:|:-----------------:|:-------------:|
| Incremental Backup | Yes | Yes | Yes | Yes | Yes | **Yes** |
| Full Snapshot | No | Yes | Yes | Yes | Yes | **Yes** |
@@ -302,20 +300,20 @@ The following table compares the feature lists of all these backup tools:
Duplicacy is not only more feature-rich but also faster than other backup tools. The following table lists the running times in seconds of backing up the [Linux code base](https://github.com/torvalds/linux) using Duplicacy and 3 other tools. Clearly Duplicacy is the fastest by a significant margin.
| | Duplicacy | restic | Attic | duplicity |
| | Duplicacy | restic | Attic | duplicity |
|:------------------:|:----------------:|:----------:|:----------:|:-----------:|
| Initial backup | 13.7 | 20.7 | 26.9 | 44.2 |
| 2nd backup | 4.8 | 8.0 | 15.4 | 19.5 |
| 3rd backup | 6.9 | 11.9 | 19.6 | 29.8 |
| 4th backup | 3.3 | 7.0 | 13.7 | 18.6 |
| 5th backup | 9.9 | 11.4 | 19.9 | 28.0 |
| 6th backup | 3.8 | 8.0 | 16.8 | 22.0 |
| 7th backup | 5.1 | 7.8 | 14.3 | 21.6 |
| 8th backup | 9.5 | 13.5 | 18.3 | 35.0 |
| 9th backup | 4.3 | 9.0 | 15.7 | 24.9 |
| 10th backup | 7.9 | 20.2 | 32.2 | 35.0 |
| 11th backup | 4.6 | 9.1 | 16.8 | 28.1 |
| 12th backup | 7.4 | 12.0 | 21.7 | 37.4 |
| Initial backup | 13.7 | 20.7 | 26.9 | 44.2 |
| 2nd backup | 4.8 | 8.0 | 15.4 | 19.5 |
| 3rd backup | 6.9 | 11.9 | 19.6 | 29.8 |
| 4th backup | 3.3 | 7.0 | 13.7 | 18.6 |
| 5th backup | 9.9 | 11.4 | 19.9 | 28.0 |
| 6th backup | 3.8 | 8.0 | 16.8 | 22.0 |
| 7th backup | 5.1 | 7.8 | 14.3 | 21.6 |
| 8th backup | 9.5 | 13.5 | 18.3 | 35.0 |
| 9th backup | 4.3 | 9.0 | 15.7 | 24.9 |
| 10th backup | 7.9 | 20.2 | 32.2 | 35.0 |
| 11th backup | 4.6 | 9.1 | 16.8 | 28.1 |
| 12th backup | 7.4 | 12.0 | 21.7 | 37.4 |
For more details and other speed comparison results, please visit https://github.com/gilbertchen/benchmarking. There you can also find test scripts that you can use to run your own experiments.

View File

@@ -12,6 +12,7 @@ import (
"regexp"
"strings"
"strconv"
"runtime"
"os/exec"
"os/signal"
"encoding/json"
@@ -147,18 +148,27 @@ func runScript(context *cli.Context, storageName string, phase string) bool {
preferencePath := duplicacy.GetDuplicacyPreferencePath()
scriptDir, _ := filepath.Abs(path.Join(preferencePath, "scripts"))
scriptName := phase + "-" + context.Command.Name
scriptNames := []string { phase + "-" + context.Command.Name,
storageName + "-" + phase + "-" + context.Command.Name }
script := path.Join(scriptDir, scriptName)
if _, err := os.Stat(script); err != nil {
scriptName = storageName + "-" + scriptName
script := ""
for _, scriptName := range scriptNames {
script = path.Join(scriptDir, scriptName)
if _, err = os.Stat(script); err != nil {
return false
if runtime.GOOS == "windows" {
script += ".bat"
}
if _, err := os.Stat(script); err == nil {
break
} else {
script = ""
}
}
duplicacy.LOG_INFO("SCRIPT_RUN", "Running %s script", scriptName)
if script == "" {
return false
}
duplicacy.LOG_INFO("SCRIPT_RUN", "Running script %s", script)
output, err := exec.Command(script, os.Args...).CombinedOutput()
for _, line := range strings.Split(string(output), "\n") {
@@ -1010,12 +1020,17 @@ func copySnapshots(context *cli.Context) {
os.Exit(ArgumentExitCode)
}
threads := context.Int("threads")
if threads < 1 {
threads = 1
}
repository, source := getRepositoryPreference(context, context.String("from"))
runScript(context, source.Name, "pre")
duplicacy.LOG_INFO("STORAGE_SET", "Source storage set to %s", source.StorageURL)
sourceStorage := duplicacy.CreateStorage(*source, false, 1)
sourceStorage := duplicacy.CreateStorage(*source, false, threads)
if sourceStorage == nil {
return
}
@@ -1045,7 +1060,7 @@ func copySnapshots(context *cli.Context) {
duplicacy.LOG_INFO("STORAGE_SET", "Destination storage set to %s", destination.StorageURL)
destinationStorage := duplicacy.CreateStorage(*destination, false, 1)
destinationStorage := duplicacy.CreateStorage(*destination, false, threads)
if destinationStorage == nil {
return
}
@@ -1056,8 +1071,8 @@ func copySnapshots(context *cli.Context) {
"Enter destination storage password:",false, false)
}
sourceStorage.SetRateLimits(context.Int("download-rate-limit"), 0)
destinationStorage.SetRateLimits(0, context.Int("upload-rate-limit"))
sourceStorage.SetRateLimits(context.Int("download-limit-rate"), 0)
destinationStorage.SetRateLimits(0, context.Int("upload-limit-rate"))
destinationManager := duplicacy.CreateBackupManager(destination.SnapshotID, destinationStorage, repository,
destinationPassword)
@@ -1070,11 +1085,6 @@ func copySnapshots(context *cli.Context) {
snapshotID = context.String("id")
}
threads := context.Int("threads")
if threads < 1 {
threads = 1
}
sourceManager.CopySnapshots(destinationManager, snapshotID, revisions, threads)
runScript(context, source.Name, "post")
}
@@ -1685,7 +1695,7 @@ func main() {
app.Name = "duplicacy"
app.HelpName = "duplicacy"
app.Usage = "A new generation cloud backup tool based on lock-free deduplication"
app.Version = "2.0.7"
app.Version = "2.0.9"
// If the program is interrupted, call the RunAtError function.
c := make(chan os.Signal, 1)

21
integration_tests/copy_test.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Integration test for the copy command: back up to the default storage, copy
# the snapshots to a second storage, repeat with more files, then run check
# against both storages.
#
# NOTE(review): fixture, add_file and the TEST_REPO / DUPLICACY / TEST_STORAGE /
# SECONDARY_STORAGE variables are not defined in this script; presumably they
# are provided by test_functions.sh -- confirm.
. ./test_functions.sh
fixture
pushd ${TEST_REPO}
# Initialize the repository with snapshot id "integration-tests".
# NOTE(review): -c 1k presumably selects a small chunk size -- confirm.
${DUPLICACY} init integration-tests $TEST_STORAGE -c 1k
# Add a second storage named "secondary"; -copy default makes it
# copy-compatible with the default storage so snapshots can be copied to it.
${DUPLICACY} add -copy default secondary integration-tests $SECONDARY_STORAGE
# First round: two files, one backup, one copy.
add_file file1
add_file file2
${DUPLICACY} backup
${DUPLICACY} copy -from default -to secondary
# Second round: two more files, so the second copy runs against a destination
# that already holds the first revision.
add_file file3
add_file file4
${DUPLICACY} backup
${DUPLICACY} copy -from default -to secondary
# Run check on the storage selected by default, then on "secondary".
${DUPLICACY} check --files -stats
${DUPLICACY} check --files -stats -storage secondary
popd

View File

@@ -17,6 +17,7 @@ import (
"sync/atomic"
"strings"
"strconv"
"runtime"
"encoding/hex"
"path/filepath"
)
@@ -621,7 +622,7 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
}
for _, dir := range skippedDirectories {
LOG_WARN("SKIP_DIRECTORY", "Subdirecotry %s cannot be listed", dir)
LOG_WARN("SKIP_DIRECTORY", "Subdirectory %s cannot be listed", dir)
}
for _, file := range fileReader.SkippedFiles {
@@ -1026,7 +1027,7 @@ func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *C
totalUploadedSnapshotChunkSize += int64(chunkSize)
totalUploadedSnapshotChunkBytes += int64(uploadSize)
} else {
LOG_DEBUG("CHUNK_EXIST", "Skipped snpashot chunk %s in the storage", chunk.GetID())
LOG_DEBUG("CHUNK_EXIST", "Skipped snapshot chunk %s in the storage", chunk.GetID())
}
}
@@ -1152,21 +1153,29 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
existingFile, err = os.Open(fullPath)
if err != nil {
if os.IsNotExist(err) {
if inPlace && entry.Size > 100 * 1024 * 1024 {
// macOS has no sparse file support
if inPlace && entry.Size > 100 * 1024 * 1024 && runtime.GOOS != "darwin" {
// Create an empty sparse file
existingFile, err = os.OpenFile(fullPath, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0600)
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to create the file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to create the file %s for in-place writing: %v", fullPath, err)
return false
}
_, err = existingFile.Seek(entry.Size - 1, 0)
n := int64(1)
// There is a go bug on Windows (https://github.com/golang/go/issues/21681) that causes Seek to fail
// if the lower 32 bit of the offset argument is 0xffffffff. Therefore we need to avoid that value by increasing n.
if uint32(entry.Size) == 0 && (entry.Size >> 32) > 0 {
n = int64(2)
}
_, err = existingFile.Seek(entry.Size - n, 0)
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to resize the initial file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to resize the initial file %s for in-place writing: %v", fullPath, err)
return false
}
_, err = existingFile.Write([]byte("\x00"))
_, err = existingFile.Write([]byte("\x00\x00")[:n])
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to initialize the sparse file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to initialize the sparse file %s for in-place writing: %v", fullPath, err)
return false
}
existingFile.Close()
@@ -1475,14 +1484,27 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
return false
}
revisionMap := make(map[int]bool)
if snapshotID == "" && len(revisionsToBeCopied) > 0 {
LOG_ERROR("SNAPSHOT_ERROR", "You must specify the snapshot id when one or more revisions are specified.")
return false
}
revisionMap := make(map[string]map[int]bool)
_, found := revisionMap[snapshotID]
if !found {
revisionMap[snapshotID] = make(map[int]bool)
}
for _, revision := range revisionsToBeCopied {
revisionMap[revision] = true
revisionMap[snapshotID][revision] = true
}
var snapshots [] *Snapshot
var otherSnapshots [] *Snapshot
var snapshotIDs [] string
var err error
if snapshotID == "" {
snapshotIDs, err = manager.SnapshotManager.ListSnapshotIDs()
if err != nil {
@@ -1494,6 +1516,10 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
for _, id := range snapshotIDs {
_, found := revisionMap[id]
if !found {
revisionMap[id] = make(map[int]bool)
}
revisions, err := manager.SnapshotManager.ListSnapshotRevisions(id)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", id, err)
@@ -1502,9 +1528,14 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
for _, revision := range revisions {
if len(revisionsToBeCopied) > 0 {
if _, found := revisionMap[revision]; !found {
if _, found := revisionMap[id][revision]; found {
revisionMap[id][revision] = true
} else {
revisionMap[id][revision] = false
continue
}
} else {
revisionMap[id][revision] = true
}
snapshotPath := fmt.Sprintf("snapshots/%s/%d", id, revision)
@@ -1516,21 +1547,44 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
if exist {
LOG_INFO("SNAPSHOT_EXIST", "Snapshot %s at revision %d already exists in the destination storage",
LOG_INFO("SNAPSHOT_EXIST", "Snapshot %s at revision %d already exists at the destination storage",
id, revision)
revisionMap[id][revision] = false
continue
}
snapshot := manager.SnapshotManager.DownloadSnapshot(id, revision)
snapshots = append(snapshots, snapshot)
}
otherRevisions, err := otherManager.SnapshotManager.ListSnapshotRevisions(id)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions at the destination for snapshot %s: %v", id, err)
return false
}
for _, otherRevision := range otherRevisions {
otherSnapshot := otherManager.SnapshotManager.DownloadSnapshot(id, otherRevision)
otherSnapshots = append(otherSnapshots, otherSnapshot)
}
}
if len(snapshots) == 0 {
LOG_INFO("SNAPSHOT_COPY", "Nothing to copy, all snapshot revisions exist at the destination.")
return true
}
chunks := make(map[string]bool)
for _, snapshot := range snapshots {
if revisionMap[snapshot.ID][snapshot.Revision] == false {
continue
}
LOG_TRACE("SNAPSHOT_COPY", "Copying snapshot %s at revision %d", snapshot.ID, snapshot.Revision)
for _, chunkHash := range snapshot.FileSequence {
chunks[chunkHash] = true
}
@@ -1556,42 +1610,90 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
}
for _, otherSnapshot := range otherSnapshots {
for _, chunkHash := range otherSnapshot.FileSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
for _, chunkHash := range otherSnapshot.ChunkSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
for _, chunkHash := range otherSnapshot.LengthSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
description := otherManager.SnapshotManager.DownloadSequence(otherSnapshot.ChunkSequence)
err := otherSnapshot.LoadChunks(description)
if err != nil {
LOG_ERROR("SNAPSHOT_CHUNK", "Failed to load chunks for destination snapshot %s at revision %d: %v",
otherSnapshot.ID, otherSnapshot.Revision, err)
return false
}
for _, chunkHash := range otherSnapshot.ChunkHashes {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
}
chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, false, threads)
chunkUploader := CreateChunkUploader(otherManager.config, otherManager.storage, nil, threads,
func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
if skipped {
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) exists in the destination", chunk.GetID(), chunkIndex, len(chunks))
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) exists at the destination", chunk.GetID(), chunkIndex, len(chunks))
} else {
LOG_INFO("SNAPSHOT_COPY", "Copied chunk %s (%d/%d)", chunk.GetID(), chunkIndex, len(chunks))
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) copied to the destination", chunk.GetID(), chunkIndex, len(chunks))
}
otherManager.config.PutChunk(chunk)
})
chunkUploader.Start()
totalCopied := 0
totalSkipped := 0
chunkIndex := 0
for chunkHash, _ := range chunks {
for chunkHash, needsCopy := range chunks {
chunkIndex++
chunkID := manager.config.GetChunkIDFromHash(chunkHash)
newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash)
LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID)
i := chunkDownloader.AddChunk(chunkHash)
chunk := chunkDownloader.WaitForChunk(i)
newChunk := otherManager.config.GetChunk()
newChunk.Reset(true)
newChunk.Write(chunk.GetBytes())
chunkUploader.StartChunk(newChunk, chunkIndex)
if needsCopy {
newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash)
LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID)
i := chunkDownloader.AddChunk(chunkHash)
chunk := chunkDownloader.WaitForChunk(i)
newChunk := otherManager.config.GetChunk()
newChunk.Reset(true)
newChunk.Write(chunk.GetBytes())
chunkUploader.StartChunk(newChunk, chunkIndex)
totalCopied++
} else {
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) skipped at the destination", chunkID, chunkIndex, len(chunks))
totalSkipped++
}
}
chunkDownloader.Stop()
chunkUploader.Stop()
LOG_INFO("SNAPSHOT_COPY", "Total chunks copied = %d, skipped = %d.", totalCopied, totalSkipped)
for _, snapshot := range snapshots {
otherManager.storage.CreateDirectory(0, fmt.Sprintf("snapshots/%s", manager.snapshotID))
if revisionMap[snapshot.ID][snapshot.Revision] == false {
continue
}
otherManager.storage.CreateDirectory(0, fmt.Sprintf("snapshots/%s", snapshot.ID))
description, _ := snapshot.MarshalJSON()
path := fmt.Sprintf("snapshots/%s/%d", manager.snapshotID, snapshot.Revision)
path := fmt.Sprintf("snapshots/%s/%d", snapshot.ID, snapshot.Revision)
otherManager.SnapshotManager.UploadFile(path, path, description)
LOG_INFO("SNAPSHOT_COPY", "Copied snapshot %s at revision %d", snapshot.ID, snapshot.Revision)
}

View File

@@ -122,7 +122,7 @@ func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) boo
// Chunk deduplication by name in effect here.
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
return false
}

View File

@@ -15,6 +15,7 @@ import (
"encoding/json"
"encoding/base64"
"strings"
"runtime"
)
@@ -488,7 +489,14 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns [] string
skippedFiles = append(skippedFiles, entry.Path)
continue
}
entry = CreateEntryFromFileInfo(stat, "")
newEntry := CreateEntryFromFileInfo(stat, "")
if runtime.GOOS == "windows" {
// On Windows, stat.Name() is the last component of the target, so we need to construct the correct
// path from f.Name(); note that a "/" is appended, assuming a symbolic link is always a directory
newEntry.Path = filepath.Join(normalizedPath, f.Name()) + "/"
}
entry = newEntry
}
}

View File

@@ -30,7 +30,7 @@ type GCDStorage struct {
service *drive.Service
idCache map[string]string
idCacheLock *sync.Mutex
backoff int
backoffs []int
isConnected bool
numberOfThreads int
@@ -45,12 +45,12 @@ type GCDConfig struct {
Token oauth2.Token `json:"token"`
}
func (storage *GCDStorage) shouldRetry(err error) (bool, error) {
func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error) {
retry := false
message := ""
if err == nil {
storage.backoff = 1
storage.backoffs[threadIndex] = 1
return false, nil
} else if e, ok := err.(*googleapi.Error); ok {
if 500 <= e.Code && e.Code < 600 {
@@ -84,15 +84,15 @@ func (storage *GCDStorage) shouldRetry(err error) (bool, error) {
retry = err.Temporary()
}
if !retry || storage.backoff >= 256{
storage.backoff = 1
if !retry || storage.backoffs[threadIndex] >= 256 {
storage.backoffs[threadIndex] = 1
return false, err
}
delay := float32(storage.backoff) * rand.Float32()
delay := float32(storage.backoffs[threadIndex]) * rand.Float32()
LOG_DEBUG("GCD_RETRY", "%s; retrying after %.2f seconds", message, delay)
time.Sleep(time.Duration(float32(storage.backoff) * float32(time.Second)))
storage.backoff *= 2
time.Sleep(time.Duration(float32(storage.backoffs[threadIndex]) * float32(time.Second)))
storage.backoffs[threadIndex] *= 2
return true, nil
}
@@ -129,7 +129,7 @@ func (storage *GCDStorage) deletePathID(path string) {
storage.idCacheLock.Unlock()
}
func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.File, error) {
func (storage *GCDStorage) listFiles(threadIndex int, parentID string, listFiles bool) ([]*drive.File, error) {
if parentID == "" {
return nil, fmt.Errorf("No parent ID provided")
@@ -157,7 +157,7 @@ func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.
for {
fileList, err = storage.service.Files.List().Q(query).Fields("nextPageToken", "files(name, mimeType, id, size)").PageToken(startToken).PageSize(maxCount).Do()
if retry, e := storage.shouldRetry(err); e == nil && !retry {
if retry, e := storage.shouldRetry(threadIndex, err); e == nil && !retry {
break
} else if retry {
continue
@@ -178,7 +178,7 @@ func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.
return files, nil
}
func (storage *GCDStorage) listByName(parentID string, name string) (string, bool, int64, error) {
func (storage *GCDStorage) listByName(threadIndex int, parentID string, name string) (string, bool, int64, error) {
var fileList *drive.FileList
var err error
@@ -187,7 +187,7 @@ func (storage *GCDStorage) listByName(parentID string, name string) (string, boo
query := "name = '" + name + "' and '" + parentID + "' in parents"
fileList, err = storage.service.Files.List().Q(query).Fields("files(name, mimeType, id, size)").Do()
if retry, e := storage.shouldRetry(err); e == nil && !retry {
if retry, e := storage.shouldRetry(threadIndex, err); e == nil && !retry {
break
} else if retry {
continue
@@ -205,7 +205,7 @@ func (storage *GCDStorage) listByName(parentID string, name string) (string, boo
return file.Id, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil
}
func (storage *GCDStorage) getIDFromPath(path string) (string, error) {
func (storage *GCDStorage) getIDFromPath(threadIndex int, path string) (string, error) {
fileID := "root"
@@ -231,7 +231,7 @@ func (storage *GCDStorage) getIDFromPath(path string) (string, error) {
var err error
var isDir bool
fileID, isDir, _, err = storage.listByName(fileID, name)
fileID, isDir, _, err = storage.listByName(threadIndex, fileID, name)
if err != nil {
return "", err
}
@@ -276,9 +276,10 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
numberOfThreads: threads,
idCache: make(map[string]string),
idCacheLock: &sync.Mutex{},
backoffs: make([]int, threads),
}
storagePathID, err := storage.getIDFromPath(storagePath)
storagePathID, err := storage.getIDFromPath(0, storagePath)
if err != nil {
return nil, err
}
@@ -286,7 +287,7 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
storage.idCache[""] = storagePathID
for _, dir := range []string { "chunks", "snapshots", "fossils" } {
dirID, isDir, _, err := storage.listByName(storagePathID, dir)
dirID, isDir, _, err := storage.listByName(0, storagePathID, dir)
if err != nil {
return nil, err
}
@@ -316,7 +317,7 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
if dir == "snapshots" {
files, err := storage.listFiles(storage.getPathID(dir), false)
files, err := storage.listFiles(threadIndex, storage.getPathID(dir), false)
if err != nil {
return nil, nil, err
}
@@ -329,12 +330,12 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
}
return subDirs, nil, nil
} else if strings.HasPrefix(dir, "snapshots/") {
pathID, err := storage.getIDFromPath(dir)
pathID, err := storage.getIDFromPath(threadIndex, dir)
if err != nil {
return nil, nil, err
}
entries, err := storage.listFiles(pathID, true)
entries, err := storage.listFiles(threadIndex, pathID, true)
if err != nil {
return nil, nil, err
}
@@ -351,7 +352,7 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
sizes := []int64{}
for _, parent := range []string { "chunks", "fossils" } {
entries, err := storage.listFiles(storage.getPathID(parent), true)
entries, err := storage.listFiles(threadIndex, storage.getPathID(parent), true)
if err != nil {
return nil, nil, err
}
@@ -376,7 +377,7 @@ func (storage *GCDStorage) DeleteFile(threadIndex int, filePath string) (err err
filePath = storage.convertFilePath(filePath)
fileID, ok := storage.findPathID(filePath)
if !ok {
fileID, err = storage.getIDFromPath(filePath)
fileID, err = storage.getIDFromPath(threadIndex, filePath)
if err != nil {
LOG_TRACE("GCD_STORAGE", "Ignored file deletion error: %v", err)
return nil
@@ -385,7 +386,7 @@ func (storage *GCDStorage) DeleteFile(threadIndex int, filePath string) (err err
for {
err = storage.service.Files.Delete(fileID).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
storage.deletePathID(filePath)
return nil
} else if retry {
@@ -420,7 +421,7 @@ func (storage *GCDStorage) MoveFile(threadIndex int, from string, to string) (er
for {
_, err = storage.service.Files.Update(fileID, nil).AddParents(toParentID).RemoveParents(fromParentID).Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -469,7 +470,7 @@ func (storage *GCDStorage) CreateDirectory(threadIndex int, dir string) (err err
for {
file, err = storage.service.Files.Create(file).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -495,12 +496,12 @@ func (storage *GCDStorage) GetFileInfo(threadIndex int, filePath string) (exist
if dir == "." {
dir = ""
}
dirID, err := storage.getIDFromPath(dir)
dirID, err := storage.getIDFromPath(threadIndex, dir)
if err != nil {
return false, false, 0, err
}
fileID, isDir, size, err = storage.listByName(dirID, path.Base(filePath))
fileID, isDir, size, err = storage.listByName(threadIndex, dirID, path.Base(filePath))
if fileID != "" {
storage.savePathID(filePath, fileID)
}
@@ -509,7 +510,7 @@ func (storage *GCDStorage) GetFileInfo(threadIndex int, filePath string) (exist
for {
file, err := storage.service.Files.Get(fileID).Fields("id, mimeType").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
return true, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil
} else if retry {
continue
@@ -533,7 +534,7 @@ func (storage *GCDStorage) FindChunk(threadIndex int, chunkID string, isFossil b
}
fileID := ""
fileID, _, size, err = storage.listByName(parentID, chunkID)
fileID, _, size, err = storage.listByName(threadIndex, parentID, chunkID)
if fileID != "" {
storage.savePathID(realPath, fileID)
}
@@ -545,7 +546,7 @@ func (storage *GCDStorage) DownloadFile(threadIndex int, filePath string, chunk
// We never download the fossil so there is no need to convert the path
fileID, ok := storage.findPathID(filePath)
if !ok {
fileID, err = storage.getIDFromPath(filePath)
fileID, err = storage.getIDFromPath(threadIndex, filePath)
if err != nil {
return err
}
@@ -556,7 +557,7 @@ func (storage *GCDStorage) DownloadFile(threadIndex int, filePath string, chunk
for {
response, err = storage.service.Files.Get(fileID).Download()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -583,7 +584,7 @@ func (storage *GCDStorage) UploadFile(threadIndex int, filePath string, content
parentID, ok := storage.findPathID(parent)
if !ok {
parentID, err = storage.getIDFromPath(parent)
parentID, err = storage.getIDFromPath(threadIndex, parent)
if err != nil {
return err
}
@@ -599,7 +600,7 @@ func (storage *GCDStorage) UploadFile(threadIndex int, filePath string, content
for {
reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads)
_, err = storage.service.Files.Create(file).Media(reader).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue

View File

@@ -23,6 +23,7 @@ const (
ASSERT = 4
)
var LogFunction func(level int, logID string, message string)
var printLogHeader = false
@@ -117,6 +118,11 @@ func logf(level int, logID string, format string, v ...interface{}) {
message := fmt.Sprintf(format, v...)
if LogFunction != nil {
LogFunction(level, logID, message)
return
}
now := time.Now()
// Uncomment this line to enable unbuffered logging for tests

View File

@@ -89,7 +89,7 @@ func (client *OneDriveClient) call(url string, method string, input interface{},
case []byte:
inputReader = bytes.NewReader(input.([]byte))
case int:
inputReader = bytes.NewReader([]byte(""))
inputReader = nil
case *bytes.Buffer:
inputReader = bytes.NewReader(input.(*bytes.Buffer).Bytes())
case *RateLimitedReader:

View File

@@ -98,7 +98,7 @@ func SavePreferences() (bool) {
}
preferenceFile := path.Join(GetDuplicacyPreferencePath(), "preferences")
err = ioutil.WriteFile(preferenceFile, description, 0644)
err = ioutil.WriteFile(preferenceFile, description, 0600)
if err != nil {
LOG_ERROR("PREFERENCE_WRITE", "Failed to save the preference file %s: %v", preferenceFile, err)
return false
@@ -108,9 +108,9 @@ func SavePreferences() (bool) {
}
func FindPreference(name string) (*Preference) {
for _, preference := range Preferences {
for i, preference := range Preferences {
if preference.Name == name || preference.StorageURL == name {
return &preference
return &Preferences[i]
}
}

View File

@@ -5,6 +5,9 @@
package duplicacy
import (
"strings"
"reflect"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/credentials"
@@ -227,15 +230,26 @@ func (storage *S3Storage) DownloadFile(threadIndex int, filePath string, chunk *
// UploadFile writes 'content' to the file at 'filePath'.
func (storage *S3Storage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
input := &s3.PutObjectInput {
Bucket: aws.String(storage.bucket),
Key: aws.String(storage.storageDir + filePath),
ACL: aws.String(s3.ObjectCannedACLPrivate),
Body: CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.bucket)),
ContentType: aws.String("application/duplicacy"),
attempts := 0
for {
input := &s3.PutObjectInput {
Bucket: aws.String(storage.bucket),
Key: aws.String(storage.storageDir + filePath),
ACL: aws.String(s3.ObjectCannedACLPrivate),
Body: CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.bucket)),
ContentType: aws.String("application/duplicacy"),
}
_, err = storage.client.PutObject(input)
if err == nil || attempts >= 3 || !strings.Contains(err.Error(), "XAmzContentSHA256Mismatch") {
return err
}
LOG_INFO("S3_RETRY", "Retrying on %s: %v", reflect.TypeOf(err), err)
attempts += 1
}
_, err = storage.client.PutObject(input)
return err
}

View File

@@ -176,7 +176,7 @@ func LoadIncompleteSnapshot() (snapshot *Snapshot) {
ChunkHashes: chunkHashes,
ChunkLengths: incompleteSnapshot.ChunkLengths,
}
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snpashot loaded from %s", snapshotFile)
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile)
return snapshot
}

View File

@@ -303,12 +303,8 @@ func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot,
return false
}
if patterns == nil {
if len(patterns) != 0 && !MatchPath(entry.Path, patterns) {
entry.Attributes = nil
} else if len(patterns) != 0 {
if !MatchPath(entry.Path, patterns) {
entry.Attributes = nil
}
}
files = append(files, &entry)
@@ -664,7 +660,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
if snapshotID == "" {
snapshotIDs, err = manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return 0
}
} else {
@@ -787,7 +783,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
if snapshotID == "" || showStatistics {
snapshotIDs, err := manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return false
}
@@ -1596,7 +1592,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
// because we need to find out which chunks are not referenced.
snapshotIDs, err := manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return false
}

View File

@@ -181,7 +181,7 @@ func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expecte
snapshotIDs, err = manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return
}

View File

@@ -200,6 +200,9 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
username = username[:len(username) - 1]
}
// If ssh_key_file is set, public key authentication is attempted before password-based login
keyFile := GetPasswordFromPreference(preference, "ssh_key_file")
password := ""
passwordCallback := func() (string, error) {
LOG_DEBUG("SSH_PASSWORD", "Attempting password login")
@@ -219,7 +222,6 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
}
}
keyFile := ""
publicKeysCallback := func() ([]ssh.Signer, error) {
LOG_DEBUG("SSH_PUBLICKEY", "Attempting public key authentication")
@@ -273,10 +275,19 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
}
authMethods := [] ssh.AuthMethod {
}
passwordAuthMethods := [] ssh.AuthMethod {
ssh.PasswordCallback(passwordCallback),
ssh.KeyboardInteractive(keyboardInteractive),
}
keyFileAuthMethods := [] ssh.AuthMethod {
ssh.PublicKeysCallback(publicKeysCallback),
}
if keyFile != "" {
authMethods = append(keyFileAuthMethods, passwordAuthMethods...)
} else {
authMethods = append(passwordAuthMethods, keyFileAuthMethods...)
}
if RunInBackground {

View File

@@ -118,10 +118,8 @@ func GenerateKeyFromPassword(password string) []byte {
return pbkdf2.Key([]byte(password), DEFAULT_KEY, 16384, 32, sha256.New)
}
// GetPassword attempts to get the password from KeyChain/KeyRing, environment variables, or keyboard input.
func GetPassword(preference Preference, passwordType string, prompt string,
showPassword bool, resetPassword bool) (string) {
// GetPasswordFromPreference gets the password from the preference or the environment, without making any keyring request.
func GetPasswordFromPreference(preference Preference, passwordType string) (string) {
passwordID := passwordType
if preference.Name != "default" {
passwordID = preference.Name + "_" + passwordID
@@ -135,11 +133,31 @@ func GetPassword(preference Preference, passwordType string, prompt string,
}
}
// If the password is stored in the preference, there is no need to include the storage name
// (i.e., preference.Name) in the key, so the key name should really be passwordType rather
// than passwordID; we're using passwordID here only for backward compatibility
if len(preference.Keys) > 0 && len(preference.Keys[passwordID]) > 0 {
LOG_DEBUG("PASSWORD_KEYCHAIN", "Reading %s from preferences", passwordID)
return preference.Keys[passwordID]
}
if len(preference.Keys) > 0 && len(preference.Keys[passwordType]) > 0 {
LOG_DEBUG("PASSWORD_KEYCHAIN", "Reading %s from preferences", passwordType)
return preference.Keys[passwordType]
}
return ""
}
// GetPassword attempts to get the password from KeyChain/KeyRing, environment variables, or keyboard input.
func GetPassword(preference Preference, passwordType string, prompt string,
showPassword bool, resetPassword bool) (string) {
passwordID := passwordType
password := GetPasswordFromPreference(preference,passwordType)
if password != "" {
return password
}
if resetPassword && !RunInBackground {
keyringSet(passwordID, "")
} else {
@@ -155,7 +173,7 @@ func GetPassword(preference Preference, passwordType string, prompt string,
}
password := ""
password = ""
fmt.Printf("%s", prompt)
if showPassword {
scanner := bufio.NewScanner(os.Stdin)
@@ -175,6 +193,7 @@ func GetPassword(preference Preference, passwordType string, prompt string,
// SavePassword saves the specified password in the keyring/keychain.
func SavePassword(preference Preference, passwordType string, password string) {
if password == "" || RunInBackground {
return
}
@@ -182,6 +201,12 @@ func SavePassword(preference Preference, passwordType string, password string) {
if preference.DoNotSavePassword {
return
}
// If the password is retrieved from env or preference, don't save it to keyring
if GetPasswordFromPreference(preference, passwordType) == password {
return
}
passwordID := passwordType
if preference.Name != "default" {
passwordID = preference.Name + "_" + passwordID
@@ -282,6 +307,10 @@ func joinPath(components ...string) string {
combinedPath := path.Join(components...)
if len(combinedPath) > 257 && runtime.GOOS == "windows" {
combinedPath = `\\?\` + filepath.Join(components...)
// If the path is on a samba drive we must use the UNC format
if strings.HasPrefix(combinedPath, `\\?\\\`) {
combinedPath = `\\?\UNC\` + combinedPath[6:]
}
}
return combinedPath
}