Compare commits

...

65 Commits

Author SHA1 Message Date
Gilbert Chen
d8573ca789 Bump version to 2.0.9 2017-09-11 11:13:28 -04:00
Gilbert Chen
6b2f50a1e8 Fixed OneDrive 503 errors by sending GET requests with a nil body 2017-09-11 11:12:05 -04:00
gilbertchen
81b8550232 Merge pull request #173 from jt70471/patch-2
change message when chunk is skipped at destination for copy
2017-09-11 10:19:55 -04:00
gilbertchen
f6e2877948 Merge pull request #170 from jt70471/patch-1
fix upload/download rate for copy described in issue #169
2017-09-11 08:16:03 -04:00
Jeff Thompson
3c1057a3c6 change message when chunk is optimized and skipped at destination for copy 2017-09-09 11:27:33 -05:00
Gilbert Chen
8808ad5c28 Retry on XAmzContentSHA256Mismatch 2017-09-08 19:46:27 -04:00
Jeff Thompson
707967e91b fix upload/download rate for copy described in issue #169 2017-09-08 16:39:41 -05:00
Gilbert Chen
3f83890859 Don't save passwords from env/pref to keyring 2017-09-08 16:51:05 -04:00
Gilbert Chen
68fb6d671e Fixed symbolic link handling on Windows 2017-09-08 15:31:45 -04:00
gilbertchen
b04ef67d26 Fixed a typo in GUIDE.md 2017-09-08 11:57:46 -04:00
gilbertchen
72ba2dfa87 Merge pull request #154 from jt70471/jt70471-patch-1
Skip chunks to copy if already on destination for issue #134
2017-09-07 20:20:29 -04:00
Jeff Thompson
b41e8a24a9 Skip chunks to copy if already on destination for issue #134 2017-09-07 16:24:11 -05:00
gilbertchen
a3aa575c68 Merge pull request #165 from jt70471/patch-2
Use number of threads specified on copy command
2017-09-07 16:18:13 -04:00
gilbertchen
e765575210 Merge pull request #155 from niknah/sftp_login
Use ssh key file first if we have it in preferences/environment
2017-09-07 16:13:19 -04:00
gilbertchen
044e1862e5 Merge pull request #161 from jt70471/patch-1
Fix doc bug for issue #151
2017-09-07 15:31:34 -04:00
Jeff Thompson
612c5b7746 Use number of threads specified on copy command 2017-09-06 17:01:27 -05:00
Jeff Thompson
34afc6f93c Update GUIDE.md
Fix doc bug referenced in issue #151.
2017-09-05 15:37:34 -05:00
niknah
030cd274c2 If we have a sftp key file in the environment/preferences, then don't attempt a password login to avoid bad login errors. 2017-09-04 19:40:08 +10:00
Gilbert Chen
197d20f0e0 Workaround a go bug to avoid seek offsets whose lower 32 bits are -1 2017-09-01 15:05:14 -04:00
gilbertchen
93cfbf27cb Merge pull request #147 from flamingm0e/master
cleanup markdown
2017-09-01 11:52:34 -04:00
m@
46ec852d4d cleanup markdown 2017-08-31 22:18:05 -05:00
Gilbert Chen
dfa6113279 Keep and restore attributes when no patterns provided to the restore command 2017-08-31 16:29:57 -04:00
Gilbert Chen
d7fdb5fe7f Add .bat to script names on Windows 2017-08-31 12:25:31 -04:00
Gilbert Chen
37ebbc4736 Add a test for copying snapshots between storages 2017-08-30 23:07:00 -04:00
Gilbert Chen
3ae2de241e For chunks already existing on the storage the skipped flag should be true 2017-08-30 15:40:38 -04:00
Gilbert Chen
4adb8dbf70 Convert samba drive paths to UNC paths 2017-08-29 14:56:13 -04:00
gilbertchen
41e3d267e5 Merge pull request #139 from countextreme/master
Fix typos: snpashot -> snapshot
2017-08-28 16:04:10 -04:00
gilbertchen
3e23b0c61c Merge pull request #138 from smt/patch-1
Fix typo
2017-08-28 16:03:21 -04:00
countextreme
b7f537de3c Update duplicacy_snapshotmanager_test.go 2017-08-28 13:17:07 -04:00
countextreme
0c8a88d15a Update duplicacy_snapshotmanager.go 2017-08-28 13:16:33 -04:00
countextreme
204f56e939 Update duplicacy_snapshot.go 2017-08-28 13:15:56 -04:00
countextreme
4a80d94b63 Update duplicacy_backupmanager.go 2017-08-28 13:15:22 -04:00
Stephen Tudor
3729de1c67 Fix typo
s/Subdirecotry/Subdirectory
2017-08-28 08:25:58 -04:00
Gilbert Chen
6f70b37d61 In GCD backend each thread should have its own backoff value 2017-08-25 23:53:02 -04:00
Gilbert Chen
7baf8702a3 The file .duplicacy/preferences should not be readable by group and others 2017-08-24 23:07:49 -04:00
Gilbert Chen
8fce6f5f83 FindPreference should return the address of the Preference object for setPreference to work 2017-08-24 23:02:39 -04:00
gilbertchen
fd362be54a Merge pull request #120 from thenickdude/sftp-path-docs
Add documentation for absolute SFTP paths
2017-08-24 11:28:33 -04:00
Nicholas Sherlock
0c13da9872 Add documentation for absolute SFTP paths 2017-08-24 16:29:44 +12:00
Gilbert Chen
4912911017 Bump version to 2.0.8 2017-08-23 22:34:49 -04:00
Gilbert Chen
f69550d0db Allow logging function to be customized 2017-08-23 22:33:45 -04:00
Gilbert Chen
799b040913 Add Wasabi storage to tests 2017-08-12 11:25:25 -04:00
gilbertchen
41e3843bfa Update README.md 2017-08-12 10:59:00 -04:00
gilbertchen
9e1d2ac1e6 Merge pull request #110 from clbn/patch-1
Update README.md
2017-08-12 10:51:44 -04:00
Alex Olshansky
bc40498d1b Update README.md
Fixes typo ("serivce" -> "service") and factual error (B2 is not the least expensive since Wasabi was added).
2017-08-12 15:24:27 +02:00
Gilbert Chen
446bb4bcc8 Add a pseudo test to clean the storage 2017-08-11 13:15:59 -04:00
Gilbert Chen
150ea13a0d Fixed a build error in SnapshotManager tests caused by changes in CreateFileStorage 2017-08-09 12:12:21 -04:00
Gilbert Chen
8c5b7d5f63 Fixed Azure storage after updating gilbertchen/azure-sdk-for-g 2017-08-09 00:14:25 -04:00
Gilbert Chen
315dfff7d6 Add caching to network drives 2017-08-08 23:10:22 -04:00
Gilbert Chen
0bc475ca4d Allow backups to be restore and managed without a license 2017-08-05 21:24:05 -04:00
Gilbert Chen
a0fa0fe7da Fixed #101: show storage name correctly in the password command 2017-08-05 12:30:13 -04:00
gilbertchen
01db72080c Update GUIDE.md 2017-08-05 11:52:17 -04:00
Gilbert Chen
22ddc04698 Restore empty directories 2017-08-05 10:56:15 -04:00
Gilbert Chen
2aa3b2b737 Fixed a chunk not found error if the storage is a Windows network share with deduplication on 2017-08-02 22:04:22 -04:00
Gilbert Chen
76f75cb0cb Merge branch 'master' of https://github.com/gilbertchen/duplicacy 2017-08-01 23:09:18 -04:00
Gilbert Chen
ea4c4339e6 Bump version to 2.0.7 2017-08-01 23:08:57 -04:00
Gilbert Chen
fa294eabf4 When a chunk can't be found, print the error if it is not nil 2017-08-01 23:08:11 -04:00
gilbertchen
0ec262fd93 Merge pull request #102 from whereisaaron/patch-1
Update option name reset-password -> reset-passwords
2017-07-27 23:55:41 -04:00
Gilbert Chen
db3e0946bb Fixed a bug that caused a truncated file not to be restored correctly 2017-07-27 23:27:59 -04:00
Gilbert Chen
c426bf5af2 Merge branch 'master' of https://github.com/gilbertchen/duplicacy 2017-07-27 22:43:00 -04:00
Gilbert Chen
823b82060c Add a storage prefix flat:// that can handle a flat chunk directory 2017-07-27 22:42:48 -04:00
Aaron Roydhouse
4308e3e6e9 Update option name reset-password -> reset-passwords 2017-07-27 16:38:48 -04:00
gilbertchen
0391ecf941 Update README.md 2017-07-25 23:48:52 -04:00
gilbertchen
7ecf895d85 Update README.md 2017-07-25 23:48:13 -04:00
gilbertchen
a43114da99 Update README.md 2017-07-25 23:47:50 -04:00
gilbertchen
caaff6b4b2 Add doc for minio and s3c 2017-07-24 14:01:10 -04:00
26 changed files with 657 additions and 379 deletions

3
.gitignore vendored
View File

@@ -1,3 +0,0 @@
.idea
duplicacy_main

View File

@@ -8,9 +8,9 @@ Duplicacy is based on the following open source projects:
|https://github.com/Azure/azure-sdk-for-go | Apache-2.0 |
|https://github.com/tj/go-dropbox | MIT |
|https://github.com/aws/aws-sdk-go | Apache-2.0 |
|https://github.com/goamz/goamz | LGPL with static link exception |
|https://github.com/goamz/goamz | LGPL with static link exception |
|https://github.com/howeyc/gopass | ISC |
|https://github.com/tmc/keyring | ISC |
|https://github.com/pcwizz/xattr | BSD-2-Clause |
|https://github.com/pcwizz/xattr | BSD-2-Clause |
|https://github.com/minio/blake2b-simd | Apache-2.0 |
|https://github.com/go-ole/go-ole | MIT |

View File

@@ -27,7 +27,7 @@ If exclusive access to a file storage by a single client can be guaranteed, the
chunks not referenced by any backup and delete them. However, if concurrent access is required, an unreferenced chunk
can't be trivially removed, because of the possibility that a backup procedure in progress may reference the same chunk.
The ongoing backup procedure, still unknown to the deletion procedure, may have already encountered that chunk during its
file scanning phase, but decided not to upload the chunk again since it already exists in the file storage.
file scanning phase, but decided not to upload the chunk again since it already exists in the file storage.
Fortunately, there is a solution to address the deletion problem and make lock-free deduplication practical. The solution is a *two-step fossil collection* algorithm that deletes unreferenced chunks in two steps: identify and collect them in the first step, and then permanently remove them once certain conditions are met.
@@ -47,7 +47,7 @@ In the first step of the deletion procedure, called the *fossil collection* step
be saved in a fossil collection file. The deletion procedure then exits without performing further actions. This step has not effectively changed any chunk references due to the first fossil access rule. If a backup procedure references a chunk after it is marked as a fossil, a new chunk will be uploaded because of the second fossil access rule, as shown in Figure 1.
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_1.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_1.png?raw=true"
alt="Reference after Rename"/>
</p>
@@ -64,7 +64,7 @@ Therefore, if a backup procedure references a chunk before the chunk is marked a
delete the chunk until it sees that backup procedure finishes (as indicated by the appearance of a new snapshot file uploaded to the storage). This ensures that scenarios depicted in Figure 2 will never happen.
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_2.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/fossil_collection_2.png?raw=true"
alt="Reference before Rename"/>
</p>
@@ -128,25 +128,25 @@ and dir1/file3):
170593,
124309,
1734
]
]
}
```
When Duplicacy splits a file in chunks using the variable-size chunking algorithm, if the end of a file is reached and yet the boundary marker for terminating a chunk
hasn't been found, the next file, if there is one, will be read in and the chunking algorithm continues. It is as if all
hasn't been found, the next file, if there is one, will be read in and the chunking algorithm continues. It is as if all
files were packed into a big tar file which is then split into chunks.
The *content* field of a file indicates the indexes of starting and ending chunks and the corresponding offsets. For
instance, *file1* starts at chunk 0 offset 0 and ends at chunk 2 offset 6108, immediately followed by *file2*.
The backup procedure can run in one of two modes. In the default quick mode, only modified or new files are scanned. Chunks only
referenced by old files that have been modified are removed from the chunk sequence, and then chunks referenced by new
referenced by old files that have been modified are removed from the chunk sequence, and then chunks referenced by new
files are appended. Indices for unchanged files need to be updated too.
In the safe mode (enabled by the -hash option), all files are scanned and the chunk sequence is regenerated.
The length sequence stores the lengths for all chunks, which are needed when calculating some statistics such as the total
length of chunks. For a repository containing a large number of files, the size of the snapshot file can be tremendous.
length of chunks. For a repository containing a large number of files, the size of the snapshot file can be tremendous.
To make the situation worse, a big snapshot file would have to be uploaded every time even if only a few files have been changed since
last backup. To save space, the variable-size chunking algorithm is also applied to the three dynamic fields of a snapshot
file, *files*, *chunks*, and *lengths*.
@@ -200,7 +200,7 @@ When encryption is enabled (by the -e option with the *init* or *add* command),
Here is a diagram showing how these keys are used:
<p align="center">
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/duplicacy_encryption.png?raw=true"
<img src="https://github.com/gilbertchen/duplicacy-beta/blob/master/images/duplicacy_encryption.png?raw=true"
alt="encryption"/>
</p>
@@ -210,6 +210,4 @@ Chunk content is encrypted by AES-GCM, with an encryption key that is the HMAC-S
The snapshot is encrypted by AES-GCM too, using an encryption key that is the HMAC-SHA256 of the file path with the *File Key* as the secret key.
These four random keys are saved in a file named 'config' in the storage, encrypted with a master key derived from the PBKDF2 function on
the storage password chosen by the user.
These four random keys are saved in a file named 'config' in the storage, encrypted with a master key derived from the PBKDF2 function on the storage password chosen by the user.

204
GUIDE.md
View File

@@ -16,25 +16,22 @@ OPTIONS:
-chunk-size, -c 4M the average size of chunks
-max-chunk-size, -max 16M the maximum size of chunks (defaults to chunk-size * 4)
-min-chunk-size, -min 1M the minimum size of chunks (defaults to chunk-size / 4)
-pref-dir <preference directory path> Specify alternate location for .duplicacy preferences directory
-pref-dir <preference directory path> Specify alternate location for .duplicacy preferences directory
```
The *init* command first connects to the storage specified by the storage URL. If the storage has been already been
initialized before, it will download the storage configuration (stored in the file named *config*) and ignore the options provided in the command line. Otherwise, it will create the configuration file from the options and upload the file.
The *init* command first connects to the storage specified by the storage URL. If the storage has already been initialized before, it will download the storage configuration (stored in the file named *config*) and ignore the options provided in the command line. Otherwise, it will create the configuration file from the options and upload the file.
The initialized storage will then become the default storage for other commands if the -storage option is not specified
for those commands. This default storage actually has a name, *default*.
The initialized storage will then become the default storage for other commands if the `-storage` option is not specified for those commands. This default storage actually has a name, *default*.
After that, it will prepare the the current working directory as the repository to be backed up. Under the hood, it will create a directory
named *.duplicacy* in the repository and put a file named *preferences* that stores the snapshot id and encryption and storage options.
After that, it will prepare the current working directory as the repository to be backed up. Under the hood, it will create a directory named *.duplicacy* in the repository and put a file named *preferences* that stores the snapshot id and encryption and storage options.
The snapshot id is an id used to distinguish different repositories connected to the same storage. Each repository must have a unique snapshot id. A snapshot id must contain only characters valid in Linux and Windows paths (alphabet, digits, underscore, dash, etc), but cannot include `/`, `\`, or `@`.
The -e option controls whether or not encryption will be enabled for the storage. If encryption is enabled, you will be prompted to enter a storage password.
The `-e` option controls whether or not encryption will be enabled for the storage. If encryption is enabled, you will be prompted to enter a storage password.
The three chunk size parameters are passed to the variable-size chunking algorithm. Their values are important to the overall performance, especially for cloud storages. If the chunk size is too small, a lot of overhead will be in sending requests and receiving responses. If the chunk size is too large, the effect of deduplication will be less obvious as more data will need to be transferred with each chunk.
The three chunk size parameters are passed to the variable-size chunking algorithm. Their values are important to the overall performance, especially for cloud storages. If the chunk size is too small, a lot of overhead will be in sending requests and receiving responses. If the chunk size is too large, the effect of de-duplication will be less obvious as more data will need to be transferred with each chunk.
The -pref-dir controls the location of the preferences directory. If not specified, a directory named .duplicacy is created in the repository. If specified, it must point to a non-existing directory. The directory is created and a .duplicacy file is created in the repository. The .duplicacy file contains the absolute path name to the preferences directory.
The `-pref-dir` controls the location of the preferences directory. If not specified, a directory named .duplicacy is created in the repository. If specified, it must point to a non-existing directory. The directory is created and a .duplicacy file is created in the repository. The .duplicacy file contains the absolute path name to the preferences directory.
Once a storage has been initialized with these parameters, these parameters cannot be modified any more.
@@ -52,29 +49,24 @@ OPTIONS:
-t <tag> assign a tag to the backup
-stats show statistics during and after backup
-threads <n> number of uploading threads
-limit-rate <kB/s> the maximum upload rate (in kilobytes/sec)
-limit-rate <kB/s> the maximum upload rate (in kilobytes/sec)
-vss enable the Volume Shadow Copy service (Windows only)
-storage <storage name> backup to the specified storage instead of the default one
```
The *backup* command creates a snapshot of the repository and uploads it to the storage. If -hash is not provided,
it will upload new or modified files since last backup by comparing file sizes and timestamps.
Otherwise, every file is scanned to detect changes.
The *backup* command creates a snapshot of the repository and uploads it to the storage. If `-hash` is not provided, it will upload new or modified files since last backup by comparing file sizes and timestamps. Otherwise, every file is scanned to detect changes.
You can assign a tag to the snapshot so that later you can refer to it by tag in other commands.
If the -stats option is specified, statistical information such as transfer speed, the number of chunks will be displayed
throughout the backup procedure.
If the `-stats` option is specified, statistical information such as transfer speed, and the number of chunks will be displayed throughout the backup procedure.
The -threads option can be used to specify more than one thread to upload chunks.
The `-threads` option can be used to specify more than one thread to upload chunks.
The -limit-rate option sets a cape on the maximum upload rate.
The `-limit-rate` option sets a cap on the maximum upload rate.
The -vss option works on Windows only to turn on the Volume Shadow Copy service such that files opened by other
processes with exclusive locks can be read as usual.
The `-vss` option works on Windows only to turn on the Volume Shadow Copy service such that files opened by other processes with exclusive locks can be read as usual.
When the repository can have multiple storages (added by the *add* command), you can select the storage to back up to
by giving a storage name.
When the repository can have multiple storages (added by the *add* command), you can select the storage to back up to by giving a storage name.
You can specify patterns to include/exclude files by putting them in a file named *.duplicacy/filters*. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify the patterns.
@@ -93,29 +85,25 @@ OPTIONS:
-delete delete files not in the snapshot
-stats show statistics during and after restore
-threads <n> number of downloading threads
-limit-rate <kB/s> the maximum download rate (in kilobytes/sec)
-limit-rate <kB/s> the maximum download rate (in kilobytes/sec)
-storage <storage name> restore from the specified storage instead of the default one
```
The *restore* command restores the repository to a previous revision. By default the restore procedure will treat
files that have the same sizes and timestamps as those in the snapshot as unchanged files, but with the -hash option, every file will be fully scanned to make sure they are in fact unchanged.
The *restore* command restores the repository to a previous revision. By default the restore procedure will treat files that have the same sizes and timestamps as those in the snapshot as unchanged files, but with the -hash option, every file will be fully scanned to make sure they are in fact unchanged.
By default the restore procedure will not overwriting existing files, unless the -overwrite option is specified.
By default the restore procedure will not overwrite existing files, unless the `-overwrite` option is specified.
The -delete option indicates that files not in the snapshot will be removed.
The `-delete` option indicates that files not in the snapshot will be removed.
If the -stats option is specified, statistical information such as transfer speed, number of chunks will be displayed
throughout the restore procedure.
If the `-stats` option is specified, statistical information such as transfer speed, and number of chunks will be displayed throughout the restore procedure.
The -threads option can be used to specify more than one thread to download chunks.
The `-threads` option can be used to specify more than one thread to download chunks.
The -limit-rate option sets a cape on the maximum upload rate.
The `-limit-rate` option sets a cap on the maximum download rate.
When the repository can have multiple storages (added by the *add* command), you can select the storage to restore from by specifying the storage name.
Unlike the *backup* procedure that reading the include/exclude patterns from a file, the *restore* procedure reads them
from the command line. If the patterns can cause confusion to the command line argument parser, -- should be prepended to
the patterns. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify patterns.
Unlike the *backup* procedure that reads the include/exclude patterns from a file, the *restore* procedure reads them from the command line. If the patterns can cause confusion to the command line argument parser, -- should be prepended to the patterns. Please refer to the [Include/Exclude Patterns](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#includeexclude-patterns) section for how to specify patterns.
#### List
@@ -124,7 +112,7 @@ SYNOPSIS:
duplicacy list - List snapshots
USAGE:
duplicacy list [command options]
duplicacy list [command options]
OPTIONS:
-all, -a list snapshots with any id
@@ -133,28 +121,21 @@ OPTIONS:
-t <tag> list snapshots with the specified tag
-files print the file list in each snapshot
-chunks print chunks in each snapshot or all chunks if no snapshot specified
-reset-password take passwords from input rather than keychain/keyring or env
-reset-passwords take passwords from input rather than keychain/keyring or env
-storage <storage name> retrieve snapshots from the specified storage
```
The *list* command lists information about specified snapshots. By default it will list snapshots created from the
current repository, but you can list all snapshots stored in the storage by specifying the -all option, or list snapshots
with a different snapshot id using the -id option, and/or snapshots with a particular tag with the -t option.
The *list* command lists information about specified snapshots. By default it will list snapshots created from the current repository, but you can list all snapshots stored in the storage by specifying the -all option, or list snapshots with a different snapshot id using the `-id` option, and/or snapshots with a particular tag with the `-t` option.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing
every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using
the -r option, which either takes a single revision number (-r 123) or a range (-r 123-456).
There can be multiple -r options.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using the `-r` option, which either takes a single revision number `-r 123` or a range `-r 123-456`. There can be multiple `-r` options.
If -files is specified, for each snapshot to be listed, this command will also print information about every file
contained in the snapshot.
If `-files` is specified, for each snapshot to be listed, this command will also print information about every file contained in the snapshot.
If -chunks is specified, the command will also print out every chunk the snapshot references.
If `-chunks` is specified, the command will also print out every chunk the snapshot references.
The -reset-password option is used to reset stored passwords and to allow passwords to be entered again. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more information.
The `-reset-passwords` option is used to reset stored passwords and to allow passwords to be entered again. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more information.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to list
by specifying the storage name.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to list by specifying the storage name.
#### Check
```
@@ -178,23 +159,15 @@ OPTIONS:
The *check* command checks, for each specified snapshot, that all referenced chunks exist in the storage.
By default the *check* command will check snapshots created from the
current repository, but you can check all snapshots stored in the storage at once by specifying the -all option, or
snapshots from a different repository using the -id option, and/or snapshots with a particular tag with the -t option.
current repository, but you can check all snapshots stored in the storage at once by specifying the `-all` option, or snapshots from a different repository using the `-id` option, and/or snapshots with a particular tag with the `-t` option.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing
every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using
the -r option, which either takes a single revision number (-r 123) or a range (-r 123-456).
There can be multiple -r options.
The revision number is a number assigned to the snapshot when it is being created. This number will keep increasing every time a new snapshot is created from a repository. You can refer to snapshots by their revision numbers using the `-r` option, which either takes a single revision number `-r 123` or a range `-r 123-456`. There can be multiple `-r` options.
By default the *check* command only verifies the existence of chunks. To verify the full integrity of a snapshot,
you should specify the -files option, which will download chunks and compute file hashes in memory, to
make sure that all hashes match.
By default the *check* command only verifies the existence of chunks. To verify the full integrity of a snapshot, you should specify the `-files` option, which will download chunks and compute file hashes in memory, to make sure that all hashes match.
By default the *check* command does not find fossils. If the -fossils option is specified, it will find
the fossil if the referenced chunk does not exist. if the -resurrect option is specified, it will turn the fossil back into a chunk.
By default the *check* command does not find fossils. If the `-fossils` option is specified, it will find the fossil if the referenced chunk does not exist. If the `-resurrect` option is specified, it will turn the fossil back into a chunk.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to check
by specifying the storage name.
When the repository can have multiple storages (added by the *add* command), you can specify the storage to check by specifying the storage name.
#### Cat
@@ -217,9 +190,9 @@ The file must be specified with a path relative to the repository.
You can specify a different snapshot id rather than the default id.
The -r option is optional. If not specified, the latest revision will be selected.
The `-r` option is optional. If not specified, the latest revision will be selected.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Diff
```
@@ -235,17 +208,15 @@ OPTIONS:
-hash compute the hashes of on-disk files
-storage <storage name> retrieve files from the specified storage
```
The *diff* command compares the same file in two different snapshots if a file is given, otherwise compares the
two snapshots.
The *diff* command compares the same file in two different snapshots if a file is given, otherwise compares the two snapshots.
The file must be specified with a path relative to the repository.
You can specify a different snapshot id rather than the default snapshot id.
If only one revision is given by -r, the right hand side of the comparison will be the on-disk file.
The -hash option can then instruct this command to compute the hash of the file.
If only one revision is given by `-r`, the right hand side of the comparison will be the on-disk file. The `-hash` option can then instruct this command to compute the hash of the file.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### History
```
@@ -264,13 +235,11 @@ OPTIONS:
The *history* command shows how the hash, size, and timestamp of a file change over the specified set of revisions.
You can specify a different snapshot id rather than the default snapshot id, and multiple -r options to specify the
set of revisions.
You can specify a different snapshot id rather than the default snapshot id, and multiple `-r` options to specify the set of revisions.
The -hash option is to compute the hash of the on-disk file. Otherwise, only the size and timestamp of the on-disk
file will be included.
The `-hash` option is to compute the hash of the on-disk file. Otherwise, only the size and timestamp of the on-disk file will be included.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Prune
```
@@ -295,16 +264,11 @@ OPTIONS:
-storage <storage name> prune snapshots from the specified storage
```
The *prune* command implements the two-step fossil collection algorithm. It will first find fossil collection files
from previous runs and check if contained fossils are eligible for permanent deletion (the fossil deletion step). Then it
will search for snapshots to be deleted, mark unreferenced chunks as fossils (by renaming) and save them in a new fossil
collection file stored locally (the fossil collection step).
The *prune* command implements the two-step fossil collection algorithm. It will first find fossil collection files from previous runs and check if contained fossils are eligible for permanent deletion (the fossil deletion step). Then it will search for snapshots to be deleted, mark unreferenced chunks as fossils (by renaming) and save them in a new fossil collection file stored locally (the fossil collection step).
If a snapshot id is specified, that snapshot id will be used instead of the default one. The -a option will find
snapshots with any id. Snapshots to be deleted can be specified by revision numbers, by a tag, by retention policies,
or by any combination of them.
If a snapshot id is specified, that snapshot id will be used instead of the default one. The `-a` option will find snapshots with any id. Snapshots to be deleted can be specified by revision numbers, by a tag, by retention policies, or by any combination of them.
The retention policies are specified by the -keep option, which accepts an argument in the form of two numbers *n:m*, where *n* indicates the number of days between two consecutive snapshots to keep, and *m* means that the policy only applies to snapshots at least *m* day old. If *n* is zero, any snapshots older than *m* days will be removed.
The retention policies are specified by the `-keep` option, which accepts an argument in the form of two numbers *n:m*, where *n* indicates the number of days between two consecutive snapshots to keep, and *m* means that the policy only applies to snapshots at least *m* days old. If *n* is zero, any snapshots older than *m* days will be removed.
Here are a few sample retention policies:
@@ -315,37 +279,28 @@ $ duplicacy prune -keep 30:180 # Keep 1 snapshot every 30 days for snapshots
$ duplicacy prune -keep 0:360 # Keep no snapshots older than 360 days
```
Multiple -keep options must be sorted by their *m* values in decreasing order. For instance, to combine the above policies into one line, it would become:
Multiple `-keep` options must be sorted by their *m* values in decreasing order. For instance, to combine the above policies into one line, it would become:
```sh
$ duplicacy prune -keep 0:360 -keep 30:180 -keep 7:30 -keep 1:7
```
The -exhaustive option will scan the list of all chunks in the storage, therefore it will find not only
unreferenced chunks from deleted snapshots, but also chunks that become unreferenced for other reasons, such as
those from an incomplete backup. It will also find any file that does not look like a chunk file.
In contrast, a default *prune* command will only identify
The `-exhaustive` option will scan the list of all chunks in the storage, therefore it will find not only unreferenced chunks from deleted snapshots, but also chunks that become unreferenced for other reasons, such as those from an incomplete backup. It will also find any file that does not look like a chunk file. In contrast, a default *prune* command will only identify
chunks referenced by deleted snapshots but not any other snapshots.
The -exclusive option will assume that no other clients are accessing the storage, effectively disabling the
*two-step fossil collection* algorithm. With this option, the *prune* command will immediately remove unreferenced chunks.
The `-exclusive` option will assume that no other clients are accessing the storage, effectively disabling the *two-step fossil collection* algorithm. With this option, the *prune* command will immediately remove unreferenced chunks.
The -dryrun option is used to test what changes the *prune* command would have done. It is guaranteed not to make
any changes on the storage, not even creating the local fossil collection file. The following command checks if the
chunk directory is clean (i.e., if there are any unreferenced chunks, temporary files, or anything else):
The `-dry-run` option is used to test what changes the *prune* command would have done. It is guaranteed not to make any changes on the storage, not even creating the local fossil collection file. The following command checks if the chunk directory is clean (i.e., if there are any unreferenced chunks, temporary files, or anything else):
```
$ duplicacy prune -d -exclusive -exhaustive # Prints out nothing if the chunk directory is clean
```
The -delete-only option will skip the fossil collection step, while the -collect-only option will skip the fossil deletion step.
The `-delete-only` option will skip the fossil collection step, while the `-collect-only` option will skip the fossil deletion step.
For fossils collected in the fossil collection step to be eligible for safe deletion in the fossil deletion step, at least
one new snapshot from *each* snapshot id must be created between two runs of the *prune* command. However, some repository
may not be set up to back up with a regular schedule, and thus literally blocking other repositories from deleting any fossils. Duplicacy by default will ignore repositories that have no new backup in the past 7 days. It also provide an
-ignore option that can be used to skip certain repositories when deciding the deletion criteria.
For fossils collected in the fossil collection step to be eligible for safe deletion in the fossil deletion step, at least one new snapshot from *each* snapshot id must be created between two runs of the *prune* command. However, some repositories may not be set up to back up on a regular schedule, thus blocking other repositories from deleting any fossils. Duplicacy by default will ignore repositories that have no new backup in the past 7 days. It also provides an `-ignore` option that can be used to skip certain repositories when deciding the deletion criteria.
You can use the -storage option to select a different storage other than the default one.
You can use the `-storage` option to select a different storage other than the default one.
#### Password
@@ -384,17 +339,11 @@ OPTIONS:
-copy <storage name> make the new storage copy-compatible with an existing one
```
The *add* command connects another storage to the current repository. Like the *init* command, if the storage has not
been initialized before, a storage configuration file derived from the command line options will be uploaded, but those
options will be ignored if the configuration file already exists in the storage.
The *add* command connects another storage to the current repository. Like the *init* command, if the storage has not been initialized before, a storage configuration file derived from the command line options will be uploaded, but those options will be ignored if the configuration file already exists in the storage.
A unique storage name must be given in order to distinguish it from other storages.
The -copy option is required if later you want to copy snapshots between this storage and another storage.
Two storages are copy-compatible if they have the same average chunk size, the same maximum chunk size,
the same minimum chunk size, the same chunk seed (used in calculating the rolling hash in the variable-size chunks
algorithm), and the same hash key. If the -copy option is specified, these parameters will be copied from
the existing storage rather than from the command line.
The `-copy` option is required if later you want to copy snapshots between this storage and another storage. Two storages are copy-compatible if they have the same average chunk size, the same maximum chunk size, the same minimum chunk size, the same chunk seed (used in calculating the rolling hash in the variable-size chunks algorithm), and the same hash key. If the `-copy` option is specified, these parameters will be copied from the existing storage rather than from the command line.
#### Set
```
@@ -416,16 +365,15 @@ OPTIONS:
The *set* command changes the options for the specified storage.
The -e option turns on the storage encryption. If specified as -e=false, it turns off the storage encryption.
The `-e` option turns on the storage encryption. If specified as `-e=false`, it turns off the storage encryption.
The -no-backup option will not allow backups from this repository to be created.
The `-no-backup` option will not allow backups from this repository to be created.
The -no-restore option will not allow restoring this repository to a different revision.
The `-no-restore` option will not allow restoring this repository to a different revision.
The -no-save-password option will require every password or token to be entered every time and not saved anywhere.
The `-no-save-password` option will require every password or token to be entered every time and not saved anywhere.
The -key and -value options are used to store (in plain text) access keys or tokens need by various storages. Please
refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more details.
The `-key` and `-value` options are used to store (in plain text) access keys or tokens needed by various storages. Please refer to the [Managing Passwords](https://github.com/gilbertchen/duplicacy-beta/blob/master/GUIDE.md#managing-passwords) section for more details.
You can select a storage to change options for by specifying a storage name.
@@ -445,22 +393,34 @@ OPTIONS:
-to <storage name> copy snapshots to the specified storage
```
The *copy* command copies snapshots from one storage to another storage. They must be copy-compatible, i.e., some
configuration parameters must be the same. One storage must be initialized with the -copy option provided by the *add* command.
The *copy* command copies snapshots from one storage to another storage. They must be copy-compatible, i.e., some configuration parameters must be the same. One storage must be initialized with the `-copy` option provided by the *add* command.
Instead of copying all snapshots, you can specify a set of snapshots to copy by giving the -r options. The *copy* command
preserves the revision numbers, so if a revision number already exists on the destination storage the command will fail.
Instead of copying all snapshots, you can specify a set of snapshots to copy by giving the `-r` options. The *copy* command preserves the revision numbers, so if a revision number already exists on the destination storage the command will fail.
If no -from option is given, the snapshots from the default storage will be copied. The -to option specified the
destination storage and is required.
If no `-from` option is given, the snapshots from the default storage will be copied. The `-to` option specifies the destination storage and is required.
## Include/Exclude Patterns
An include pattern starts with +, and an exclude pattern starts with -. Patterns may contain wildcard characters * which matches a path string of any length, and ? matches a single character. Note that both * and ? will match any character including the path separator /.
The path separator is always /, even on Windows.
When matching a path against a list of patterns, the path is compared with the part after + or -, one pattern at a time. Therefore, the order of the patterns is significant. If a match with an include pattern is found, the path is said to be included without further comparisons. If a match with an exclude pattern is found, the path is said to be excluded without further comparison. If a match is not found, the path will be excluded if all patterns are include patterns, but included otherwise.
Patterns ending with a / apply to directories only, and patterns not ending with a / apply to files only. When a directory is excluded, all files and subdirectires under it will also be excluded. Note that the path separator is always /, even on Windows.
Patterns ending with a / apply to directories only, and patterns not ending with a / apply to files only. Patterns ending with * and ?, however, apply to both directories and files. When a directory is excluded, all files and subdirectories under it will also be excluded. Therefore, to include a subdirectory, all parent directories must be explicitly included. For instance, the following pattern list doesn't do what is intended, since the `foo` directory will be excluded so the `foo/bar` will never be visited:
```
+foo/bar/*
-*
```
The correct way is to include `foo` as well:
```
+foo/bar/*
+foo/
-*
```
The following pattern list includes only files under the directory foo/ but not files under the subdirectory foo/bar:
@@ -480,10 +440,10 @@ For the *restore* command, the include/exclude patterns are specified as the com
Duplicacy will attempt to retrieve in three ways the storage password and the storage-specific access tokens/keys.
* If a secret vault service is available, Duplicacy will store passwords/keys entered by the user in such a secret vault and later retrieve them when needed. On Mac OS X it is Keychain, and on Linux it is gnome-keyring. On Windows the passwords/keys are encrypted and decrypted by the Data Protection API, and encrypted passwords/keys are stored in the file *.duplicacy/keyring*. However, if the -no-save-password option is specified for the storage, then Duplicacy will not save passwords this way.
* If an environment variable for a password is provided, Duplicacy will always take it. The table below shows the name of the environment variable for each kind of password. Note that if the storage is not the default one, the storage name will be included in the name of the environment variable.
* If an environment variable for a password is provided, Duplicacy will always take it. The table below shows the name of the environment variable for each kind of password. Note that if the storage is not the default one, the storage name will be included in the name of the environment variable (in uppercase). For example, if your storage name is b2, then the environment variable should be named DUPLICACY_B2_PASSWORD.
* If a matching key and its value are saved to the preference file (.duplicacy/preferences) by the *set* command, the value will be used as the password. The last column in the table below lists the name of the preference key for each type of password.
| password type | environment variable (default storage) | environment variable (non-default storage) | key in preferences |
| password type | environment variable (default storage) | environment variable (non-default storage in uppercase) | key in preferences |
|:----------------:|:----------------:|:----------------:|:----------------:|
| storage password | DUPLICACY_PASSWORD | DUPLICACY_&lt;STORAGENAME&gt;_PASSWORD | password |
| sftp password | DUPLICACY_SSH_PASSWORD | DUPLICACY_&lt;STORAGENAME&gt;_SSH_PASSWORD | ssh_password |
@@ -506,7 +466,7 @@ Duplicacy maintains a local cache under the `.duplicacy/cache` folder in the rep
At the end of a backup operation, Duplicacy will clean up the local cache in such a way that only chunks composing the snapshot file from the last backup will stay in the cache. All other chunks will be removed from the cache. However, if the *prune* command has been run before (which will leave a `.duplicacy/collection` folder in the repository), then the *backup* command won't perform any cache cleanup and will instead defer that to the *prune* command.
At the end of a prune operation, Duplicacy will remove all chunks from the local cache except those composing the snapshot file from the last backup (those that would be kept by the *backup* command), as well as chunks that contain information about chunks referenced by *all* backups from *all* repositories connected to the same storage url.
At the end of a prune operation, Duplicacy will remove all chunks from the local cache except those composing the snapshot file from the last backup (those that would be kept by the *backup* command), as well as chunks that contain information about chunks referenced by *all* backups from *all* repositories connected to the same storage url.
Other commands, such as *list* and *check*, do not clean up the local cache at all, so the local cache may keep growing if many of these commands run consecutively. However, once a *backup* or a *prune* command is invoked, the local cache should shrink to its normal size.

View File

@@ -2,4 +2,5 @@ Copyright © 2017 Acrosync LLC
* Free for personal use or commercial trial
* Non-trial commercial use requires per-user licenses available from [duplicacy.com](https://duplicacy.com/customer) at a cost of $20 per year
* Commercial licenses are not required to restore or manage backups; only the backup command requires a valid commercial license
* Modification and redistribution are permitted, but commercial use of derivative works is subject to the same requirements of this license

View File

@@ -8,10 +8,10 @@ There is a special edition of Duplicacy developed for VMware vSphere (ESXi) name
## Features
Duplicacy currently supports major cloud storage providers (Amazon S3, Google Cloud Storage, Microsoft Azure, Dropbox, Backblaze, Google Drive, Microsoft OneDrive, and Hubic) and offers all essential features of a modern backup tool:
Duplicacy currently supports major cloud storage providers (Amazon S3, Google Cloud Storage, Microsoft Azure, Dropbox, Backblaze B2, Google Drive, Microsoft OneDrive, and Hubic) and offers all essential features of a modern backup tool:
* Incremental backup: only back up what has been changed
* Full snapshot : although each backup is incremental, it must behave like a full snapshot for easy restore and deletion
* Full snapshot: although each backup is incremental, it must behave like a full snapshot for easy restore and deletion
* Deduplication: identical files must be stored as one copy (file-level deduplication), and identical parts from different files must be stored as one copy (block-level deduplication)
* Encryption: encrypt not only file contents but also file paths, sizes, times, etc.
* Deletion: every backup can be deleted independently without affecting others
@@ -133,10 +133,11 @@ Storage URL: /path/to/storage (on Linux or Mac OS X)
```
</details>
<details> <summary>SFTP</summary>
<details> <summary>SFTP</summary>
```
Storage URL: sftp://username@server/path/to/storage
Storage URL: sftp://username@server/path/to/storage (path relative to the home directory)
sftp://username@server//path/to/storage (absolute path)
```
Login methods include password authentication and public key authentication. Due to a limitation of the underlying Go SSH library, the key pair for public key authentication must be generated without a passphrase. To work with a key that has a passphrase, you can set up SSH agent forwarding which is also supported by Duplicacy.
@@ -168,6 +169,27 @@ Storage URL: s3://amazon.com/bucket/path/to/storage (default region is us-east-
You'll need to input an access key and a secret key to access your Amazon S3 storage.
Minio-based S3-compatible storages are also supported by using the `minio` or `minios` backends:
```
Storage URL: minio://region@host/bucket/path/to/storage (without TLS)
Storage URL: minios://region@host/bucket/path/to/storage (with TLS)
```
There is another backend that works with S3 compatible storage providers that require V2 signing:
```
Storage URL: s3c://region@host/bucket/path/to/storage
```
</details>
<details> <summary>Wasabi</summary>
```
Storage URL: s3://us-east-1@s3.wasabisys.com/bucket/path/to/storage
```
[Wasabi](https://wasabi.com) is a relatively new cloud storage service providing an S3-compatible API.
It is well suited for storing backups, because it is much cheaper than Amazon S3 with a storage cost of $0.0039/GB/month and a download fee of $0.04/GB, and no additional charges on API calls.
</details>
@@ -178,7 +200,7 @@ Storage URL: gcs://bucket/path/to/storage
```
Starting from version 2.0.0, a new Google Cloud Storage backend is added which is implemented using the [official Google client library](https://godoc.org/cloud.google.com/go/storage). You must first obtain a credential file by [authorizing](https://duplicacy.com/gcp_start) Duplicacy to access your Google Cloud Storage account or by [downloading](https://console.cloud.google.com/projectselector/iam-admin/serviceaccounts) a service account credential file.
You can also use the s3 protocol to access Google Cloud Storage. To do this, you must enable the [s3 interoperability](https://cloud.google.com/storage/docs/migrating#migration-simple) in your Google Cloud Storage settings and set the storage url as `s3://storage.googleapis.com/bucket/path/to/storage`.
</details>
@@ -201,7 +223,7 @@ Storage URL: b2://bucket
You'll need to input the account id and application key.
Backblaze's B2 storage is not only the least expensive (at 0.5 cent per GB per month), but also the fastest. We have been working closely with their developers to leverage the full potentials provided by the B2 API in order to maximize the transfer speed.
Backblaze's B2 storage is one of the least expensive (at 0.5 cent per GB per month, with a download fee of 2 cents per GB, plus additional charges for API calls).
</details>
@@ -211,8 +233,7 @@ Backblaze's B2 storage is not only the least expensive (at 0.5 cent per GB per m
Storage URL: gcd://path/to/storage
```
To use Google Drive as the storage, you first need to download a token file from https://duplicacy.com/gcd_start by
authorizing Duplicacy to access your Google Drive, and then enter the path to this token file to Duplicacy when prompted.
To use Google Drive as the storage, you first need to download a token file from https://duplicacy.com/gcd_start by authorizing Duplicacy to access your Google Drive, and then enter the path to this token file to Duplicacy when prompted.
</details>
@@ -222,8 +243,7 @@ authorizing Duplicacy to access your Google Drive, and then enter the path to th
Storage URL: one://path/to/storage
```
To use Microsoft OneDrive as the storage, you first need to download a token file from https://duplicacy.com/one_start by
authorizing Duplicacy to access your OneDrive, and then enter the path to this token file to Duplicacy when prompted.
To use Microsoft OneDrive as the storage, you first need to download a token file from https://duplicacy.com/one_start by authorizing Duplicacy to access your OneDrive, and then enter the path to this token file to Duplicacy when prompted.
</details>
@@ -233,8 +253,7 @@ authorizing Duplicacy to access your OneDrive, and then enter the path to this t
Storage URL: hubic://path/to/storage
```
To use Hubic as the storage, you first need to download a token file from https://duplicacy.com/hubic_start by
authorizing Duplicacy to access your Hubic drive, and then enter the path to this token file to Duplicacy when prompted.
To use Hubic as the storage, you first need to download a token file from https://duplicacy.com/hubic_start by authorizing Duplicacy to access your Hubic drive, and then enter the path to this token file to Duplicacy when prompted.
Hubic offers the most free space (25GB) of all major cloud providers and there is no bandwidth charge (same as Google Drive and OneDrive), so it may be worth a try.
@@ -253,18 +272,18 @@ Deletion of old backups is possible, but no cloud storages are supported.
Multiple clients can back up to the same storage, but only sequential access is granted by the [locking on-disk data structures](http://obnam.org/locking/).
It is unclear if the lack of cloud backends is due to difficulties in porting the locking data structures to cloud storage APIs.
[Attic](https://attic-backup.org) has been acclaimed by some as the [Holy Grail of backups](https://www.stavros.io/posts/holy-grail-backups). It follows the same incremental backup model as Obnam, but embraces the variable-size chunk algorithm for better performance and better deduplication. Deletions of old backup is also supported. However, no cloud backends are implemented, as in Obnam. Although concurrent backups from multiple clients to the same storage is in theory possible by the use of locking, it is
[not recommended](http://librelist.com/browser//attic/2014/11/11/backing-up-multiple-servers-into-a-single-repository/#e96345aa5a3469a87786675d65da492b) by the developer due to chunk indices being kept in a local cache.
[Attic](https://attic-backup.org) has been acclaimed by some as the [Holy Grail of backups](https://www.stavros.io/posts/holy-grail-backups). It follows the same incremental backup model as Obnam, but embraces the variable-size chunk algorithm for better performance and better deduplication. Deletion of old backups is also supported. However, no cloud backends are implemented, as in Obnam. Although concurrent backups from multiple clients to the same storage are in theory possible by the use of locking, it is
[not recommended](http://librelist.com/browser//attic/2014/11/11/backing-up-multiple-servers-into-a-single-repository/#e96345aa5a3469a87786675d65da492b) by the developer due to chunk indices being kept in a local cache.
Concurrent access is not only a convenience; it is a necessity for better deduplication. For instance, if multiple machines with the same OS installed can back up their entire drives to the same storage, only one copy of the system files needs to be stored, greatly reducing the storage space regardless of the number of machines. Attic still adopts the traditional approach of using a centralized indexing database to manage chunks, and relies heavily on caching to improve performance. The presence of exclusive locking makes it hard to be adapted for cloud storage APIs and reduces the level of deduplication.
[restic](https://restic.github.io) is a more recent addition. It is worth mentioning here because, like Duplicacy, it is written in Go. It uses a format similar to the git packfile format. Multiple clients backing up to the same storage are still guarded by
[restic](https://restic.github.io) is a more recent addition. It is worth mentioning here because, like Duplicacy, it is written in Go. It uses a format similar to the git packfile format. Multiple clients backing up to the same storage are still guarded by
[locks](https://github.com/restic/restic/blob/master/doc/Design.md#locks). A prune operation will therefore completely block all other clients connected to the storage from doing their regular backups. Moreover, since most cloud storage services do not provide a locking service, the best effort is to use some basic file operations to simulate a lock, but distributed locking is known to be a hard problem and it is unclear how reliable restic's lock implementation is. A faulty implementation may cause a prune operation to accidentally delete data still in use, resulting in unrecoverable data loss. This is the exact problem that we avoided by taking the lock-free approach.
The following table compares the feature lists of all these backup tools:
| Feature/Tool | duplicity | bup | Obnam | Attic | restic | **Duplicacy** |
| Feature/Tool | duplicity | bup | Obnam | Attic | restic | **Duplicacy** |
|:------------------:|:---------:|:---:|:-----------------:|:---------------:|:-----------------:|:-------------:|
| Incremental Backup | Yes | Yes | Yes | Yes | Yes | **Yes** |
| Full Snapshot | No | Yes | Yes | Yes | Yes | **Yes** |
@@ -281,20 +300,20 @@ The following table compares the feature lists of all these backup tools:
Duplicacy is not only more feature-rich but also faster than other backup tools. The following table lists the running times in seconds of backing up the [Linux code base](https://github.com/torvalds/linux) using Duplicacy and 3 other tools. Clearly Duplicacy is the fastest by a significant margin.
| | Duplicacy | restic | Attic | duplicity |
| | Duplicacy | restic | Attic | duplicity |
|:------------------:|:----------------:|:----------:|:----------:|:-----------:|
| Initial backup | 13.7 | 20.7 | 26.9 | 44.2 |
| 2nd backup | 4.8 | 8.0 | 15.4 | 19.5 |
| 3rd backup | 6.9 | 11.9 | 19.6 | 29.8 |
| 4th backup | 3.3 | 7.0 | 13.7 | 18.6 |
| 5th backup | 9.9 | 11.4 | 19.9 | 28.0 |
| 6th backup | 3.8 | 8.0 | 16.8 | 22.0 |
| 7th backup | 5.1 | 7.8 | 14.3 | 21.6 |
| 8th backup | 9.5 | 13.5 | 18.3 | 35.0 |
| 9th backup | 4.3 | 9.0 | 15.7 | 24.9 |
| 10th backup | 7.9 | 20.2 | 32.2 | 35.0 |
| 11th backup | 4.6 | 9.1 | 16.8 | 28.1 |
| 12th backup | 7.4 | 12.0 | 21.7 | 37.4 |
| Initial backup | 13.7 | 20.7 | 26.9 | 44.2 |
| 2nd backup | 4.8 | 8.0 | 15.4 | 19.5 |
| 3rd backup | 6.9 | 11.9 | 19.6 | 29.8 |
| 4th backup | 3.3 | 7.0 | 13.7 | 18.6 |
| 5th backup | 9.9 | 11.4 | 19.9 | 28.0 |
| 6th backup | 3.8 | 8.0 | 16.8 | 22.0 |
| 7th backup | 5.1 | 7.8 | 14.3 | 21.6 |
| 8th backup | 9.5 | 13.5 | 18.3 | 35.0 |
| 9th backup | 4.3 | 9.0 | 15.7 | 24.9 |
| 10th backup | 7.9 | 20.2 | 32.2 | 35.0 |
| 11th backup | 4.6 | 9.1 | 16.8 | 28.1 |
| 12th backup | 7.4 | 12.0 | 21.7 | 37.4 |
For more details and other speed comparison results, please visit https://github.com/gilbertchen/benchmarking. There you can also find test scripts that you can use to run your own experiments.
@@ -303,4 +322,5 @@ For more details and other speed comparison results, please visit https://github
* Free for personal use or commercial trial
* Non-trial commercial use requires per-user licenses available from [duplicacy.com](https://duplicacy.com/customer) at a cost of $20 per year
* Commercial licenses are not required to restore or manage backups; only the backup command requires a valid commercial license
* Modification and redistribution are permitted, but commercial use of derivative works is subject to the same requirements of this license

View File

@@ -12,6 +12,7 @@ import (
"regexp"
"strings"
"strconv"
"runtime"
"os/exec"
"os/signal"
"encoding/json"
@@ -147,18 +148,27 @@ func runScript(context *cli.Context, storageName string, phase string) bool {
preferencePath := duplicacy.GetDuplicacyPreferencePath()
scriptDir, _ := filepath.Abs(path.Join(preferencePath, "scripts"))
scriptName := phase + "-" + context.Command.Name
scriptNames := []string { phase + "-" + context.Command.Name,
storageName + "-" + phase + "-" + context.Command.Name }
script := path.Join(scriptDir, scriptName)
if _, err := os.Stat(script); err != nil {
scriptName = storageName + "-" + scriptName
script := ""
for _, scriptName := range scriptNames {
script = path.Join(scriptDir, scriptName)
if _, err = os.Stat(script); err != nil {
return false
if runtime.GOOS == "windows" {
script += ".bat"
}
if _, err := os.Stat(script); err == nil {
break
} else {
script = ""
}
}
duplicacy.LOG_INFO("SCRIPT_RUN", "Running %s script", scriptName)
if script == "" {
return false
}
duplicacy.LOG_INFO("SCRIPT_RUN", "Running script %s", script)
output, err := exec.Command(script, os.Args...).CombinedOutput()
for _, line := range strings.Split(string(output), "\n") {
@@ -534,7 +544,9 @@ func changePassword(context *cli.Context) {
password := ""
if preference.Encrypted {
password = duplicacy.GetPassword(*preference, "password", "Enter old password for storage %s:", false, true)
password = duplicacy.GetPassword(*preference, "password",
fmt.Sprintf("Enter old password for storage %s:", preference.StorageURL),
false, true)
}
config, _, err := duplicacy.DownloadConfig(storage, password)
@@ -1008,12 +1020,17 @@ func copySnapshots(context *cli.Context) {
os.Exit(ArgumentExitCode)
}
threads := context.Int("threads")
if threads < 1 {
threads = 1
}
repository, source := getRepositoryPreference(context, context.String("from"))
runScript(context, source.Name, "pre")
duplicacy.LOG_INFO("STORAGE_SET", "Source storage set to %s", source.StorageURL)
sourceStorage := duplicacy.CreateStorage(*source, false, 1)
sourceStorage := duplicacy.CreateStorage(*source, false, threads)
if sourceStorage == nil {
return
}
@@ -1043,7 +1060,7 @@ func copySnapshots(context *cli.Context) {
duplicacy.LOG_INFO("STORAGE_SET", "Destination storage set to %s", destination.StorageURL)
destinationStorage := duplicacy.CreateStorage(*destination, false, 1)
destinationStorage := duplicacy.CreateStorage(*destination, false, threads)
if destinationStorage == nil {
return
}
@@ -1054,8 +1071,8 @@ func copySnapshots(context *cli.Context) {
"Enter destination storage password:",false, false)
}
sourceStorage.SetRateLimits(context.Int("download-rate-limit"), 0)
destinationStorage.SetRateLimits(0, context.Int("upload-rate-limit"))
sourceStorage.SetRateLimits(context.Int("download-limit-rate"), 0)
destinationStorage.SetRateLimits(0, context.Int("upload-limit-rate"))
destinationManager := duplicacy.CreateBackupManager(destination.SnapshotID, destinationStorage, repository,
destinationPassword)
@@ -1068,11 +1085,6 @@ func copySnapshots(context *cli.Context) {
snapshotID = context.String("id")
}
threads := context.Int("threads")
if threads < 1 {
threads = 1
}
sourceManager.CopySnapshots(destinationManager, snapshotID, revisions, threads)
runScript(context, source.Name, "post")
}
@@ -1683,7 +1695,7 @@ func main() {
app.Name = "duplicacy"
app.HelpName = "duplicacy"
app.Usage = "A new generation cloud backup tool based on lock-free deduplication"
app.Version = "2.0.6"
app.Version = "2.0.9"
// If the program is interrupted, call the RunAtError function.
c := make(chan os.Signal, 1)

21
integration_tests/copy_test.sh Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Integration test for the `copy` command: performs two rounds of
# "add files -> backup -> copy from default to secondary" and then runs
# `check` against both storages.
# Load the shared test helpers. `fixture`, `add_file` and the variables
# TEST_REPO, TEST_STORAGE, SECONDARY_STORAGE and DUPLICACY are not defined in
# this script, so they are presumably provided by this file — TODO confirm.
. ./test_functions.sh
# Prepare the test environment (helper defined outside this script).
fixture
# All following commands run from inside the test repository.
pushd ${TEST_REPO}
# Initialize the repository against the primary storage.
# NOTE(review): `-c 1k` presumably selects a very small chunk size so that the
# small test files still produce multiple chunks — confirm against the CLI docs.
${DUPLICACY} init integration-tests $TEST_STORAGE -c 1k
# Register a second storage named "secondary".
# NOTE(review): `-copy default` presumably makes it copy-compatible with the
# "default" storage — confirm against the CLI docs.
${DUPLICACY} add -copy default secondary integration-tests $SECONDARY_STORAGE
# Round 1: create two files, back them up, then copy to the secondary storage.
add_file file1
add_file file2
${DUPLICACY} backup
${DUPLICACY} copy -from default -to secondary
# Round 2: add two more files and repeat, so the second copy runs against a
# destination that already holds the result of the first copy.
add_file file3
add_file file4
${DUPLICACY} backup
${DUPLICACY} copy -from default -to secondary
# Check the default storage, then the secondary storage.
${DUPLICACY} check --files -stats
${DUPLICACY} check --files -stats -storage secondary
# Return to the directory we started from.
popd

View File

@@ -14,14 +14,13 @@ import (
type AzureStorage struct {
RateLimitedStorage
clients []*storage.BlobStorageClient
container string
containers []*storage.Container
}
func CreateAzureStorage(accountName string, accountKey string,
container string, threads int) (azureStorage *AzureStorage, err error) {
containerName string, threads int) (azureStorage *AzureStorage, err error) {
var clients []*storage.BlobStorageClient
var containers []*storage.Container
for i := 0; i < threads; i++ {
client, err := storage.NewBasicClient(accountName, accountKey)
@@ -31,21 +30,21 @@ func CreateAzureStorage(accountName string, accountKey string,
}
blobService := client.GetBlobService()
clients = append(clients, &blobService)
container := blobService.GetContainerReference(containerName)
containers = append(containers, container)
}
exist, err := clients[0].ContainerExists(container)
exist, err := containers[0].Exists()
if err != nil {
return nil, err
}
if !exist {
return nil, fmt.Errorf("container %s does not exist", container)
return nil, fmt.Errorf("container %s does not exist", containerName)
}
azureStorage = &AzureStorage {
clients: clients,
container: container,
containers: containers,
}
return
@@ -77,7 +76,7 @@ func (azureStorage *AzureStorage) ListFiles(threadIndex int, dir string) (files
for {
results, err := azureStorage.clients[threadIndex].ListBlobs(azureStorage.container, parameters)
results, err := azureStorage.containers[threadIndex].ListBlobs(parameters)
if err != nil {
return nil, nil, err
}
@@ -115,14 +114,15 @@ func (azureStorage *AzureStorage) ListFiles(threadIndex int, dir string) (files
// DeleteFile deletes the file or directory at 'filePath'.
func (storage *AzureStorage) DeleteFile(threadIndex int, filePath string) (err error) {
_, err = storage.clients[threadIndex].DeleteBlobIfExists(storage.container, filePath)
_, err = storage.containers[threadIndex].GetBlobReference(filePath).DeleteIfExists(nil)
return err
}
// MoveFile renames the file.
func (storage *AzureStorage) MoveFile(threadIndex int, from string, to string) (err error) {
source := storage.clients[threadIndex].GetBlobURL(storage.container, from)
err = storage.clients[threadIndex].CopyBlob(storage.container, to, source)
source := storage.containers[threadIndex].GetBlobReference(from)
destination := storage.containers[threadIndex].GetBlobReference(to)
err = destination.Copy(source.GetURL(), nil)
if err != nil {
return err
}
@@ -136,7 +136,8 @@ func (storage *AzureStorage) CreateDirectory(threadIndex int, dir string) (err e
// GetFileInfo returns the information about the file or directory at 'filePath'.
func (storage *AzureStorage) GetFileInfo(threadIndex int, filePath string) (exist bool, isDir bool, size int64, err error) {
properties, err := storage.clients[threadIndex].GetBlobProperties(storage.container, filePath)
blob := storage.containers[threadIndex].GetBlobReference(filePath)
err = blob.GetProperties(nil)
if err != nil {
if strings.Contains(err.Error(), "404") {
return false, false, 0, nil
@@ -145,7 +146,7 @@ func (storage *AzureStorage) GetFileInfo(threadIndex int, filePath string) (exis
}
}
return true, false, properties.ContentLength, nil
return true, false, blob.Properties.ContentLength, nil
}
// FindChunk finds the chunk with the specified id. If 'isFossil' is true, it will search for chunk files with
@@ -167,21 +168,22 @@ func (storage *AzureStorage) FindChunk(threadIndex int, chunkID string, isFossil
// DownloadFile reads the file at 'filePath' into the chunk.
func (storage *AzureStorage) DownloadFile(threadIndex int, filePath string, chunk *Chunk) (err error) {
readCloser, err := storage.clients[threadIndex].GetBlob(storage.container, filePath)
readCloser, err := storage.containers[threadIndex].GetBlobReference(filePath).Get(nil)
if err != nil {
return err
}
defer readCloser.Close()
_, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / len(storage.clients))
_, err = RateLimitedCopy(chunk, readCloser, storage.DownloadRateLimit / len(storage.containers))
return err
}
// UploadFile writes 'content' to the file at 'filePath'.
func (storage *AzureStorage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
reader := CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.clients))
return storage.clients[threadIndex].CreateBlockBlobFromReader(storage.container, filePath, uint64(len(content)), reader, nil)
reader := CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.containers))
blob := storage.containers[threadIndex].GetBlobReference(filePath)
return blob.CreateBlockBlobFromReader(reader, nil)
}

View File

@@ -17,6 +17,7 @@ import (
"sync/atomic"
"strings"
"strconv"
"runtime"
"encoding/hex"
"path/filepath"
)
@@ -76,7 +77,7 @@ func (manager *BackupManager) SetupSnapshotCache(storageName string) bool {
preferencePath := GetDuplicacyPreferencePath()
cacheDir := path.Join(preferencePath, "cache", storageName)
storage, err := CreateFileStorage(cacheDir, 1)
storage, err := CreateFileStorage(cacheDir, 2, false, 1)
if err != nil {
LOG_ERROR("BACKUP_CACHE", "Failed to create the snapshot cache dir: %v", err)
return false
@@ -621,7 +622,7 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta
}
for _, dir := range skippedDirectories {
LOG_WARN("SKIP_DIRECTORY", "Subdirecotry %s cannot be listed", dir)
LOG_WARN("SKIP_DIRECTORY", "Subdirectory %s cannot be listed", dir)
}
for _, file := range fileReader.SkippedFiles {
@@ -918,7 +919,9 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
if deleteMode && len(patterns) == 0 {
for _, file := range extraFiles {
// Reverse the order to make sure directories are empty before being deleted
for i := range extraFiles {
file := extraFiles[len(extraFiles) - 1 - i]
fullPath := joinPath(top, file)
os.Remove(fullPath)
LOG_INFO("RESTORE_DELETE", "Deleted %s", file)
@@ -932,8 +935,6 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu
}
}
RemoveEmptyDirectories(top)
if showStatistics {
for _, file := range downloadedFiles {
LOG_INFO("DOWNLOAD_DONE", "Downloaded %s (%d)", file.Path, file.Size)
@@ -1026,7 +1027,7 @@ func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *C
totalUploadedSnapshotChunkSize += int64(chunkSize)
totalUploadedSnapshotChunkBytes += int64(uploadSize)
} else {
LOG_DEBUG("CHUNK_EXIST", "Skipped snpashot chunk %s in the storage", chunk.GetID())
LOG_DEBUG("CHUNK_EXIST", "Skipped snapshot chunk %s in the storage", chunk.GetID())
}
}
@@ -1152,21 +1153,29 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
existingFile, err = os.Open(fullPath)
if err != nil {
if os.IsNotExist(err) {
if inPlace && entry.Size > 100 * 1024 * 1024 {
// macOS has no sparse file support
if inPlace && entry.Size > 100 * 1024 * 1024 && runtime.GOOS != "darwin" {
// Create an empty sparse file
existingFile, err = os.OpenFile(fullPath, os.O_WRONLY | os.O_CREATE | os.O_TRUNC, 0600)
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to create the file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to create the file %s for in-place writing: %v", fullPath, err)
return false
}
_, err = existingFile.Seek(entry.Size - 1, 0)
n := int64(1)
// There is a go bug on Windows (https://github.com/golang/go/issues/21681) that causes Seek to fail
// if the lower 32 bit of the offset argument is 0xffffffff. Therefore we need to avoid that value by increasing n.
if uint32(entry.Size) == 0 && (entry.Size >> 32) > 0 {
n = int64(2)
}
_, err = existingFile.Seek(entry.Size - n, 0)
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to resize the initial file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to resize the initial file %s for in-place writing: %v", fullPath, err)
return false
}
_, err = existingFile.Write([]byte("\x00"))
_, err = existingFile.Write([]byte("\x00\x00")[:n])
if err != nil {
LOG_ERROR("DOWNLOAD_CREATE", "Failed to initialize the sparse file %s for in-place writing", fullPath)
LOG_ERROR("DOWNLOAD_CREATE", "Failed to initialize the sparse file %s for in-place writing: %v", fullPath, err)
return false
}
existingFile.Close()
@@ -1196,13 +1205,17 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun
fileHasher := manager.config.NewFileHasher()
buffer := make([]byte, 64 * 1024)
err = nil
for i := entry.StartChunk; i <= entry.EndChunk; i++ {
// We set to read one more byte so the file hash will be different if the file to be restored is a
// truncated portion of the existing file
for i := entry.StartChunk; i <= entry.EndChunk + 1; i++ {
hasher := manager.config.NewKeyedHasher(manager.config.HashKey)
chunkSize := chunkDownloader.taskList[i].chunkLength
chunkSize := 1 // the size of extra chunk beyond EndChunk
if i == entry.StartChunk {
chunkSize -= entry.StartOffset
} else if i == entry.EndChunk {
chunkSize = entry.EndOffset
} else if i > entry.StartChunk && i < entry.EndChunk {
chunkSize = chunkDownloader.taskList[i].chunkLength
}
count := 0
for count < chunkSize {
@@ -1471,14 +1484,27 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
return false
}
revisionMap := make(map[int]bool)
if snapshotID == "" && len(revisionsToBeCopied) > 0 {
LOG_ERROR("SNAPSHOT_ERROR", "You must specify the snapshot id when one or more revisions are specified.")
return false
}
revisionMap := make(map[string]map[int]bool)
_, found := revisionMap[snapshotID]
if !found {
revisionMap[snapshotID] = make(map[int]bool)
}
for _, revision := range revisionsToBeCopied {
revisionMap[revision] = true
revisionMap[snapshotID][revision] = true
}
var snapshots [] *Snapshot
var otherSnapshots [] *Snapshot
var snapshotIDs [] string
var err error
if snapshotID == "" {
snapshotIDs, err = manager.SnapshotManager.ListSnapshotIDs()
if err != nil {
@@ -1490,6 +1516,10 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
for _, id := range snapshotIDs {
_, found := revisionMap[id]
if !found {
revisionMap[id] = make(map[int]bool)
}
revisions, err := manager.SnapshotManager.ListSnapshotRevisions(id)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions for snapshot %s: %v", id, err)
@@ -1498,9 +1528,14 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
for _, revision := range revisions {
if len(revisionsToBeCopied) > 0 {
if _, found := revisionMap[revision]; !found {
if _, found := revisionMap[id][revision]; found {
revisionMap[id][revision] = true
} else {
revisionMap[id][revision] = false
continue
}
} else {
revisionMap[id][revision] = true
}
snapshotPath := fmt.Sprintf("snapshots/%s/%d", id, revision)
@@ -1512,21 +1547,44 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
if exist {
LOG_INFO("SNAPSHOT_EXIST", "Snapshot %s at revision %d already exists in the destination storage",
LOG_INFO("SNAPSHOT_EXIST", "Snapshot %s at revision %d already exists at the destination storage",
id, revision)
revisionMap[id][revision] = false
continue
}
snapshot := manager.SnapshotManager.DownloadSnapshot(id, revision)
snapshots = append(snapshots, snapshot)
}
otherRevisions, err := otherManager.SnapshotManager.ListSnapshotRevisions(id)
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all revisions at the destination for snapshot %s: %v", id, err)
return false
}
for _, otherRevision := range otherRevisions {
otherSnapshot := otherManager.SnapshotManager.DownloadSnapshot(id, otherRevision)
otherSnapshots = append(otherSnapshots, otherSnapshot)
}
}
if len(snapshots) == 0 {
LOG_INFO("SNAPSHOT_COPY", "Nothing to copy, all snapshot revisions exist at the destination.")
return true
}
chunks := make(map[string]bool)
for _, snapshot := range snapshots {
if revisionMap[snapshot.ID][snapshot.Revision] == false {
continue
}
LOG_TRACE("SNAPSHOT_COPY", "Copying snapshot %s at revision %d", snapshot.ID, snapshot.Revision)
for _, chunkHash := range snapshot.FileSequence {
chunks[chunkHash] = true
}
@@ -1552,42 +1610,90 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho
}
}
for _, otherSnapshot := range otherSnapshots {
for _, chunkHash := range otherSnapshot.FileSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
for _, chunkHash := range otherSnapshot.ChunkSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
for _, chunkHash := range otherSnapshot.LengthSequence {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
description := otherManager.SnapshotManager.DownloadSequence(otherSnapshot.ChunkSequence)
err := otherSnapshot.LoadChunks(description)
if err != nil {
LOG_ERROR("SNAPSHOT_CHUNK", "Failed to load chunks for destination snapshot %s at revision %d: %v",
otherSnapshot.ID, otherSnapshot.Revision, err)
return false
}
for _, chunkHash := range otherSnapshot.ChunkHashes {
if _, found := chunks[chunkHash]; found {
chunks[chunkHash] = false
}
}
}
chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, false, threads)
chunkUploader := CreateChunkUploader(otherManager.config, otherManager.storage, nil, threads,
func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) {
if skipped {
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) exists in the destination", chunk.GetID(), chunkIndex, len(chunks))
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) exists at the destination", chunk.GetID(), chunkIndex, len(chunks))
} else {
LOG_INFO("SNAPSHOT_COPY", "Copied chunk %s (%d/%d)", chunk.GetID(), chunkIndex, len(chunks))
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) copied to the destination", chunk.GetID(), chunkIndex, len(chunks))
}
otherManager.config.PutChunk(chunk)
})
chunkUploader.Start()
totalCopied := 0
totalSkipped := 0
chunkIndex := 0
for chunkHash, _ := range chunks {
for chunkHash, needsCopy := range chunks {
chunkIndex++
chunkID := manager.config.GetChunkIDFromHash(chunkHash)
newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash)
LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID)
i := chunkDownloader.AddChunk(chunkHash)
chunk := chunkDownloader.WaitForChunk(i)
newChunk := otherManager.config.GetChunk()
newChunk.Reset(true)
newChunk.Write(chunk.GetBytes())
chunkUploader.StartChunk(newChunk, chunkIndex)
if needsCopy {
newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash)
LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID)
i := chunkDownloader.AddChunk(chunkHash)
chunk := chunkDownloader.WaitForChunk(i)
newChunk := otherManager.config.GetChunk()
newChunk.Reset(true)
newChunk.Write(chunk.GetBytes())
chunkUploader.StartChunk(newChunk, chunkIndex)
totalCopied++
} else {
LOG_INFO("SNAPSHOT_COPY", "Chunk %s (%d/%d) skipped at the destination", chunkID, chunkIndex, len(chunks))
totalSkipped++
}
}
chunkDownloader.Stop()
chunkUploader.Stop()
LOG_INFO("SNAPSHOT_COPY", "Total chunks copied = %d, skipped = %d.", totalCopied, totalSkipped)
for _, snapshot := range snapshots {
otherManager.storage.CreateDirectory(0, fmt.Sprintf("snapshots/%s", manager.snapshotID))
if revisionMap[snapshot.ID][snapshot.Revision] == false {
continue
}
otherManager.storage.CreateDirectory(0, fmt.Sprintf("snapshots/%s", snapshot.ID))
description, _ := snapshot.MarshalJSON()
path := fmt.Sprintf("snapshots/%s/%d", manager.snapshotID, snapshot.Revision)
path := fmt.Sprintf("snapshots/%s/%d", snapshot.ID, snapshot.Revision)
otherManager.SnapshotManager.UploadFile(path, path, description)
LOG_INFO("SNAPSHOT_COPY", "Copied snapshot %s at revision %d", snapshot.ID, snapshot.Revision)
}

View File

@@ -104,6 +104,27 @@ func modifyFile(path string, portion float32) {
}
}
// checkExistence stats 'path' and reports a test error through 't' when the result does not match
// the expectation:
//   - exists == false: the stat call must fail with a "not exist" error;
//   - exists == true and isDir == true: 'path' must be a directory;
//   - exists == true and isDir == false: 'path' must not be a directory.
// 'isDir' is ignored when 'exists' is false.
func checkExistence(t *testing.T, path string, exists bool, isDir bool) {
	info, statErr := os.Stat(path)

	// Expecting absence: anything other than a clean "not exist" error is suspicious.
	if !exists {
		if statErr == nil || !os.IsNotExist(statErr) {
			t.Errorf("%s may exist: %v", path, statErr)
		}
		return
	}

	// Expecting presence: the stat must succeed and the entry kind must match 'isDir'.
	switch {
	case statErr != nil:
		t.Errorf("%s does not exist: %v", path, statErr)
	case isDir && !info.Mode().IsDir():
		t.Errorf("%s is not a directory", path)
	case !isDir && info.Mode().IsDir():
		t.Errorf("%s is not a file", path)
	}
}
func truncateFile(path string) {
file, err := os.OpenFile(path, os.O_WRONLY, 0644)
if err != nil {
@@ -173,6 +194,9 @@ func TestBackupManager(t *testing.T) {
os.Mkdir(testDir + "/repository1", 0700)
os.Mkdir(testDir + "/repository1/dir1", 0700)
os.Mkdir(testDir + "/repository1/.duplicacy", 0700)
os.Mkdir(testDir + "/repository2", 0700)
os.Mkdir(testDir + "/repository2/.duplicacy", 0700)
maxFileSize := 1000000
//maxFileSize := 200000
@@ -215,14 +239,14 @@ func TestBackupManager(t *testing.T) {
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository1")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager := CreateBackupManager("host1", storage, testDir, password)
backupManager.SetupSnapshotCache("default")
SetDuplicacyPreferencePath(testDir + "/repository1")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir + "/repository1", /*quickMode=*/true, threads, "first", false, false)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2")
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
backupManager.Restore(testDir + "/repository2", threads, /*inPlace=*/false, /*quickMode=*/false, threads, /*overwrite=*/true,
/*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)
@@ -243,10 +267,10 @@ func TestBackupManager(t *testing.T) {
modifyFile(testDir + "/repository1/file2", 0.2)
modifyFile(testDir + "/repository1/dir1/file3", 0.3)
SetDuplicacyPreferencePath(testDir + "/repository1")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir + "/repository1", /*quickMode=*/true, threads, "second", false, false)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2")
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
backupManager.Restore(testDir + "/repository2", 2, /*inPlace=*/true, /*quickMode=*/true, threads, /*overwrite=*/true,
/*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)
@@ -258,13 +282,25 @@ func TestBackupManager(t *testing.T) {
}
}
// Truncate file2 and add a few empty directories
truncateFile(testDir + "/repository1/file2")
SetDuplicacyPreferencePath(testDir + "/repository1")
os.Mkdir(testDir + "/repository1/dir2", 0700)
os.Mkdir(testDir + "/repository1/dir2/dir3", 0700)
os.Mkdir(testDir + "/repository1/dir4", 0700)
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Backup(testDir + "/repository1", /*quickMode=*/false, threads, "third", false, false)
time.Sleep(time.Duration(delay) * time.Second)
SetDuplicacyPreferencePath(testDir + "/repository2")
// Create some directories and files under repository2 that will be deleted during restore
os.Mkdir(testDir + "/repository2/dir5", 0700)
os.Mkdir(testDir + "/repository2/dir5/dir6", 0700)
os.Mkdir(testDir + "/repository2/dir7", 0700)
createRandomFile(testDir + "/repository2/file4", 100)
createRandomFile(testDir + "/repository2/dir5/file5", 100)
SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy")
backupManager.Restore(testDir + "/repository2", 3, /*inPlace=*/true, /*quickMode=*/false, threads, /*overwrite=*/true,
/*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/nil)
/*deleteMode=*/true, /*showStatistics=*/false, /*patterns=*/nil)
for _, f := range []string{ "file1", "file2", "dir1/file3" } {
hash1 := getFileHash(testDir + "/repository1/" + f)
@@ -274,9 +310,22 @@ func TestBackupManager(t *testing.T) {
}
}
// These files/dirs should not exist because deleteMode == true
checkExistence(t, testDir + "/repository2/dir5", false, false);
checkExistence(t, testDir + "/repository2/dir5/dir6", false, false);
checkExistence(t, testDir + "/repository2/dir7", false, false);
checkExistence(t, testDir + "/repository2/file4", false, false);
checkExistence(t, testDir + "/repository2/dir5/file5", false, false);
// These empty dirs should exist
checkExistence(t, testDir + "/repository2/dir2", true, true);
checkExistence(t, testDir + "/repository2/dir2/dir3", true, true);
checkExistence(t, testDir + "/repository2/dir4", true, true);
// Remove file2 and dir1/file3 and restore them from revision 3
os.Remove(testDir + "/repository1/file2")
os.Remove(testDir + "/repository1/dir1/file3")
SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy")
backupManager.Restore(testDir + "/repository1", 3, /*inPlace=*/true, /*quickMode=*/false, threads, /*overwrite=*/true,
/*deleteMode=*/false, /*showStatistics=*/false, /*patterns=*/[]string{"+file2", "+dir1/file3", "-*"})

View File

@@ -314,7 +314,11 @@ func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadT
if !exist {
// A chunk is not found. This is a serious error and hopefully it will never happen.
LOG_FATAL("DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
if err != nil {
LOG_FATAL("DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err)
} else {
LOG_FATAL("DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID)
}
return false
}
LOG_DEBUG("CHUNK_FOSSIL", "Chunk %s has been marked as a fossil", chunkID)

View File

@@ -122,7 +122,7 @@ func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) boo
// Chunk deduplication by name in effect here.
LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID)
uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, 0)
uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0)
atomic.AddInt32(&uploader.numberOfUploadingTasks, -1)
return false
}

View File

@@ -15,6 +15,7 @@ import (
"encoding/json"
"encoding/base64"
"strings"
"runtime"
)
@@ -488,7 +489,14 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns [] string
skippedFiles = append(skippedFiles, entry.Path)
continue
}
entry = CreateEntryFromFileInfo(stat, "")
newEntry := CreateEntryFromFileInfo(stat, "")
if runtime.GOOS == "windows" {
// On Windows, stat.Name() is the last component of the target, so we need to construct the correct
// path from f.Name(); note that a "/" is appended assuming a symbolic link is always a directory
newEntry.Path = filepath.Join(normalizedPath, f.Name()) + "/"
}
entry = newEntry
}
}

View File

@@ -18,19 +18,25 @@ import (
type FileStorage struct {
RateLimitedStorage
minimumLevel int // The minimum level of directories to dive into before searching for the chunk file.
isCacheNeeded bool // Network storages require caching
storageDir string
numberOfThreads int
}
// CreateFileStorage creates a file storage.
func CreateFileStorage(storageDir string, threads int) (storage *FileStorage, err error) {
func CreateFileStorage(storageDir string, minimumLevel int, isCacheNeeded bool, threads int) (storage *FileStorage, err error) {
var stat os.FileInfo
stat, err = os.Stat(storageDir)
if os.IsNotExist(err) {
err = os.MkdirAll(storageDir, 0744)
if err != nil {
if err != nil {
if os.IsNotExist(err) {
err = os.MkdirAll(storageDir, 0744)
if err != nil {
return nil, err
}
} else {
return nil, err
}
} else {
@@ -45,6 +51,8 @@ func CreateFileStorage(storageDir string, threads int) (storage *FileStorage, er
storage = &FileStorage {
storageDir : storageDir,
minimumLevel: minimumLevel,
isCacheNeeded: isCacheNeeded,
numberOfThreads: threads,
}
@@ -128,16 +136,18 @@ func (storage *FileStorage) FindChunk(threadIndex int, chunkID string, isFossil
suffix = ".fsl"
}
// The minimum level of directories to dive into before searching for the chunk file.
minimumLevel := 2
for level := 0; level * 2 < len(chunkID); level ++ {
if level >= minimumLevel {
if level >= storage.minimumLevel {
filePath = path.Join(dir, chunkID[2 * level:]) + suffix
if stat, err := os.Stat(filePath); err == nil && !stat.IsDir() {
// Use Lstat() instead of Stat() since 1) Stat() doesn't work for deduplicated disks on Windows and 2) there isn't
// really a need to follow the link if filePath is a link.
stat, err := os.Lstat(filePath)
if err != nil {
LOG_DEBUG("FS_FIND", "File %s can't be found: %v", filePath, err)
} else if stat.IsDir() {
return filePath[len(storage.storageDir) + 1:], false, 0, fmt.Errorf("The path %s is a directory", filePath)
} else {
return filePath[len(storage.storageDir) + 1:], true, stat.Size(), nil
} else if err == nil && stat.IsDir() {
return filePath[len(storage.storageDir) + 1:], true, 0, fmt.Errorf("The path %s is a directory", filePath)
}
}
@@ -149,7 +159,7 @@ func (storage *FileStorage) FindChunk(threadIndex int, chunkID string, isFossil
continue
}
if level < minimumLevel {
if level < storage.minimumLevel {
// Create the subdirectory if it doesn't exist.
if err == nil && !stat.IsDir() {
@@ -164,7 +174,6 @@ func (storage *FileStorage) FindChunk(threadIndex int, chunkID string, isFossil
return "", false, 0, err
}
}
dir = subDir
continue
}
@@ -174,9 +183,7 @@ func (storage *FileStorage) FindChunk(threadIndex int, chunkID string, isFossil
}
LOG_FATAL("CHUNK_FIND", "Chunk %s is still not found after having searched a maximum level of directories",
chunkID)
return "", false, 0, nil
return "", false, 0, fmt.Errorf("The maximum level of directories searched")
}
@@ -241,7 +248,7 @@ func (storage *FileStorage) UploadFile(threadIndex int, filePath string, content
// If a local snapshot cache is needed for the storage to avoid downloading/uploading chunks too often when
// managing snapshots.
func (storage *FileStorage) IsCacheNeeded () (bool) { return false }
func (storage *FileStorage) IsCacheNeeded () (bool) { return storage.isCacheNeeded }
// IsMoveFileImplemented reports whether the 'MoveFile' method is implemented; for the file storage
// it always returns true.
func (storage *FileStorage) IsMoveFileImplemented() bool {
	return true
}

View File

@@ -30,7 +30,7 @@ type GCDStorage struct {
service *drive.Service
idCache map[string]string
idCacheLock *sync.Mutex
backoff int
backoffs []int
isConnected bool
numberOfThreads int
@@ -45,12 +45,12 @@ type GCDConfig struct {
Token oauth2.Token `json:"token"`
}
func (storage *GCDStorage) shouldRetry(err error) (bool, error) {
func (storage *GCDStorage) shouldRetry(threadIndex int, err error) (bool, error) {
retry := false
message := ""
if err == nil {
storage.backoff = 1
storage.backoffs[threadIndex] = 1
return false, nil
} else if e, ok := err.(*googleapi.Error); ok {
if 500 <= e.Code && e.Code < 600 {
@@ -84,15 +84,15 @@ func (storage *GCDStorage) shouldRetry(err error) (bool, error) {
retry = err.Temporary()
}
if !retry || storage.backoff >= 256{
storage.backoff = 1
if !retry || storage.backoffs[threadIndex] >= 256 {
storage.backoffs[threadIndex] = 1
return false, err
}
delay := float32(storage.backoff) * rand.Float32()
delay := float32(storage.backoffs[threadIndex]) * rand.Float32()
LOG_DEBUG("GCD_RETRY", "%s; retrying after %.2f seconds", message, delay)
time.Sleep(time.Duration(float32(storage.backoff) * float32(time.Second)))
storage.backoff *= 2
time.Sleep(time.Duration(float32(storage.backoffs[threadIndex]) * float32(time.Second)))
storage.backoffs[threadIndex] *= 2
return true, nil
}
@@ -129,7 +129,7 @@ func (storage *GCDStorage) deletePathID(path string) {
storage.idCacheLock.Unlock()
}
func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.File, error) {
func (storage *GCDStorage) listFiles(threadIndex int, parentID string, listFiles bool) ([]*drive.File, error) {
if parentID == "" {
return nil, fmt.Errorf("No parent ID provided")
@@ -157,7 +157,7 @@ func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.
for {
fileList, err = storage.service.Files.List().Q(query).Fields("nextPageToken", "files(name, mimeType, id, size)").PageToken(startToken).PageSize(maxCount).Do()
if retry, e := storage.shouldRetry(err); e == nil && !retry {
if retry, e := storage.shouldRetry(threadIndex, err); e == nil && !retry {
break
} else if retry {
continue
@@ -178,7 +178,7 @@ func (storage *GCDStorage) listFiles(parentID string, listFiles bool) ([]*drive.
return files, nil
}
func (storage *GCDStorage) listByName(parentID string, name string) (string, bool, int64, error) {
func (storage *GCDStorage) listByName(threadIndex int, parentID string, name string) (string, bool, int64, error) {
var fileList *drive.FileList
var err error
@@ -187,7 +187,7 @@ func (storage *GCDStorage) listByName(parentID string, name string) (string, boo
query := "name = '" + name + "' and '" + parentID + "' in parents"
fileList, err = storage.service.Files.List().Q(query).Fields("files(name, mimeType, id, size)").Do()
if retry, e := storage.shouldRetry(err); e == nil && !retry {
if retry, e := storage.shouldRetry(threadIndex, err); e == nil && !retry {
break
} else if retry {
continue
@@ -205,7 +205,7 @@ func (storage *GCDStorage) listByName(parentID string, name string) (string, boo
return file.Id, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil
}
func (storage *GCDStorage) getIDFromPath(path string) (string, error) {
func (storage *GCDStorage) getIDFromPath(threadIndex int, path string) (string, error) {
fileID := "root"
@@ -231,7 +231,7 @@ func (storage *GCDStorage) getIDFromPath(path string) (string, error) {
var err error
var isDir bool
fileID, isDir, _, err = storage.listByName(fileID, name)
fileID, isDir, _, err = storage.listByName(threadIndex, fileID, name)
if err != nil {
return "", err
}
@@ -276,9 +276,10 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
numberOfThreads: threads,
idCache: make(map[string]string),
idCacheLock: &sync.Mutex{},
backoffs: make([]int, threads),
}
storagePathID, err := storage.getIDFromPath(storagePath)
storagePathID, err := storage.getIDFromPath(0, storagePath)
if err != nil {
return nil, err
}
@@ -286,7 +287,7 @@ func CreateGCDStorage(tokenFile string, storagePath string, threads int) (storag
storage.idCache[""] = storagePathID
for _, dir := range []string { "chunks", "snapshots", "fossils" } {
dirID, isDir, _, err := storage.listByName(storagePathID, dir)
dirID, isDir, _, err := storage.listByName(0, storagePathID, dir)
if err != nil {
return nil, err
}
@@ -316,7 +317,7 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
if dir == "snapshots" {
files, err := storage.listFiles(storage.getPathID(dir), false)
files, err := storage.listFiles(threadIndex, storage.getPathID(dir), false)
if err != nil {
return nil, nil, err
}
@@ -329,12 +330,12 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
}
return subDirs, nil, nil
} else if strings.HasPrefix(dir, "snapshots/") {
pathID, err := storage.getIDFromPath(dir)
pathID, err := storage.getIDFromPath(threadIndex, dir)
if err != nil {
return nil, nil, err
}
entries, err := storage.listFiles(pathID, true)
entries, err := storage.listFiles(threadIndex, pathID, true)
if err != nil {
return nil, nil, err
}
@@ -351,7 +352,7 @@ func (storage *GCDStorage) ListFiles(threadIndex int, dir string) ([]string, []i
sizes := []int64{}
for _, parent := range []string { "chunks", "fossils" } {
entries, err := storage.listFiles(storage.getPathID(parent), true)
entries, err := storage.listFiles(threadIndex, storage.getPathID(parent), true)
if err != nil {
return nil, nil, err
}
@@ -376,7 +377,7 @@ func (storage *GCDStorage) DeleteFile(threadIndex int, filePath string) (err err
filePath = storage.convertFilePath(filePath)
fileID, ok := storage.findPathID(filePath)
if !ok {
fileID, err = storage.getIDFromPath(filePath)
fileID, err = storage.getIDFromPath(threadIndex, filePath)
if err != nil {
LOG_TRACE("GCD_STORAGE", "Ignored file deletion error: %v", err)
return nil
@@ -385,7 +386,7 @@ func (storage *GCDStorage) DeleteFile(threadIndex int, filePath string) (err err
for {
err = storage.service.Files.Delete(fileID).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
storage.deletePathID(filePath)
return nil
} else if retry {
@@ -420,7 +421,7 @@ func (storage *GCDStorage) MoveFile(threadIndex int, from string, to string) (er
for {
_, err = storage.service.Files.Update(fileID, nil).AddParents(toParentID).RemoveParents(fromParentID).Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -469,7 +470,7 @@ func (storage *GCDStorage) CreateDirectory(threadIndex int, dir string) (err err
for {
file, err = storage.service.Files.Create(file).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -495,12 +496,12 @@ func (storage *GCDStorage) GetFileInfo(threadIndex int, filePath string) (exist
if dir == "." {
dir = ""
}
dirID, err := storage.getIDFromPath(dir)
dirID, err := storage.getIDFromPath(threadIndex, dir)
if err != nil {
return false, false, 0, err
}
fileID, isDir, size, err = storage.listByName(dirID, path.Base(filePath))
fileID, isDir, size, err = storage.listByName(threadIndex, dirID, path.Base(filePath))
if fileID != "" {
storage.savePathID(filePath, fileID)
}
@@ -509,7 +510,7 @@ func (storage *GCDStorage) GetFileInfo(threadIndex int, filePath string) (exist
for {
file, err := storage.service.Files.Get(fileID).Fields("id, mimeType").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
return true, file.MimeType == "application/vnd.google-apps.folder", file.Size, nil
} else if retry {
continue
@@ -533,7 +534,7 @@ func (storage *GCDStorage) FindChunk(threadIndex int, chunkID string, isFossil b
}
fileID := ""
fileID, _, size, err = storage.listByName(parentID, chunkID)
fileID, _, size, err = storage.listByName(threadIndex, parentID, chunkID)
if fileID != "" {
storage.savePathID(realPath, fileID)
}
@@ -545,7 +546,7 @@ func (storage *GCDStorage) DownloadFile(threadIndex int, filePath string, chunk
// We never download the fossil so there is no need to convert the path
fileID, ok := storage.findPathID(filePath)
if !ok {
fileID, err = storage.getIDFromPath(filePath)
fileID, err = storage.getIDFromPath(threadIndex, filePath)
if err != nil {
return err
}
@@ -556,7 +557,7 @@ func (storage *GCDStorage) DownloadFile(threadIndex int, filePath string, chunk
for {
response, err = storage.service.Files.Get(fileID).Download()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue
@@ -583,7 +584,7 @@ func (storage *GCDStorage) UploadFile(threadIndex int, filePath string, content
parentID, ok := storage.findPathID(parent)
if !ok {
parentID, err = storage.getIDFromPath(parent)
parentID, err = storage.getIDFromPath(threadIndex, parent)
if err != nil {
return err
}
@@ -599,7 +600,7 @@ func (storage *GCDStorage) UploadFile(threadIndex int, filePath string, content
for {
reader := CreateRateLimitedReader(content, storage.UploadRateLimit / storage.numberOfThreads)
_, err = storage.service.Files.Create(file).Media(reader).Fields("id").Do()
if retry, err := storage.shouldRetry(err); err == nil && !retry {
if retry, err := storage.shouldRetry(threadIndex, err); err == nil && !retry {
break
} else if retry {
continue

View File

@@ -23,6 +23,7 @@ const (
ASSERT = 4
)
var LogFunction func(level int, logID string, message string)
var printLogHeader = false
@@ -117,6 +118,11 @@ func logf(level int, logID string, format string, v ...interface{}) {
message := fmt.Sprintf(format, v...)
if LogFunction != nil {
LogFunction(level, logID, message)
return
}
now := time.Now()
// Uncomment this line to enable unbuffered logging for tests

View File

@@ -89,7 +89,7 @@ func (client *OneDriveClient) call(url string, method string, input interface{},
case []byte:
inputReader = bytes.NewReader(input.([]byte))
case int:
inputReader = bytes.NewReader([]byte(""))
inputReader = nil
case *bytes.Buffer:
inputReader = bytes.NewReader(input.(*bytes.Buffer).Bytes())
case *RateLimitedReader:

View File

@@ -98,7 +98,7 @@ func SavePreferences() (bool) {
}
preferenceFile := path.Join(GetDuplicacyPreferencePath(), "preferences")
err = ioutil.WriteFile(preferenceFile, description, 0644)
err = ioutil.WriteFile(preferenceFile, description, 0600)
if err != nil {
LOG_ERROR("PREFERENCE_WRITE", "Failed to save the preference file %s: %v", preferenceFile, err)
return false
@@ -108,9 +108,9 @@ func SavePreferences() (bool) {
}
func FindPreference(name string) (*Preference) {
for _, preference := range Preferences {
for i, preference := range Preferences {
if preference.Name == name || preference.StorageURL == name {
return &preference
return &Preferences[i]
}
}

View File

@@ -5,6 +5,9 @@
package duplicacy
import (
"strings"
"reflect"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/credentials"
@@ -227,15 +230,26 @@ func (storage *S3Storage) DownloadFile(threadIndex int, filePath string, chunk *
// UploadFile writes 'content' to the file at 'filePath'.
//
// The object is stored under storage.storageDir + filePath in storage.bucket with a private
// ACL.  When PutObject fails with an error whose text contains "XAmzContentSHA256Mismatch",
// the upload is retried (at most 3 retries); success or any other error is returned right away.
// A fresh PutObjectInput, and therefore a fresh rate-limited reader over 'content', is built
// for every attempt.
//
// NOTE(review): the upload rate limit is divided by len(storage.bucket), i.e. the length of
// the bucket name string.  That looks unintended -- presumably the divisor should be the
// number of upload threads; confirm against the S3Storage struct before changing it.
func (storage *S3Storage) UploadFile(threadIndex int, filePath string, content []byte) (err error) {
input := &s3.PutObjectInput {
Bucket: aws.String(storage.bucket),
Key: aws.String(storage.storageDir + filePath),
ACL: aws.String(s3.ObjectCannedACLPrivate),
Body: CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.bucket)),
ContentType: aws.String("application/duplicacy"),
attempts := 0
for {
input := &s3.PutObjectInput {
Bucket: aws.String(storage.bucket),
Key: aws.String(storage.storageDir + filePath),
ACL: aws.String(s3.ObjectCannedACLPrivate),
Body: CreateRateLimitedReader(content, storage.UploadRateLimit / len(storage.bucket)),
ContentType: aws.String("application/duplicacy"),
}
_, err = storage.client.PutObject(input)
// Give up on success, after the retry budget is exhausted, or on any unrelated error.
if err == nil || attempts >= 3 || !strings.Contains(err.Error(), "XAmzContentSHA256Mismatch") {
return err
}
LOG_INFO("S3_RETRY", "Retrying on %s: %v", reflect.TypeOf(err), err)
attempts += 1
}
_, err = storage.client.PutObject(input)
return err
}

View File

@@ -176,7 +176,7 @@ func LoadIncompleteSnapshot() (snapshot *Snapshot) {
ChunkHashes: chunkHashes,
ChunkLengths: incompleteSnapshot.ChunkLengths,
}
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snpashot loaded from %s", snapshotFile)
LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile)
return snapshot
}

View File

@@ -303,12 +303,8 @@ func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot,
return false
}
if patterns == nil {
if len(patterns) != 0 && !MatchPath(entry.Path, patterns) {
entry.Attributes = nil
} else if len(patterns) != 0 {
if !MatchPath(entry.Path, patterns) {
entry.Attributes = nil
}
}
files = append(files, &entry)
@@ -664,7 +660,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList
if snapshotID == "" {
snapshotIDs, err = manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return 0
}
} else {
@@ -787,7 +783,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
if snapshotID == "" || showStatistics {
snapshotIDs, err := manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return false
}
@@ -1596,7 +1592,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string,
// because we need to find out which chunks are not referenced.
snapshotIDs, err := manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return false
}

View File

@@ -95,14 +95,14 @@ func createTestSnapshotManager(testDir string) *SnapshotManager {
os.RemoveAll(testDir)
os.MkdirAll(testDir, 0700)
storage, _ := CreateFileStorage(testDir, 1)
storage, _ := CreateFileStorage(testDir, 2, false, 1)
storage.CreateDirectory(0, "chunks")
storage.CreateDirectory(0, "snapshots")
config := CreateConfig()
snapshotManager := CreateSnapshotManager(config, storage)
cacheDir := path.Join(testDir, "cache")
snapshotCache, _ := CreateFileStorage(cacheDir, 1)
snapshotCache, _ := CreateFileStorage(cacheDir, 2, false, 1)
snapshotCache.CreateDirectory(0, "chunks")
snapshotCache.CreateDirectory(0, "snapshots")
@@ -181,7 +181,7 @@ func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expecte
snapshotIDs, err = manager.ListSnapshotIDs()
if err != nil {
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snpashots: %v", err)
LOG_ERROR("SNAPSHOT_LIST", "Failed to list all snapshots: %v", err)
return
}

View File

@@ -127,6 +127,7 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
storageURL := preference.StorageURL
isFileStorage := false
isCacheNeeded := false
if strings.HasPrefix(storageURL, "/") {
isFileStorage = true
@@ -140,11 +141,30 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
if !isFileStorage && strings.HasPrefix(storageURL, `\\`) {
isFileStorage = true
isCacheNeeded = true
}
}
if isFileStorage {
fileStorage, err := CreateFileStorage(storageURL, threads)
fileStorage, err := CreateFileStorage(storageURL, 2, isCacheNeeded, threads)
if err != nil {
LOG_ERROR("STORAGE_CREATE", "Failed to load the file storage at %s: %v", storageURL, err)
return nil
}
return fileStorage
}
if strings.HasPrefix(storageURL, "flat://") {
fileStorage, err := CreateFileStorage(storageURL[7:], 0, false, threads)
if err != nil {
LOG_ERROR("STORAGE_CREATE", "Failed to load the file storage at %s: %v", storageURL, err)
return nil
}
return fileStorage
}
if strings.HasPrefix(storageURL, "samba://") {
fileStorage, err := CreateFileStorage(storageURL[8:], 2, true, threads)
if err != nil {
LOG_ERROR("STORAGE_CREATE", "Failed to load the file storage at %s: %v", storageURL, err)
return nil
@@ -180,6 +200,9 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
username = username[:len(username) - 1]
}
// If ssh_key_file is set, public key authentication is attempted before password-based login
keyFile := GetPasswordFromPreference(preference, "ssh_key_file")
password := ""
passwordCallback := func() (string, error) {
LOG_DEBUG("SSH_PASSWORD", "Attempting password login")
@@ -199,7 +222,6 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
}
}
keyFile := ""
publicKeysCallback := func() ([]ssh.Signer, error) {
LOG_DEBUG("SSH_PUBLICKEY", "Attempting public key authentication")
@@ -253,10 +275,19 @@ func CreateStorage(preference Preference, resetPassword bool, threads int) (stor
}
authMethods := [] ssh.AuthMethod {
}
passwordAuthMethods := [] ssh.AuthMethod {
ssh.PasswordCallback(passwordCallback),
ssh.KeyboardInteractive(keyboardInteractive),
}
keyFileAuthMethods := [] ssh.AuthMethod {
ssh.PublicKeysCallback(publicKeysCallback),
}
if keyFile != "" {
authMethods = append(keyFileAuthMethods, passwordAuthMethods...)
} else {
authMethods = append(passwordAuthMethods, keyFileAuthMethods...)
}
if RunInBackground {

View File

@@ -41,7 +41,7 @@ func init() {
func loadStorage(localStoragePath string, threads int) (Storage, error) {
if testStorageName == "" || testStorageName == "file" {
return CreateFileStorage(localStoragePath, threads)
return CreateFileStorage(localStoragePath, 2, false, threads)
}
config, err := ioutil.ReadFile("test_storage.conf")
@@ -61,10 +61,14 @@ func loadStorage(localStoragePath string, threads int) (Storage, error) {
return nil, fmt.Errorf("No storage named '%s' found", testStorageName)
}
if testStorageName == "sftp" {
if testStorageName == "flat" {
return CreateFileStorage(localStoragePath, 0, false, threads)
} else if testStorageName == "samba" {
return CreateFileStorage(localStoragePath, 2, true, threads)
} else if testStorageName == "sftp" {
port, _ := strconv.Atoi(storage["port"])
return CreateSFTPStorageWithPassword(storage["server"], port, storage["username"], storage["directory"], storage["password"], threads)
} else if testStorageName == "s3" {
} else if testStorageName == "s3" || testStorageName == "wasabi" {
return CreateS3Storage(storage["region"], storage["endpoint"], storage["bucket"], storage["directory"], storage["access_key"], storage["secret_key"], threads, true, false)
} else if testStorageName == "s3c" {
return CreateS3CStorage(storage["region"], storage["endpoint"], storage["bucket"], storage["directory"], storage["access_key"], storage["secret_key"], threads)
@@ -454,3 +458,64 @@ func TestStorage(t *testing.T) {
}
}
// TestCleanStorage empties the storage selected by testStorageName: it walks "snapshots/" and
// "chunks/" depth-first, deletes every file it finds, and finally deletes the "config" file.
// Directories themselves are not removed.
//
// Deletion is best-effort: a file that cannot be deleted is reported in the log (with its
// full path and the error) instead of being logged as deleted, and the walk continues.
func TestCleanStorage(t *testing.T) {
	setTestingT(t)
	SetLoggingLevel(INFO)

	// Turn a panic raised while the test runs into a test failure with a stack trace.
	defer func() {
		if r := recover(); r != nil {
			switch e := r.(type) {
			case Exception:
				t.Errorf("%s %s", e.LogID, e.Message)
				debug.PrintStack()
			default:
				t.Errorf("%v", e)
				debug.PrintStack()
			}
		}
	}()

	testDir := path.Join(os.TempDir(), "duplicacy_test", "storage_test")
	os.RemoveAll(testDir)
	os.MkdirAll(testDir, 0700)

	LOG_INFO("STORAGE_TEST", "storage: %s", testStorageName)

	storage, err := loadStorage(testDir, 1)
	if err != nil {
		t.Errorf("Failed to create storage: %v", err)
		return
	}

	// Directories still to be listed, used as a stack.
	directories := make([]string, 0, 1024)
	directories = append(directories, "snapshots/")
	directories = append(directories, "chunks/")

	for len(directories) > 0 {
		dir := directories[len(directories)-1]
		directories = directories[:len(directories)-1]

		LOG_INFO("LIST_FILES", "Listing %s", dir)

		files, _, err := storage.ListFiles(0, dir)
		if err != nil {
			LOG_ERROR("LIST_FILES", "Failed to list the directory %s: %v", dir, err)
			return
		}

		for _, file := range files {
			// Entries ending with '/' are subdirectories to descend into later.
			if len(file) > 0 && file[len(file)-1] == '/' {
				directories = append(directories, dir+file)
				continue
			}

			filePath := dir + file
			if err := storage.DeleteFile(0, filePath); err != nil {
				LOG_INFO("DELETE_FILE", "Failed to delete file %s: %v", filePath, err)
				continue
			}
			LOG_INFO("DELETE_FILE", "Deleted file %s", filePath)
		}
	}

	if err := storage.DeleteFile(0, "config"); err != nil {
		LOG_INFO("DELETE_FILE", "Failed to delete config: %v", err)
		return
	}
	LOG_INFO("DELETE_FILE", "Deleted config")
}

View File

@@ -9,7 +9,6 @@ import (
"os"
"bufio"
"io"
"io/ioutil"
"time"
"path"
"path/filepath"
@@ -119,10 +118,8 @@ func GenerateKeyFromPassword(password string) []byte {
return pbkdf2.Key([]byte(password), DEFAULT_KEY, 16384, 32, sha256.New)
}
// GetPassword attempts to get the password from KeyChain/KeyRing, environment variables, or keyboard input.
func GetPassword(preference Preference, passwordType string, prompt string,
showPassword bool, resetPassword bool) (string) {
// GetPasswordFromPreference gets the password from the environment or the preference, without starting any keyring request
func GetPasswordFromPreference(preference Preference, passwordType string) (string) {
passwordID := passwordType
if preference.Name != "default" {
passwordID = preference.Name + "_" + passwordID
@@ -136,11 +133,31 @@ func GetPassword(preference Preference, passwordType string, prompt string,
}
}
// If the password is stored in the preference, there is no need to include the storage name
// (i.e., preference.Name) in the key, so the key name should really be passwordType rather
// than passwordID; we're using passwordID here only for backward compatibility
if len(preference.Keys) > 0 && len(preference.Keys[passwordID]) > 0 {
LOG_DEBUG("PASSWORD_KEYCHAIN", "Reading %s from preferences", passwordID)
return preference.Keys[passwordID]
}
if len(preference.Keys) > 0 && len(preference.Keys[passwordType]) > 0 {
LOG_DEBUG("PASSWORD_KEYCHAIN", "Reading %s from preferences", passwordType)
return preference.Keys[passwordType]
}
return ""
}
// GetPassword attempts to get the password from KeyChain/KeyRing, environment variables, or keyboard input.
func GetPassword(preference Preference, passwordType string, prompt string,
showPassword bool, resetPassword bool) (string) {
passwordID := passwordType
password := GetPasswordFromPreference(preference,passwordType)
if password != "" {
return password
}
if resetPassword && !RunInBackground {
keyringSet(passwordID, "")
} else {
@@ -156,7 +173,7 @@ func GetPassword(preference Preference, passwordType string, prompt string,
}
password := ""
password = ""
fmt.Printf("%s", prompt)
if showPassword {
scanner := bufio.NewScanner(os.Stdin)
@@ -176,6 +193,7 @@ func GetPassword(preference Preference, passwordType string, prompt string,
// SavePassword saves the specified password in the keyring/keychain.
func SavePassword(preference Preference, passwordType string, password string) {
if password == "" || RunInBackground {
return
}
@@ -183,6 +201,12 @@ func SavePassword(preference Preference, passwordType string, password string) {
if preference.DoNotSavePassword {
return
}
// If the password is retrieved from env or preference, don't save it to keyring
if GetPasswordFromPreference(preference, passwordType) == password {
return
}
passwordID := passwordType
if preference.Name != "default" {
passwordID = preference.Name + "_" + passwordID
@@ -190,54 +214,6 @@ func SavePassword(preference Preference, passwordType string, password string) {
keyringSet(passwordID, password)
}
// RemoveEmptyDirectories removes all empty subdirectories under top.
//
// The tree is walked iteratively with an explicit stack.  Subdirectories whose name starts
// with '.' are never descended into.  Whenever an empty directory is removed, its ancestors
// (strictly below top) are re-examined and removed as well if they have become empty.  If
// top itself is empty it is removed too.  Directories that cannot be read or removed are
// silently skipped.
func RemoveEmptyDirectories(top string) {

	pending := make([]string, 0, 256)
	pending = append(pending, top)

	for len(pending) > 0 {

		last := len(pending) - 1
		current := pending[last]
		pending = pending[:last]

		entries, err := ioutil.ReadDir(current)
		if err != nil {
			continue
		}

		// Queue every visible subdirectory for a later visit.
		for _, entry := range entries {
			if !entry.IsDir() || entry.Name()[0] == '.' {
				continue
			}
			pending = append(pending, path.Join(current, entry.Name()))
		}

		if len(entries) != 0 {
			continue
		}

		if err := os.Remove(current); err != nil {
			continue
		}

		// The removal may have emptied the ancestors; climb towards top and prune them.
		for parent := path.Dir(current); len(parent) > len(top); parent = path.Dir(parent) {
			remaining, err := ioutil.ReadDir(parent)
			if err != nil {
				break
			}

			if len(remaining) == 0 {
				if err := os.Remove(parent); err != nil {
					break
				}
			}
		}
	}
}
// The following code was modified from the online article 'Matching Wildcards: An Algorithm', by Kirk J. Krauss,
// Dr. Dobb's, August 26, 2008. However, the version in the article doesn't handle cases like matching 'abcccd'
// against '*ccd', and the version here fixed that issue.
@@ -331,6 +307,10 @@ func joinPath(components ...string) string {
combinedPath := path.Join(components...)
if len(combinedPath) > 257 && runtime.GOOS == "windows" {
combinedPath = `\\?\` + filepath.Join(components...)
// If the path is on a samba drive we must use the UNC format
if strings.HasPrefix(combinedPath, `\\?\\\`) {
combinedPath = `\\?\UNC\` + combinedPath[6:]
}
}
return combinedPath
}