diff --git a/samples/simple-example.sh b/samples/simple-example.sh index ca2bba94..354b679f 100755 --- a/samples/simple-example.sh +++ b/samples/simple-example.sh @@ -91,7 +91,7 @@ set -x fim st || exit $? echo - echo \# Search for duplicated files + echo \# Search for duplicate files fim fdup || exit $? echo @@ -103,7 +103,7 @@ set -x fim st || exit $? echo - echo \# No duplicated files as we are looking only inside the dir01 + echo \# No duplicate files as we are looking only inside the dir01 fim fdup || exit $? echo diff --git a/src/main/asciidoc/docs/en/dealing-with-duplicates.adoc b/src/main/asciidoc/docs/en/dealing-with-duplicates.adoc index 1274d46e..ed78a532 100644 --- a/src/main/asciidoc/docs/en/dealing-with-duplicates.adoc +++ b/src/main/asciidoc/docs/en/dealing-with-duplicates.adoc @@ -1,18 +1,71 @@ = Dealing with duplicates -Duplicated files are addressed by Fim in two different ways. +Duplicate files are addressed by Fim in two different ways. == Duplicates inside a Fim repository -Fim allow you to detect duplicates using the `fdup` command. It displays the list of duplicated files. + -See it in action in <>. +Fim allows you to detect duplicates using the `fdup` command. -If you want to remove them, Fim won't do it. It does not provide a smart way to remove duplicates inside the same repository. +You can also remove them. + +=== Find duplicates + +Fim is able to display duplicates contained in a repository using the `fdup` (`find-duplicates`) command. +It displays the list of duplicate files. + +See it in action in <>. + +[source, bash] +---- +$ fim fdup +---- + +If the current State is already committed, you can skip the workspace scanning phase with the `-l` option: + +[source, bash] +---- +$ fim fdup -l +---- + +=== Remove duplicates + +You can remove duplicate files. 
+ +* Either interactively: + +[source, bash] +---- +$ fim rdup +---- + +* Or automatically preserving the first file in the list: + +[source, bash] +---- +$ fim rdup -y +---- + +In both cases, it is possible to use the current State as with `fdup` by adding the `-l` option: + +[source, bash] +---- +$ fim rdup -l +---- == Duplicates that are outside -You can use Fim to remove duplicated files that are located outside a Fim repository using the `rdup` command. -It can be useful if you want to cleanup old backups that are no more synchronized and you want to be sure to not lose any files that could have been modified or added. +Fim can delete duplicate files contained in another repository. + +It can be useful if you want to clean up old backups that are no longer synchronized and you want to be sure to not lose any files that could have been modified or added. + +It erases all local files that already exist in the master workspace. + +For example, `backup` is a copy of the repository named `source`: + +[source, bash] +---- +$ cd backup +$ fim rdup -m ../source +---- + +When the workspace to clean is remote, you can just copy the `.fim` into an empty directory and set it as the parameter of the `-m` option of the `rdup` command. === Simple duplicates removing @@ -146,7 +199,7 @@ Do you really want to commit (y/n/A)? y ------ ~/rdup-example/source$ cd ../backup/ ~/rdup-example/backup$ fim rdup -m ../source -2016/05/21 08:39:14 - Info - Searching for duplicated files using the ../source directory as master +2016/05/21 08:39:14 - Info - Searching for duplicate files using the ../source directory as master 2016/05/21 08:39:14 - Info - Scanning recursively local files, using 'full' mode and 4 threads (Hash progress legend for files grouped 10 by 10: # > 1 GB, @ > 200 MB, O > 100 MB, 8 > 50 MB, o > 20 MB, . otherwise) @@ -173,13 +226,13 @@ Do you really want to remove it (y/n/A)? A 'file10' is a duplicate of '../source/file10' 'file10' removed -8 duplicated files found. 
8 duplicated files removed +8 duplicate files found. 8 duplicate files removed ------ [IMPORTANT] ===== When you are prompted with a question asking for (y/n/A) which means Yes, No, or All Yes. + -All Yes will reply Yes to all the remaining questions. You can see it in action above. +'All Yes' will reply Yes to all the remaining questions. You can see it in action above. ===== ==== Only the two modified files remains diff --git a/src/main/asciidoc/docs/en/faq.adoc b/src/main/asciidoc/docs/en/faq.adoc index 27577ef0..08f92f80 100644 --- a/src/main/asciidoc/docs/en/faq.adoc +++ b/src/main/asciidoc/docs/en/faq.adoc @@ -9,7 +9,7 @@ Doing this allow you to: - Quickly find the modifications done in this specific sub-directory. You will hash only the files contained inside and not the complete file tree - Quickly commit the modifications done in this sub-directory -- Quickly find the duplicated files contained in this sub-directory +- Quickly find the duplicate files contained in this sub-directory and remove them - Quickly reset the attributes of files contained in this sub-directory All the other commands will run as if you were on the top of the Fim repository. diff --git a/src/main/asciidoc/docs/en/fim-usage.adoc b/src/main/asciidoc/docs/en/fim-usage.adoc index ffa0cc30..0e602c59 100644 --- a/src/main/asciidoc/docs/en/fim-usage.adoc +++ b/src/main/asciidoc/docs/en/fim-usage.adoc @@ -17,8 +17,9 @@ Available commands: rfa / reset-file-attrs Reset the files attributes like they were stored in the last committed State dcor / detect-corruption Find changes most likely caused by a hardware corruption or a filesystem bug. 
Change in content, but not in creation time and last modified time - fdup / find-duplicates Find local duplicated files in the Fim repository - rdup / remove-duplicates Remove duplicated files from local directory based on a remote master Fim repository + fdup / find-duplicates Find local duplicate files in the Fim repository + rdup / remove-duplicates Remove duplicates found by the 'fdup' command. + If you specify the '-m' option it removes duplicates based on a master repository log Display the history of the States with the same output as the 'status' command dign / display-ignored Display the files or directories that are ignored into the last State rbk / rollback Rollback the last commit. It will remove the last State @@ -41,9 +42,9 @@ Available options: You can specify multiple kind of difference to ignore separated by a comma. For example: -i attrs,dates,renamed -l,--use-last-state Use the last committed State. - Only for the find local duplicated files command + Both for the 'find-duplicates' and 'remove-duplicates' commands -m,--master-fim-repository Fim repository directory that you want to use as remote master. - Only for the remove duplicated files command + Only for the 'remove-duplicates' command -n,--do-not-hash Do not hash file content. Uses only file names and modification dates -o,--output-max-lines Change the maximum number lines displayed for the same kind of modification. Default value is 200 lines diff --git a/src/main/asciidoc/docs/en/most-common-use-cases.adoc b/src/main/asciidoc/docs/en/most-common-use-cases.adoc index 964d92a9..bb6ce43b 100644 --- a/src/main/asciidoc/docs/en/most-common-use-cases.adoc +++ b/src/main/asciidoc/docs/en/most-common-use-cases.adoc @@ -2,7 +2,14 @@ Fim can be used for different kind of use cases. -== Binary Workspace management +== Managing a workspace + +* Manage directories filled with binary. 
For example: pictures, music or movies + +* Know the status of a workspace in which we work episodically + +* Track changes over time + Personally I use Fim to manage my photos and videos. When I have new photos, I put them at the right place in my pictures folder and then I do `fim ci` from the sub-directory containing the new photos to record a new State, as I could do with Git. @@ -13,9 +20,16 @@ More details on using Fim from a sub-directory can be found in <>. == Backup integrity diff --git a/src/main/asciidoc/docs/en/simple-example.adoc b/src/main/asciidoc/docs/en/simple-example.adoc index d77a344d..0cd4dd7d 100644 --- a/src/main/asciidoc/docs/en/simple-example.adoc +++ b/src/main/asciidoc/docs/en/simple-example.adoc @@ -174,12 +174,12 @@ Deleted: file06 1 added, 1 copied, 3 duplicated, 1 date modified, 2 content modified, 1 renamed, 1 deleted ---- -=== Search for duplicated files +=== Search for duplicate files [source, bash] ---- simple-example$ fim fdup -2016/05/09 21:58:37 - Info - Searching for duplicated files +2016/05/09 21:58:37 - Info - Searching for duplicate files 2016/05/09 21:58:37 - Info - Scanning recursively local files, using 'full' mode and 2 threads (Hash progress legend for files grouped 10 by 10: # > 1 GB, @ > 200 MB, O > 100 MB, 8 > 50 MB, o > 20 MB, . otherwise) @@ -195,7 +195,7 @@ simple-example$ fim fdup file07 file07.dup1 -3 duplicated files spread into 2 duplicate sets, 36 bytes of total wasted space +3 duplicate files spread into 2 duplicate sets, 36 bytes of total wasted space ---- === From the `dir01` sub-directory @@ -225,18 +225,18 @@ Added: dir01/file01 1 added ---- -There are no duplicated file as we are looking only inside `dir01`. +There are no duplicate files as we are looking only inside `dir01`. 
[source, bash] ---- simple-example/dir01$ fim fdup -2016/05/09 21:58:37 - Info - Searching for duplicated files +2016/05/09 21:58:37 - Info - Searching for duplicate files 2016/05/09 21:58:37 - Info - Scanning recursively local files, using 'full' mode and 2 threads (Hash progress legend for files grouped 10 by 10: # > 1 GB, @ > 200 MB, O > 100 MB, 8 > 50 MB, o > 20 MB, . otherwise) 2016/05/09 21:58:38 - Info - Scanned 1 file (12 bytes), hashed 12 bytes (avg 12 bytes/s), during 00:00:00 -No duplicated file found +No duplicate file found ---- Commit only the local modifications done inside this directory. diff --git a/src/main/java/org/fim/Fim.java b/src/main/java/org/fim/Fim.java index 628cd020..0ddc936a 100644 --- a/src/main/java/org/fim/Fim.java +++ b/src/main/java/org/fim/Fim.java @@ -107,7 +107,7 @@ private Options buildOptions() { opts.addOption(buildOption("d", "directory", "Run Fim into the specified directory").hasArg().build()); opts.addOption(buildOption("e", "errors", "Display execution error details").build()); opts.addOption(buildOption("m", "master-fim-repository", "Fim repository directory that you want to use as remote master.\n" + - "Only for the remove duplicated files command").hasArg().build()); + "Only for the 'remove-duplicates' command").hasArg().build()); opts.addOption(buildOption("n", "do-not-hash", "Do not hash file content. Uses only file names and modification dates").build()); opts.addOption(buildOption("s", "super-fast-mode", "Use super-fast mode. 
Hash only 3 small blocks.\n" + "One at the beginning, one in the middle and one at the end").build()); @@ -122,7 +122,7 @@ private Options buildOptions() { "You can specify multiple kind of difference to ignore separated by a comma.\n" + "For example: -i attrs,dates,renamed").hasArg().valueSeparator(',').build()); opts.addOption(buildOption("l", "use-last-state", "Use the last committed State.\n" + - "Only for the find local duplicated files command").build()); + "Both for the 'find-duplicates' and 'remove-duplicates' commands").build()); opts.addOption(buildOption("c", "comment", "Comment to set during init and commit").hasArg().build()); opts.addOption(buildOption("o", "output-max-lines", "Change the maximum number lines displayed for the same kind of modification.\n" + "Default value is 200 lines").hasArg().build()); diff --git a/src/main/java/org/fim/command/FindDuplicatesCommand.java b/src/main/java/org/fim/command/FindDuplicatesCommand.java index d06d3bea..f0953035 100644 --- a/src/main/java/org/fim/command/FindDuplicatesCommand.java +++ b/src/main/java/org/fim/command/FindDuplicatesCommand.java @@ -39,7 +39,7 @@ public String getShortCmdName() { @Override public String getDescription() { - return "Find local duplicated files in the Fim repository"; + return "Find local duplicate files in the Fim repository"; } @Override @@ -48,7 +48,7 @@ public Object execute(Context context) throws Exception { fileContentHashingMandatory(context); - Logger.info(String.format("Searching for duplicated files%s", context.isUseLastState() ? " from the last committed State" : "")); + Logger.info(String.format("Searching for duplicate files%s", context.isUseLastState() ? 
" from the last committed State" : "")); Logger.newLine(); State state; diff --git a/src/main/java/org/fim/command/RemoveDuplicatesCommand.java b/src/main/java/org/fim/command/RemoveDuplicatesCommand.java index dce34cb5..f5044a4e 100644 --- a/src/main/java/org/fim/command/RemoveDuplicatesCommand.java +++ b/src/main/java/org/fim/command/RemoveDuplicatesCommand.java @@ -62,7 +62,8 @@ public String getShortCmdName() { @Override public String getDescription() { - return "Remove duplicated files from local directory based on a remote master Fim repository"; + return "Remove duplicates found by the 'fdup' command.\n" + + " If you specify the '-m' option it removes duplicates based on a master repository"; } @Override @@ -112,7 +113,7 @@ public Object execute(Context context) throws Exception { } context.setRepositoryRootDir(masterFimRepository); - Logger.info(String.format("Searching for duplicated files using the %s directory as master", context.getMasterFimRepositoryDir())); + Logger.info(String.format("Searching for duplicate files using the %s directory as master", context.getMasterFimRepositoryDir())); Logger.newLine(); State masterState = new StateManager(context).loadLastState(); @@ -142,13 +143,13 @@ public Object execute(Context context) throws Exception { if (totalFilesRemoved == 0) { if (duplicatedFilesCount == 0) { - Logger.out.println("No duplicated file found"); + Logger.out.println("No duplicate file found"); } else { - Logger.out.printf("Found %d duplicated %s. No files removed%n", duplicatedFilesCount, pluralForLong("file", duplicatedFilesCount)); + Logger.out.printf("Found %d duplicate %s. No files removed%n", duplicatedFilesCount, pluralForLong("file", duplicatedFilesCount)); } } else { Logger.newLine(); - Logger.out.printf("%d duplicated %s found. %d duplicated %s removed%n", + Logger.out.printf("%d duplicate %s found. 
%d duplicate %s removed%n", duplicatedFilesCount, pluralForLong("file", duplicatedFilesCount), totalFilesRemoved, pluralForLong("file", totalFilesRemoved)); } diff --git a/src/main/java/org/fim/internal/StateComparator.java b/src/main/java/org/fim/internal/StateComparator.java index a41bf78a..c52b9f67 100644 --- a/src/main/java/org/fim/internal/StateComparator.java +++ b/src/main/java/org/fim/internal/StateComparator.java @@ -282,7 +282,7 @@ private void searchForDifferences() { } List removed = notFoundInCurrentFileStateList.removeAll(originalFileHash); if (removed != null && removed.size() > 0) { - // Used to check other duplicated files that have been renamed + // Used to check other duplicate files that have been renamed foundInPreviousState.put(originalFileHash, originalFileState); } } else { diff --git a/src/main/java/org/fim/model/DuplicateResult.java b/src/main/java/org/fim/model/DuplicateResult.java index bb30a9d0..f6f81fbd 100644 --- a/src/main/java/org/fim/model/DuplicateResult.java +++ b/src/main/java/org/fim/model/DuplicateResult.java @@ -75,20 +75,20 @@ public DuplicateResult displayAndRemoveDuplicates() { if (filesRemoved == 0) { if (duplicatedFilesCount > 0) { - Logger.out.printf("%d duplicated %s, %s of total wasted space%n", + Logger.out.printf("%d duplicate %s, %s of total wasted space%n", duplicatedFilesCount, pluralForLong("file", duplicatedFilesCount), byteCountToDisplaySize(totalWastedSpace)); } else { - Logger.out.println("No duplicated file found"); + Logger.out.println("No duplicate file found"); } } else { Logger.out.printf("Removed %d files and freed %s%n", filesRemoved, byteCountToDisplaySize(spaceFreed)); long remainingDuplicates = duplicatedFilesCount - filesRemoved; long remainingWastedSpace = totalWastedSpace - spaceFreed; if (remainingDuplicates > 0) { - Logger.out.printf("Still have %d duplicated %s, %s of total wasted space%n", + Logger.out.printf("Still have %d duplicate %s, %s of total wasted space%n", remainingDuplicates, 
pluralForLong("file", remainingDuplicates), byteCountToDisplaySize(remainingWastedSpace)); } else { - Logger.out.println("No duplicated file remains"); + Logger.out.println("No duplicate file remains"); } } return this;