diff --git a/.github/workflows/clang18.yml b/.github/workflows/clang18.yml index 299b861..7d8f05d 100644 --- a/.github/workflows/clang18.yml +++ b/.github/workflows/clang18.yml @@ -1,6 +1,7 @@ +--- name: clang 18 -on: +"on": push: branches: - main @@ -14,21 +15,20 @@ jobs: runs-on: ubuntu-24.04 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: sudo apt install build-essential nettle-dev time clang-18 - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure CXX=clang++-18 - - name: build - run: make - - name: check - run: make check - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' - + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: sudo apt install build-essential nettle-dev time clang-18 + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure CXX=clang++-18 + - name: build + run: make + - name: check + run: make check + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index cc7b490..1c83bb2 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,3 +1,4 @@ +--- # For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. 
# @@ -11,12 +12,12 @@ # name: "CodeQL" -on: +"on": push: - branches: [ "main", "devel" ] + branches: ["main", "devel"] pull_request: # The branches below must be a subset of the branches above - branches: [ "main", "devel" ] + branches: ["main", "devel"] schedule: - cron: '39 4 * * 0' @@ -32,39 +33,40 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'cpp' ] + language: ['cpp'] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] # Use only 'java' to analyze code written in Java, Kotlin or both # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - - name: Checkout repository - uses: actions/checkout@v4 + - name: Checkout repository + uses: actions/checkout@v4 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install autoconf autoconf-archive nettle-dev build-essential g++ -y + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install autoconf autoconf-archive nettle-dev build-essential g++ -y - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. 
- # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - name: Build - run: | - ./bootstrap.sh - ./configure - make + # For more details on CodeQL's query packs, refer to: + # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + - name: Build + run: | + ./bootstrap.sh + ./configure + make - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - with: - category: "/language:${{matrix.language}}" + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/cppcheck.yml b/.github/workflows/cppcheck.yml index bd2bccd..8ac7f10 100644 --- a/.github/workflows/cppcheck.yml +++ b/.github/workflows/cppcheck.yml @@ -1,6 +1,7 @@ +--- name: cppcheck -on: +"on": push: branches: - main @@ -14,15 +15,14 @@ jobs: runs-on: ubuntu-20.04 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: sudo apt install cppcheck - - name: run cppcheck - run: cppcheck/run_cppcheck.sh - - name: store the cppcheck output as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: 'cppcheck/out/*' - + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: sudo apt install cppcheck + - name: run cppcheck + run: cppcheck/run_cppcheck.sh + - name: store the cppcheck output as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: 'cppcheck/out/*' diff --git a/.github/workflows/debian-bookworm.yml b/.github/workflows/debian-bookworm.yml index 06d0f84..c189d0c 100644 --- 
a/.github/workflows/debian-bookworm.yml +++ b/.github/workflows/debian-bookworm.yml @@ -1,6 +1,7 @@ +--- name: debian 12 bookworm -on: +"on": push: branches: - main @@ -14,29 +15,29 @@ jobs: container: image: debian:bookworm-slim steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure --enable-warnings CXXFLAGS=-std=c++17 - - name: make - run: make - - name: make check - run: make check - - name: make distcheck - run: make distcheck CXXFLAGS=-std=c++17 - - name: build with hardened build flags - run: | - make clean - eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) - ./configure - make - make check - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure --enable-warnings CXXFLAGS=-std=c++17 + - name: make + run: make + - name: make check + run: make check + - name: make distcheck + run: make distcheck CXXFLAGS=-std=c++17 + - name: build with hardened build flags + run: | + make clean + eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) + ./configure + make + make check + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/debian-bullseye.yml b/.github/workflows/debian-bullseye.yml index f1087bb..7501383 100644 --- a/.github/workflows/debian-bullseye.yml +++ 
b/.github/workflows/debian-bullseye.yml @@ -1,6 +1,7 @@ +--- name: debian 11 bullseye -on: +"on": push: branches: - main @@ -14,29 +15,29 @@ jobs: container: image: debian:bullseye-slim steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure --enable-warnings CXXFLAGS=-std=c++17 - - name: make - run: make - - name: make check - run: make check - - name: make distcheck - run: make distcheck CXXFLAGS=-std=c++17 - - name: build with hardened build flags - run: | - make clean - eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) - ./configure - make - make check - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure --enable-warnings CXXFLAGS=-std=c++17 + - name: make + run: make + - name: make check + run: make check + - name: make distcheck + run: make distcheck CXXFLAGS=-std=c++17 + - name: build with hardened build flags + run: | + make clean + eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) + ./configure + make + make check + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/debian-trixie.yml b/.github/workflows/debian-trixie.yml index f13b320..6a31b2b 100644 --- a/.github/workflows/debian-trixie.yml +++ b/.github/workflows/debian-trixie.yml @@ -1,6 +1,7 @@ +--- name: 
debian 13 trixie -on: +"on": push: branches: - main @@ -14,29 +15,29 @@ jobs: container: image: debian:trixie-slim steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure --enable-warnings CXXFLAGS=-std=c++17 - - name: make - run: make - - name: make check - run: make check - - name: make distcheck - run: make distcheck CXXFLAGS=-std=c++17 - - name: build with hardened build flags - run: | - make clean - eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) - ./configure - make - make check - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: apt-get update && apt-get install autoconf build-essential nettle-dev libcap2-bin --yes + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure --enable-warnings CXXFLAGS=-std=c++17 + - name: make + run: make + - name: make check + run: make check + - name: make distcheck + run: make distcheck CXXFLAGS=-std=c++17 + - name: build with hardened build flags + run: | + make clean + eval $(DEB_CXXFLAGS_APPEND=-std=c++17 DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) + ./configure + make + make check + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/deterministic.yml b/.github/workflows/deterministic.yml index 9cf4ef1..e13c13a 100644 --- a/.github/workflows/deterministic.yml +++ b/.github/workflows/deterministic.yml @@ -1,6 +1,7 @@ +--- name: test deterministic operation -on: +"on": push: branches: - main @@ -14,21 
+15,20 @@ jobs: runs-on: ubuntu-24.04 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: sudo apt install build-essential nettle-dev time disorderfs - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure CXXFLAGS=-std=c++17 - - name: make - run: make - - name: run determinism test - run: testcases/verify_deterministic_operation.sh - - name: store logs as artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' - + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: sudo apt install build-essential nettle-dev time disorderfs + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure CXXFLAGS=-std=c++17 + - name: make + run: make + - name: run determinism test + run: testcases/verify_deterministic_operation.sh + - name: store logs as artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/fedora-40.yml b/.github/workflows/fedora-40.yml index ebc004d..e6d159f 100644 --- a/.github/workflows/fedora-40.yml +++ b/.github/workflows/fedora-40.yml @@ -1,6 +1,7 @@ +--- name: fedora 40 -on: +"on": push: branches: - main @@ -14,22 +15,22 @@ jobs: container: image: fedora:40 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: yes | dnf install automake gcc which g++ nettle-devel - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure --enable-warnings CXXFLAGS=-std=c++17 - - name: make - run: make -j $(nproc) - - name: make check - run: make check - - name: make distcheck - run: make distcheck CXXFLAGS=-std=c++17 - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: yes | dnf install automake gcc which g++ nettle-devel + - name: bootstrap + run: 
./bootstrap.sh + - name: configure + run: ./configure --enable-warnings CXXFLAGS=-std=c++17 + - name: make + run: make -j $(nproc) + - name: make check + run: make check + - name: make distcheck + run: make distcheck CXXFLAGS=-std=c++17 + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/fedora-41.yml b/.github/workflows/fedora-41.yml index 572ad5c..6e634be 100644 --- a/.github/workflows/fedora-41.yml +++ b/.github/workflows/fedora-41.yml @@ -1,6 +1,7 @@ +--- name: fedora 41 -on: +"on": push: branches: - main @@ -14,22 +15,22 @@ jobs: container: image: fedora:41 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: yes | dnf install automake gcc which g++ nettle-devel - - name: bootstrap - run: ./bootstrap.sh - - name: configure - run: ./configure --enable-warnings CXXFLAGS=-std=c++17 - - name: make - run: make -j $(nproc) - - name: make check - run: make check - - name: make distcheck - run: make distcheck CXXFLAGS=-std=c++17 - - name: store the logs as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - path: '**/*.log' + - name: checkout + uses: actions/checkout@v4 + - name: install packages + run: yes | dnf install automake gcc which g++ nettle-devel + - name: bootstrap + run: ./bootstrap.sh + - name: configure + run: ./configure --enable-warnings CXXFLAGS=-std=c++17 + - name: make + run: make -j $(nproc) + - name: make check + run: make check + - name: make distcheck + run: make distcheck CXXFLAGS=-std=c++17 + - name: store the logs as an artifact + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + path: '**/*.log' diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 224cce3..06adf1b 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -1,6 +1,7 @@ +--- name: code formatting -on: +"on": push: branches: - main @@ 
-14,30 +15,29 @@ jobs: runs-on: ubuntu-24.04 steps: - - name: checkout - uses: actions/checkout@v4 - - name: install packages - run: sudo apt install clang-18 - - name: run clang format - run: | - ./do_clang_format.sh - - name: check for differences - run: | - git diff >clang-format.patch - if [ $(wc -c clang-format.patch + if [ $(wc -c + * release 1.7.0 + * add xxh checksum for speed + * add controllable buffer size -buffersize N 2023-06-17 Paul Dreik * release 1.6.0 2023-06-17 Paul Dreik diff --git a/Checksum.cc b/Checksum.cc index b0d51e9..c6fbc80 100644 --- a/Checksum.cc +++ b/Checksum.cc @@ -31,6 +31,14 @@ Checksum::Checksum(checksumtypes type) case checksumtypes::MD5: { md5_init(&m_state.md5); } break; +#ifdef HAVE_LIBXXHASH + case checksumtypes::XXH128: { + m_state.xxh128 = XXH3_createState(); + assert(m_state.xxh128 != NULL && "Out of memory!"); + [[maybe_unused]] const auto ret = XXH3_128bits_reset(m_state.xxh128); + assert(ret == XXH_OK); + } break; +#endif default: // not allowed to have something that is not recognized. throw std::runtime_error("wrong checksum type - programming error"); @@ -53,6 +61,12 @@ Checksum::update(std::size_t length, const unsigned char* buffer) case checksumtypes::MD5: md5_update(&m_state.md5, length, buffer); break; +#ifdef HAVE_LIBXXHASH + case checksumtypes::XXH128: { + const auto res = XXH3_128bits_update(m_state.xxh128, buffer, length); + return res == XXH_OK ? 
0 : -1; + } +#endif default: return -1; } @@ -117,6 +131,10 @@ Checksum::getDigestLength() const return SHA512_DIGEST_SIZE; case checksumtypes::MD5: return MD5_DIGEST_SIZE; +#ifdef HAVE_LIBXXHASH + case checksumtypes::XXH128: + return sizeof(XXH128_hash_t); +#endif default: return -1; } @@ -168,6 +186,19 @@ Checksum::printToBuffer(void* buffer, std::size_t N) return -1; } break; +#ifdef HAVE_LIBXXHASH + case checksumtypes::XXH128: + if (N >= sizeof(XXH128_hash_t)) { + XXH128_hash_t result = XXH3_128bits_digest(m_state.xxh128); + XXH128_canonicalFromHash(static_cast(buffer), + result); + XXH3_freeState(m_state.xxh128); + } else { + // bad size. + return -1; + } + break; +#endif default: return -1; } diff --git a/Checksum.hh b/Checksum.hh index 2c3f6e6..9f70763 100644 --- a/Checksum.hh +++ b/Checksum.hh @@ -13,6 +13,12 @@ #include #include +#include "config.h" + +#ifdef HAVE_LIBXXHASH +#include +#endif + /** * class for checksum calculation */ @@ -26,7 +32,8 @@ public: MD5, SHA1, SHA256, - SHA512 + SHA512, + XXH128 }; explicit Checksum(checksumtypes type); @@ -56,6 +63,9 @@ private: sha256_ctx sha256; sha512_ctx sha512; md5_ctx md5; +#ifdef HAVE_LIBXXHASH + XXH3_state_t* xxh128; +#endif } m_state; }; diff --git a/Fileinfo.cc b/Fileinfo.cc index 5c44435..c8ee097 100644 --- a/Fileinfo.cc +++ b/Fileinfo.cc @@ -24,7 +24,8 @@ int Fileinfo::fillwithbytes(enum readtobuffermode filltype, - enum readtobuffermode lasttype) + enum readtobuffermode lasttype, + std::vector& buffer) { // Decide if we are going to read from file or not. 
@@ -72,6 +73,9 @@ Fileinfo::fillwithbytes(enum readtobuffermode filltype, case readtobuffermode::CREATE_SHA512_CHECKSUM: checksumtype = Checksum::checksumtypes::SHA512; break; + case readtobuffermode::CREATE_XXH128_CHECKSUM: + checksumtype = Checksum::checksumtypes::XXH128; + break; default: std::cerr << "does not know how to do that filltype:" << static_cast(filltype) << std::endl; @@ -80,11 +84,10 @@ Fileinfo::fillwithbytes(enum readtobuffermode filltype, if (checksumtype != Checksum::checksumtypes::NOTSET) { Checksum chk(checksumtype); - char buffer[4096]; while (f1) { - f1.read(buffer, sizeof(buffer)); + f1.read(buffer.data(), static_cast(buffer.size())); // gcount is never negative, the cast is safe. - chk.update(static_cast(f1.gcount()), buffer); + chk.update(static_cast(f1.gcount()), buffer.data()); } // store the result of the checksum calculation in somebytes diff --git a/Fileinfo.hh b/Fileinfo.hh index 0e77c23..0ece9f1 100644 --- a/Fileinfo.hh +++ b/Fileinfo.hh @@ -10,6 +10,7 @@ #include #include #include +#include // os specific headers #include //for off_t and others. @@ -48,6 +49,7 @@ public: CREATE_SHA1_CHECKSUM, CREATE_SHA256_CHECKSUM, CREATE_SHA512_CHECKSUM, + CREATE_XXH128_CHECKSUM, }; // type of duplicate @@ -135,10 +137,13 @@ public: * is shorter than the length of the bytes field. 
* @param filltype * @param lasttype + * @param buffer will be used as a scratch buffer - provided from the outside + * to avoid having to reallocate it for each file * @return zero on success */ int fillwithbytes(enum readtobuffermode filltype, - enum readtobuffermode lasttype); + enum readtobuffermode lasttype, + std::vector& buffer); /// get a pointer to the bytes read from the file const char* getbyteptr() const { return m_somebytes.data(); } diff --git a/Makefile.am b/Makefile.am index 2b4ed82..38e904d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,7 @@ bin_PROGRAMS = rdfind rdfind_SOURCES = rdfind.cc Checksum.cc Dirlist.cc Fileinfo.cc Rdutil.cc \ EasyRandom.cc UndoableUnlink.cc CmdlineParser.cc +LDADD = @LIBXXHASH@ #these are the test scripts to execute - I do not know how to glob here, #feedback welcome. TESTS=testcases/largefilesupport.sh \ @@ -18,7 +19,8 @@ TESTS=testcases/largefilesupport.sh \ testcases/verify_deterministic_operation.sh \ testcases/checksum_options.sh \ testcases/md5collisions.sh \ - testcases/sha1collisions.sh + testcases/sha1collisions.sh \ + testcases/checksum_buffersize.sh AUXFILES=testcases/common_funcs.sh \ testcases/md5collisions/letter_of_rec.ps \ diff --git a/NEWS b/NEWS index c33dd4d..8d28942 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,7 @@ -next +1.7.0 requires a C++17 capable compiler. +new fast non-cryptographic hash xxh +controllable buffer size for calculating checksum 1.6.0 supports sha512 1.4.0alpha diff --git a/README.md b/README.md index d05a9ee..09bcb71 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Here is how to get and install nettle from source. Please check for the current ### Quality The following methods are used to maintain code quality: - builds without warnings on gcc and clang, even with all the suggested warnings from [cppbestpractices](https://github.com/lefticus/cppbestpractices/blob/master/02-Use_the_Tools_Available.md) enabled. Pass --enable-warnings to configure to turn them on. 
- - builds with standards c++11, 14, 17 and 2a + - builds with standards c++17 and 20 - tests are written for newly found bugs, first to prove the bug and then to prove that it is fixed. Older bugs do not all have tests. - tests are also run through valgrind - tests are run on address sanitizer builds diff --git a/Rdutil.cc b/Rdutil.cc index 60d5e81..7b9ddef 100644 --- a/Rdutil.cc +++ b/Rdutil.cc @@ -542,15 +542,18 @@ Rdutil::saveablespace(std::ostream& out) const int Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type, enum Fileinfo::readtobuffermode lasttype, - const long nsecsleep) + const long nsecsleep, + const std::size_t buffersize) { // first sort on inode (to read efficiently from the hard drive) sortOnDeviceAndInode(); const auto duration = std::chrono::nanoseconds{ nsecsleep }; + std::vector buffer(buffersize, '\0'); + for (auto& elem : m_list) { - elem.fillwithbytes(type, lasttype); + elem.fillwithbytes(type, lasttype, buffer); if (nsecsleep > 0) { std::this_thread::sleep_for(duration); } diff --git a/Rdutil.hh b/Rdutil.hh index 98e892f..6f5899a 100644 --- a/Rdutil.hh +++ b/Rdutil.hh @@ -88,9 +88,9 @@ public: // if there is trouble with too much disk reading, sleeping for nsecsleep // nanoseconds can be made between each file. int fillwithbytes(enum Fileinfo::readtobuffermode type, - enum Fileinfo::readtobuffermode lasttype = - Fileinfo::readtobuffermode::NOT_DEFINED, - long nsecsleep = 0); + enum Fileinfo::readtobuffermode lasttype, + long nsecsleep, + std::size_t buffersize); /// make symlinks of duplicates. std::size_t makesymlinks(bool dryrun) const; diff --git a/VERSION.txt b/VERSION.txt index 52eb074..ac5dbac 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -1.6.next +1.7.next diff --git a/bootstrap.sh b/bootstrap.sh index 43a9417..950d34e 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -8,13 +8,13 @@ #bail out on error set -e -me=$(basename $0) +me="$(basename "$0")" for prog in aclocal autoheader automake autoconf make; do - if ! 
which $prog >/dev/null 2>&1 ; then - echo $me: please install $prog - exit 1 - fi + if ! which $prog >/dev/null 2>&1; then + echo "$me: please install $prog" + exit 1 + fi done aclocal --warnings=all diff --git a/configure.ac b/configure.ac index 58a37fc..2e3dcc7 100644 --- a/configure.ac +++ b/configure.ac @@ -3,8 +3,8 @@ dnl copyright 2016-2019 Paul Dreik (earlier Paul Sundvall) dnl Distributed under GPL v 2.0 or later, at your option. dnl See LICENSE for further details. -VERSION="1.6.next" -AC_INIT([rdfind],[1.6.next]) +VERSION="1.7.next" +AC_INIT([rdfind],[1.7.next]) AC_CONFIG_SRCDIR([rdfind.cc]) AC_CONFIG_HEADERS([config.h]) @@ -54,6 +54,40 @@ AC_CHECK_LIB(nettle,nettle_pbkdf2_hmac_sha256,,[AC_MSG_ERROR([ and try again. ])]) +dnl xxh hashing is optional: +dnl --with-xxhash requires it +dnl --without-xxhash does not use it +dnl not saying anything enables it if it can be found +AC_ARG_WITH([xxhash], + [AS_HELP_STRING([--with-xxhash], + [support xxhash @<:@default=check@:>@])], + [], + [with_xxhash=check]) + + LIBXXHASH= + AS_IF([test "x$with_xxhash" != xno], + [AC_CHECK_LIB([xxhash], [XXH3_128bits], + [AC_SUBST([LIBXXHASH], ["-lxxhash"]) + AC_DEFINE([HAVE_LIBXXHASH], [1], + [Define if you have libxxhash]) + ], + [if test "x$with_xxhash" != xcheck; then + AC_MSG_FAILURE([ + --with-xxhash was given, but test for xxhash failed. + Please install xxhash first. If you have already done so and get this error message + anyway, it may be installed somewhere else, maybe because you + don't have root access. Pass CPPFLAGS=-I/your/path/to/xxhash to configure + and try again. The path should be so that \#include "xxhash.h" works. + On Debian-ish systems, use "apt-get install libxxhash-dev" to get a system + wide xxhash install. + On RedHat-ish systems, use "dnf install xxhash-devel" to get a system + wide xxhash install. + If you have xxhash somewhere else, maybe because you don't have root + access, pass LDFLAGS=-L/your/path/to/xxhash to configure and try again. 
+ ]) + fi + ],)]) + dnl test for some specific functions AC_CHECK_FUNC(stat,,AC_MSG_ERROR(oops! no stat ?!?)) diff --git a/cppcheck/run_cppcheck.sh b/cppcheck/run_cppcheck.sh index 523c01b..a5a57fc 100755 --- a/cppcheck/run_cppcheck.sh +++ b/cppcheck/run_cppcheck.sh @@ -2,27 +2,26 @@ set -eu -outdir=$(dirname $0)/out -me=$(basename $0) +outdir="$(dirname "$0")/out" +me="$(basename "$0")" -mkdir -p $outdir +mkdir -p "$outdir" args="--enable=all --inconclusive --std=c++17 -I . --quiet --suppress=missingIncludeSystem" # cppcheck can not produce an xml report and a reulgar text file at the same time, so run twice -cppcheck $args *.cc *.hh --template='{severity}:{file}:{line}:{message}' 2>$outdir/cppcheck.out +# shellcheck disable=SC2086 +cppcheck $args --template='{severity}:{file}:{line}:{message}' ./*.cc ./*.hh 2>"$outdir/cppcheck.out" -cppcheck $args *.cc *.hh --xml 2>$outdir/cppcheck.xml - -cppcheck-htmlreport --source-dir=. --title=rdfind --file=$outdir/cppcheck.xml --report-dir=$outdir +# shellcheck disable=SC2086 +cppcheck $args --xml ./*.cc ./*.hh 2>"$outdir/cppcheck.xml" +cppcheck-htmlreport --source-dir=. --title=rdfind --file="$outdir/cppcheck.xml" --report-dir="$outdir" #is anything serious found? -if grep --quiet -v -E '^(style|information|performance):' $outdir/cppcheck.out ; then - echo $me: cppcheck found serious issues. see $outdir/cppcheck.out - exit 1 +if grep --quiet -v -E '^(style|information|performance):' "$outdir/cppcheck.out"; then + echo "$me: cppcheck found serious issues. see $outdir/cppcheck.out" + exit 1 fi echo "$me: cppcheck passed without serious issues." - - diff --git a/do_clang_format.sh b/do_clang_format.sh index b34a787..36d8d09 100755 --- a/do_clang_format.sh +++ b/do_clang_format.sh @@ -8,14 +8,14 @@ # See LICENSE for further details. 
#find clang format (usually named clang-format-3.x or clang-format, who knows) -CLANGFORMAT=$(find /usr/local/bin /usr/bin -executable -name "clang-format*" |grep -v -- -diff |sort -g |tail -n1) +CLANGFORMAT="$(find /usr/local/bin /usr/bin -executable -name "clang-format*" | grep -v -- -diff | sort -g | tail -n1)" -if [ ! -x "$CLANGFORMAT" ] ; then - echo failed finding clangformat - exit 1 +if [ ! -x "$CLANGFORMAT" ]; then + echo "failed finding clangformat" + exit 1 else - echo found clang format: $CLANGFORMAT + echo "found clang format: $CLANGFORMAT" fi -find . -maxdepth 1 -type f \( -name "*.h" -o -name "*.cpp" -o -name "*.cc" -o -name "*.hh" \) -print0 | \ - xargs -0 -n1 $CLANGFORMAT -i +find . -maxdepth 1 -type f \( -name "*.h" -o -name "*.cpp" -o -name "*.cc" -o -name "*.hh" \) -print0 \ + | xargs -0 -n1 "$CLANGFORMAT" -i diff --git a/do_quality_checks.sh b/do_quality_checks.sh index 4d8166f..55f125a 100755 --- a/do_quality_checks.sh +++ b/do_quality_checks.sh @@ -34,18 +34,18 @@ set -e export LANG= -rootdir=$(dirname $0) -me=$(basename $0) +rootdir="$(dirname "$0")" +me="$(basename "$0")" #flags to configure, for assert. 
ASSERT= ############################################################################### start_from_scratch() { - cd $rootdir - if [ -e Makefile ] ; then - make distclean >/dev/null 2>&1 - fi + cd "$rootdir" + if [ -e Makefile ]; then + make distclean >/dev/null 2>&1 + fi } ############################################################################### @@ -53,286 +53,296 @@ start_from_scratch() { #argument 2 is the c++ standard #argument 3 (optional) is appended to CXXFLAGS compile_and_test_standard() { - start_from_scratch - /bin/echo -n "$me: using $(basename $1) with standard $2" - if [ -n "$3" ] ; then - echo " (with additional CXXFLAGS $3)" - else - echo "" - fi + start_from_scratch + # shellcheck disable=SC3037 + /bin/echo -n "$me: using $(basename "$1") with standard $2" + if [ -n "$3" ]; then + echo " (with additional CXXFLAGS $3)" + else + echo "" + fi - if ! ./bootstrap.sh >bootstrap.log 2>&1; then - echo $me:failed bootstrap - see bootstrap.log - exit 1 - fi - if ! ./configure $ASSERT --enable-warnings CXX=$1 CXXFLAGS="-std=$2 $3" >configure.log 2>&1 ; then - echo $me: failed configure - see configure.log - exit 1 - fi - #make sure it compiles - if [ ! -x /usr/bin/time ] ; then - echo "$me: please install /usr/bin/time (apt install time)" - exit 1 - fi - if ! /usr/bin/time --format=%e --output=time.log make >make.log 2>&1; then - echo $me: failed make - exit 1 - fi - if [ ! -z $MEASURE_COMPILE_TIME ] ; then - echo $me: " compile with $(basename $1) $2 took $(cat time.log) seconds" - fi - #check for warnings - if grep -q "warning" make.log; then - # store as an artifact instead of erroring out - name=$(cat *.log |sha256sum |head -c 12) - cp make.log make_${name}.log - echo $me: found compile warning - see make.log, also stored as make_${name}.log - fi - #run the tests - if ! make check >makecheck.log 2>&1 ; then - echo $me: failed make check - see makecheck.log - exit 1 - fi + if ! 
./bootstrap.sh >bootstrap.log 2>&1; then + echo "$me: failed bootstrap - see bootstrap.log" + exit 1 + fi + if ! ./configure $ASSERT --enable-warnings CXX="$1" CXXFLAGS="-std=$2 $3" >configure.log 2>&1; then + echo "$me: failed configure - see configure.log" + exit 1 + fi + #make sure it compiles + if [ ! -x /usr/bin/time ]; then + echo "$me: please install /usr/bin/time (apt install time)" + exit 1 + fi + if ! /usr/bin/time --format=%e --output=time.log make >make.log 2>&1; then + echo "$me: failed make" + exit 1 + fi + if [ -n "$MEASURE_COMPILE_TIME" ]; then + echo "$me: compile with $(basename "$1") '$2' took $(cat time.log) seconds" + fi + #check for warnings + if grep -q "warning" make.log; then + # store as an artifact instead of erroring out + name="$(cat ./*.log | sha256sum | head -c 12)" + cp make.log "make_${name}.log" + echo "$me: found compile warning - see make.log, also stored as make_${name}.log" + fi + #run the tests + if ! make check >makecheck.log 2>&1; then + echo "$me: failed make check - see makecheck.log" + exit 1 + fi } ############################################################################### #argument 1 is the compiler compile_and_test() { - #this is the test program to compile, so we know the compiler and standard lib - #works. clang 4 with c++2a does not. - /bin/echo -e "#include ">x.cpp - #does the compiler understand c++17? That is mandatory. - if ! $1 -c x.cpp -std=c++17 >/dev/null 2>&1 ; then - echo $me: this compiler $1 does not understand c++17 - return 0 - fi + #this is the test program to compile, so we know the compiler and standard lib + #works. clang 4 with c++2a does not. + echo "#include " >x.cpp + #does the compiler understand c++17? That is mandatory. + if ! $1 -c x.cpp -std=c++17 >/dev/null 2>&1; then + echo "$me: this compiler '$1' does not understand c++17" + return 0 + fi - #loop over all standard flags>=17 and try those which work. - #use the code words. - for std in 1z 2a 2b ; do - if ! 
$1 -c x.cpp -std=c++$std >/dev/null 2>&1 ; then - echo $me: compiler does not understand c++$std, skipping this combination. - else - # debug build - ASSERT=--enable-assert - compile_and_test_standard $1 c++$std "-Og" + #loop over all standard flags>=17 and try those which work. + #use the code words. + for std in 1z 2a 2b; do + if ! $1 -c x.cpp -std=c++$std >/dev/null 2>&1; then + echo "$me: compiler does not understand c++$std, skipping this combination." + else + # debug build + ASSERT=--enable-assert + compile_and_test_standard "$1" "c++$std" "-Og" - # release build - ASSERT=--disable-assert - #compile_and_test_standard $1 c++$std "-O2" - compile_and_test_standard $1 c++$std "-O3" - #compile_and_test_standard $1 c++$std "-Os" - fi - done + # release build + ASSERT=--disable-assert + #compile_and_test_standard $1 c++$std "-O2" + compile_and_test_standard "$1" "c++$std" "-O3" + #compile_and_test_standard $1 c++$std "-Os" + fi + done - rm x.cpp + rm x.cpp } ############################################################################### # finds the latest clang on the form clang++- and if none found, checks for # clang++. 
first found is assigned to variable latestclang get_latest_clang() { - for ver in $(seq 30 -1 10); do - candidate=clang++-$ver - if which $candidate >/dev/null 2>&1; then - latestclang=$candidate - return - fi - done - if which clang++ >/dev/null 2>&1; then - latestclang=clang++ - return + for ver in $(seq 30 -1 10); do + candidate="clang++-$ver" + if which "$candidate" >/dev/null 2>&1; then + latestclang="$candidate" + return fi - latestclang= + done + if which clang++ >/dev/null 2>&1; then + latestclang=clang++ + return + fi + latestclang= } ############################################################################### run_with_sanitizer() { - echo $me: "running with sanitizer (options $1)" - get_latest_clang - if [ -z $latestclang ] ; then - echo "$me: could not find any clang compiler (on the form clang++-ver)" - return 0 - fi + echo "$me: running with sanitizer (options $1)" + get_latest_clang + if [ -z "$latestclang" ]; then + echo "$me: could not find any clang compiler (on the form clang++-ver)" + return 0 + fi - start_from_scratch - ./bootstrap.sh >bootstrap.log - ./configure $ASSERT CXX=$latestclang CXXFLAGS="-std=c++17 $1" >configure.log - make > make.log 2>&1 - export UBSAN_OPTIONS="halt_on_error=true exitcode=1" - export ASAN_OPTIONS="halt_on_error=true exitcode=1" - make check >make-check.log 2>&1 - unset UBSAN_OPTIONS - unset ASAN_OPTIONS + start_from_scratch + ./bootstrap.sh >bootstrap.log + ./configure "$ASSERT" "CXX=$latestclang" CXXFLAGS="-std=c++17 $1" >configure.log + make >make.log 2>&1 + export UBSAN_OPTIONS="halt_on_error=true exitcode=1" + export ASAN_OPTIONS="halt_on_error=true exitcode=1" + make check >make-check.log 2>&1 + unset UBSAN_OPTIONS + unset ASAN_OPTIONS } ############################################################################### #This tries to mimic how the debian package is built run_with_debian_buildflags() { - echo $me: "running with buildflags from debian dpkg-buildflags" - if ! 
which dpkg-buildflags >/dev/null ; then - echo $me: dpkg-buildflags not found - skipping - return 0 - fi - start_from_scratch - ./bootstrap.sh >bootstrap.log - eval $(DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh) - ./configure >configure.log - make > make.log 2>&1 - #check for warnings - if grep -q "warning" make.log; then - # store as an artifact instead of erroring out - name=$(cat *.log |sha256sum |head -c 12) - cp make.log make_${name}.log - echo $me: found compile warnings - see make.log, also stored as make_${name}.log + echo "$me: running with buildflags from debian dpkg-buildflags" + if ! which dpkg-buildflags >/dev/null; then + echo "$me: dpkg-buildflags not found - skipping" + return 0 + fi + start_from_scratch + ./bootstrap.sh >bootstrap.log + eval "$(DEB_BUILD_MAINT_OPTIONS="hardening=+all qa=+all,-canary reproducible=+all" dpkg-buildflags --export=sh)" + ./configure >configure.log + make >make.log 2>&1 + #check for warnings + if grep -q "warning" make.log; then + # store as an artifact instead of erroring out + name="$(cat ./*.log | sha256sum | head -c 12)" + cp make.log "make_${name}.log" + echo "$me: found compile warnings - see make.log, also stored as make_${name}.log" - fi - make check >make-check.log 2>&1 + fi + make check >make-check.log 2>&1 - #restore the build environment - for flag in $(dpkg-buildflags |cut -f1 -d=) ; do - unset $flag - done + #restore the build environment + for flag in $(dpkg-buildflags | cut -f1 -d=); do + unset "$flag" + done } ############################################################################### run_with_libcpp() { - # use the latest clang and see if it works - echo "#include + # use the latest clang and see if it works + echo "#include int main() { std::cout<<\"libc++ works!\"<x.cpp - get_latest_clang - if [ ! -z $latestclang ] ; then - if ! $latestclang -std=c++17 -stdlib=libc++ -lc++abi x.cpp >/dev/null 2>&1 || [ ! -x ./a.out ] || ! 
./a.out ; then - echo $me: "debug: $latestclang could not compile with libc++ - perhaps uninstalled." - continue - fi - compile_and_test_standard $latestclang c++17 "-stdlib=libc++ -D_LIBCPP_DEBUG=1" + get_latest_clang + if [ -n "$latestclang" ]; then + if ! "$latestclang" -std=c++17 -stdlib=libc++ -lc++abi x.cpp >/dev/null 2>&1 || [ ! -x ./a.out ] || ! ./a.out; then + echo "$me: debug: $latestclang could not compile with libc++ - perhaps uninstalled." return - fi - # we will get here if no clang could be found. that is not an error, - # having clang and libc++ installed is optional - echo $me: no working clang with libc++ found, skipping. + fi + compile_and_test_standard "$latestclang" c++17 "-stdlib=libc++ -D_LIBCPP_DEBUG=1" + return + fi + # we will get here if no clang could be found. that is not an error, + # having clang and libc++ installed is optional + echo "$me: no working clang with libc++ found, skipping." } ############################################################################### verify_packaging() { - #make sure the packaging works as intended. - echo $me: "trying to make a tar ball for release and building it..." - log="$(pwd)/packagetest.log" - ./bootstrap.sh >$log - ./configure >>$log + #make sure the packaging works as intended. + echo "$me: trying to make a tar ball for release and building it..." + log="$(pwd)/packagetest.log" + ./bootstrap.sh >"$log" + ./configure >>"$log" - touch dummy - make dist >>$log - TARGZ=$(find "$(pwd)" -newer dummy -name "rdfind*gz" -type f |head -n1) - rm dummy - temp=$(mktemp -d) - cp "$TARGZ" "$temp" - cd "$temp" - tar xzf $(basename "$TARGZ") >>$log - cd $(basename "$TARGZ" .tar.gz) - ./configure --prefix=$temp >>$log - make >>$log - make check >>$log - make install >>$log - $temp/bin/rdfind --version >>$log - #coming here means all went fine, go back to the source dir. 
- cd $(dirname "$TARGZ") - rm -rf "$temp" + touch dummy + make dist >>"$log" + TARGZ=$(find "$(pwd)" -newer dummy -name "rdfind*gz" -type f | head -n1) + rm dummy + temp="$(mktemp -d)" + cp "$TARGZ" "$temp" + cd "$temp" + tar xzf "$(basename "$TARGZ")" >>"$log" + cd "$(basename "$TARGZ" .tar.gz)" + { + ./configure --prefix="$temp" + make + make check + make install + "$temp/bin/rdfind" --version + } >>"$log" + #coming here means all went fine, go back to the source dir. + cd "$(dirname "$TARGZ")" + rm -rf "$temp" } ############################################################################### verify_self_contained_headers() { - /bin/echo -n "$me: verify that all header files are self contained..." - if [ ! -e configure ]; then - ./bootstrap.sh >bootstrap.log 2>&1 - fi - if [ ! -e config.h ]; then - ./configure >configure.log 2>&1 - fi - for header in *.hh ; do - cp $header tmp.cc - if ! g++ -std=c++17 -I. -c tmp.cc -o /dev/null >header.log 2>&1 ; then - echo "$me: found a header which is not self contained: $header." - echo "$me: see header.log for details" - exit 1 - fi - rm tmp.cc - done - echo "OK!" + # shellcheck disable=SC3037 + /bin/echo -n "$me: verify that all header files are self contained..." + if [ ! -e configure ]; then + ./bootstrap.sh >bootstrap.log 2>&1 + fi + if [ ! -e config.h ]; then + ./configure >configure.log 2>&1 + fi + for header in *.hh; do + cp "$header" tmp.cc + if ! g++ -std=c++17 -I. -c tmp.cc -o /dev/null >header.log 2>&1; then + echo "$me: found a header which is not self contained: $header." + echo "$me: see header.log for details" + exit 1 + fi + rm tmp.cc + done + echo "OK!" } ############################################################################### build_32bit() { - #compiling to 32 bit, on amd64. - #apt install libc6-i386 gcc-multilib g++-multilib - # - if [ $(uname -m) != x86_64 ] ; then - echo $me: "not on x64, won't cross compile with -m32" - return; - fi - echo $me: "trying to compile in 32 bit mode with -m32..." 
- configureflags="--build=i686-pc-linux-gnu CFLAGS=-m32 CXXFLAGS=-m32 LDFLAGS=-m32" - here=$(pwd) - nettleinstall=$here/nettle32bit - if [ -d "$nettleinstall" ] ; then - echo $me: "local nettle already seems to be installed" - else - mkdir "$nettleinstall" - cd "$nettleinstall" - nettleversion=3.10.1 - echo "$me: downloading nettle from gnu.org..." - wget --quiet https://ftp.gnu.org/gnu/nettle/nettle-$nettleversion.tar.gz - echo "b0fcdd7fc0cdea6e80dcf1dd85ba794af0d5b4a57e26397eee3bc193272d9132 nettle-$nettleversion.tar.gz" >checksum - sha256sum --strict --quiet -c checksum - tar xzf nettle-$nettleversion.tar.gz - cd nettle-$nettleversion - echo $me: trying to configure nettle - ./configure $configureflags --prefix="$nettleinstall" >$here/nettle.configure.log 2>&1 - make install >$here/nettle.install.log 2>&1 - echo $me: "local nettle install went ok" - cd $here - fi - ./bootstrap.sh >bootstrap.log 2>&1 - echo "$me: attempting configure with 32 bit flags... (see configure.log if it fails)" - ./configure --build=i686-pc-linux-gnu CFLAGS=-m32 CXXFLAGS="-m32 -I$nettleinstall/include" LDFLAGS="-m32 -L$nettleinstall/lib" >configure.log 2>&1 - echo "$me: building with 32 bit flags... (check make.log if it fails)" - make >make.log 2>&1 - echo "$me: make check with 32 bit flags... (check make-check.log if it fails)" - LD_LIBRARY_PATH=$nettleinstall/lib make check >make-check.log 2>&1 - echo "$me: 32 bit tests went fine!" + #compiling to 32 bit, on amd64. + #apt install libc6-i386 gcc-multilib g++-multilib + # + if [ "$(uname -m)" != x86_64 ]; then + echo "$me: not on x64, won't cross compile with -m32" + return + fi + echo "$me: trying to compile in 32 bit mode with -m32..." 
+ configureflags="--build=i686-pc-linux-gnu CFLAGS=-m32 CXXFLAGS=-m32 LDFLAGS=-m32" + here="$(pwd)" + nettleinstall=$here/nettle32bit + if [ -d "$nettleinstall" ]; then + echo "$me: local nettle already seems to be installed" + else + mkdir "$nettleinstall" + cd "$nettleinstall" + nettleversion=3.10.1 + echo "$me: downloading nettle from gnu.org..." + wget --quiet https://ftp.gnu.org/gnu/nettle/nettle-$nettleversion.tar.gz + echo "b0fcdd7fc0cdea6e80dcf1dd85ba794af0d5b4a57e26397eee3bc193272d9132 nettle-$nettleversion.tar.gz" >checksum + sha256sum --strict --quiet -c checksum + tar xzf nettle-$nettleversion.tar.gz + cd nettle-$nettleversion + echo "$me: trying to configure nettle" + # shellcheck disable=SC2086 + ./configure $configureflags --prefix="$nettleinstall" >"$here/nettle.configure.log" 2>&1 + make install >"$here/nettle.install.log" 2>&1 + echo "$me: local nettle install went ok" + cd "$here" + fi + ./bootstrap.sh >bootstrap.log 2>&1 + echo "$me: attempting configure with 32 bit flags... (see configure.log if it fails)" + ./configure --build=i686-pc-linux-gnu CFLAGS=-m32 CXXFLAGS="-m32 -I$nettleinstall/include" LDFLAGS="-m32 -L$nettleinstall/lib" >configure.log 2>&1 + echo "$me: building with 32 bit flags... (check make.log if it fails)" + make >make.log 2>&1 + echo "$me: make check with 32 bit flags... (check make-check.log if it fails)" + LD_LIBRARY_PATH=$nettleinstall/lib make check >make-check.log 2>&1 + echo "$me: 32 bit tests went fine!" } ############################################################################### - #this is pretty quick so start with it. 
verify_self_contained_headers #keep track of which compilers have already been tested -echo "">inodes_for_tested_compilers.txt +echo "" >inodes_for_tested_compilers.txt #try all variants of g++ -if which g++ >/dev/null ; then - for COMPILER in $(ls $(which g++)* |grep -v libc); do - inode=$(stat --dereference --format=%i $COMPILER) - if grep -q "^$inode\$" inodes_for_tested_compilers.txt ; then - echo $me: skipping this compiler $COMPILER - already tested - else - #echo trying gcc $GCC:$($GCC --version|head -n1) - echo $inode >>inodes_for_tested_compilers.txt - compile_and_test $COMPILER - fi - done +if which g++ >/dev/null; then + for COMPILER in "$(which g++)"*; do + if echo "$COMPILER" | grep libc; then + continue + fi + inode=$(stat --dereference --format=%i "$COMPILER") + if [ -f inodes_for_tested_compilers.txt ] && grep -q "^$inode\$" inodes_for_tested_compilers.txt >/dev/null; then + echo "$me: skipping this compiler $COMPILER - already tested" + else + #echo trying gcc $GCC:$($GCC --version|head -n1) + echo "$inode" >>inodes_for_tested_compilers.txt + compile_and_test "$COMPILER" + fi + done fi #try all variants of clang get_latest_clang -if which $latestclang >/dev/null ; then - for COMPILER in $(ls $(dirname $(which $latestclang))/clang++* |grep -v libc); do - inode=$(stat --dereference --format=%i $COMPILER) - if grep -q "^$inode\$" inodes_for_tested_compilers.txt ; then - echo $me: skipping this compiler $COMPILER - already tested - else - #echo trying gcc $GCC:$($GCC --version|head -n1) - echo $inode >>inodes_for_tested_compilers.txt - compile_and_test $COMPILER - fi - done +if which "$latestclang" >/dev/null; then + for COMPILER in "$(dirname "$(which "$latestclang")")"/clang++*; do + if echo "$COMPILER" | grep libc; then + continue + fi + inode=$(stat --dereference --format=%i "$COMPILER") + if [ -f inodes_for_tested_compilers.txt ] && grep -q "^$inode\$" inodes_for_tested_compilers.txt >/dev/null; then + echo "$me: skipping this compiler $COMPILER - 
already tested" + else + #echo trying gcc $GCC:$($GCC --version|head -n1) + echo "$inode" >>inodes_for_tested_compilers.txt + compile_and_test "$COMPILER" + fi + done fi rm inodes_for_tested_compilers.txt @@ -359,10 +369,10 @@ run_with_libcpp #test build with running through valgrind if which valgrind >/dev/null; then - echo $me: running unit tests through valgrind - ASSERT="--disable-assert" - compile_and_test_standard g++ c++17 "-O3" - VALGRIND=valgrind make check >make-check.log + echo "$me: running unit tests through valgrind" + ASSERT="--disable-assert" + compile_and_test_standard g++ c++17 "-O3" + VALGRIND=valgrind make check >make-check.log fi #make sure it is possible to build a tar ball, diff --git a/do_shellcheck.sh b/do_shellcheck.sh new file mode 100755 index 0000000..b32d0c0 --- /dev/null +++ b/do_shellcheck.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +me=$(basename "$0") + +echo "$me: run shellcheck on shellscripts" +( + git ls-files | grep -v "^testcases" | grep -E ".sh$" | xargs shellcheck +) + +echo "$me: run shellcheck on testcases" +( + cd testcases && git ls-files | grep -E ".sh$" | xargs shellcheck -x +) diff --git a/do_shellfmt.sh b/do_shellfmt.sh new file mode 100755 index 0000000..f2bbcd8 --- /dev/null +++ b/do_shellfmt.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +me=$(basename "$0") + +if ! 
which shfmt >/dev/null 2>&1; then + echo "$me: please install shfmt" + exit 1 +fi + +git ls-files | grep -E ".sh$" | xargs shfmt --indent 2 \ + --binary-next-line \ + --case-indent \ + --write diff --git a/do_yamllint.sh b/do_yamllint.sh new file mode 100755 index 0000000..2df3e58 --- /dev/null +++ b/do_yamllint.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e + +YMLLINT="" +if which yamllint >/dev/null 2>/dev/null; then + YMLLINT="yamllint" +elif [ -f .venv/bin/yamllint ]; then + YMLLINT=".venv/bin/yamllint" +else + echo "could not find yamllint please install" + echo "for debian based systems: apt -y install libxml2-utils" + echo "for redhat based systems: dnf install yamllint" + echo "local install: python3 -m venv .venv && .venv/bin/python3 -m pip install yamllint" + exit 3 +fi + +# run this when all issues are fixed +git ls-files | grep -E ".yml$" | xargs "$YMLLINT" diff --git a/inofficial_cmake/CMakeLists.txt b/inofficial_cmake/CMakeLists.txt new file mode 100644 index 0000000..29ec563 --- /dev/null +++ b/inofficial_cmake/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.25) + +file(READ ../VERSION.txt RDFIND_VERSION) +string(STRIP "${RDFIND_VERSION}" RDFIND_VERSION) + +string(REGEX REPLACE "\\.next$" ".999" PROJECT_VERSION ${RDFIND_VERSION}) +project(rdfind VERSION "${PROJECT_VERSION}") + +find_package(PkgConfig) +pkg_check_modules(nettle REQUIRED nettle) + +pkg_check_modules(xxhash IMPORTED_TARGET libxxhash) + +if(xxhash_FOUND) + set(HAVE_LIBXXHASH 1) +else() + set(HAVE_LIBXXHASH 0) +endif() + + +configure_file(config.h.in config.h @ONLY) + +add_executable(rdfind + ../Checksum.cc + ../Checksum.hh + ../CmdlineParser.cc + ../CmdlineParser.hh + ../Dirlist.cc + ../Dirlist.hh + ../EasyRandom.cc + ../EasyRandom.hh + ../Fileinfo.cc + ../Fileinfo.hh + ../rdfind.cc + ../RdfindDebug.hh + ../Rdutil.cc + ../Rdutil.hh + ../UndoableUnlink.cc + ../UndoableUnlink.hh) +target_include_directories(rdfind PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") 
+target_include_directories(rdfind PRIVATE ..) + +target_compile_features(rdfind PRIVATE cxx_std_17) +target_link_libraries(rdfind nettle) +if(xxhash_FOUND) +target_link_libraries(rdfind PkgConfig::xxhash) +endif() +target_compile_options(rdfind PRIVATE -Wall -Wextra -Wpedantic) + diff --git a/inofficial_cmake/config.h.in b/inofficial_cmake/config.h.in new file mode 100644 index 0000000..6a55b9f --- /dev/null +++ b/inofficial_cmake/config.h.in @@ -0,0 +1,5 @@ +#cmakedefine RDFIND_VERSION "@RDFIND_VERSION@" +#cmakedefine FOO_ENABLE +#cmakedefine FOO_STRING "@FOO_STRING@" +#cmakedefine HAVE_LIBXXHASH @HAVE_LIBXXHASH@ +#define VERSION "@RDFIND_VERSION@" diff --git a/rdfind.1 b/rdfind.1 index 7f60c3b..17a50ee 100644 --- a/rdfind.1 +++ b/rdfind.1 @@ -3,7 +3,7 @@ .\" .\" Author Paul Dreik 2006 .\" see LICENSE for details. -.TH rdfind "1" 1.6.next "Aug 2021" rdfind +.TH rdfind "1" 1.7.next "Jan 2025" rdfind .SH NAME rdfind \- finds duplicate files .SH SYNOPSIS @@ -76,9 +76,17 @@ Follow symlinks. Default is false. Removes items found which have identical inode and device ID. Default is true. .TP -.BR \-checksum " " \fImd5\fR|\fIsha1\fR|\fIsha256\fR|\fIsha512\fR -What type of checksum to be used: md5, sha1, sha256 or sha512. The default is -sha1 since version 1.4.0. +.BR \-checksum " " \fImd5\fR|\fIsha1\fR|\fIsha256\fR|\fIsha512|\fIxxh128\fR +What type of checksum to be used: md5, sha1, sha256, sha512 or xxh128. The default is +sha1 since version 1.4.0. xxh128 is a very fast checksum, but not of cryptographic +quality. xxh support is optional and requires that rdfind was configured with +--with-xxhash. In case xxh is used but there is no support, an error is returned. +.TP +.BR \-buffersize " " \fIN\fR +Chunksize in bytes when calculating the checksum +for files, smaller or bigger can improve performance +dependent on filesystem and checksum algorithm. +The default is 1 MiB, the maximum allowed is 128MiB (inclusive). 
.TP .BR \-deterministic " " \fItrue\fR|\fIfalse\fR If set (the default), sort files of equal rank in an unspecified but @@ -170,7 +178,7 @@ rdfind is quite brittle in that case. Especially, when deleting or making links, rdfind can be subject to a symlink attack. Use with care! .SH AUTHOR -Paul Dreik 2006-2018, reachable at rdfind@pauldreik.se +Paul Dreik 2006-2025, reachable at rdfind@pauldreik.se Rdfind can be found at https://rdfind.pauldreik.se/ Do you find rdfind useful? Drop me a line! It is always fun to @@ -181,7 +189,7 @@ Several persons have helped with suggestions and improvements: Niels Möller, Carl Payne and Salvatore Ansani. Thanks also to you who tested the program and sent me feedback. .SH VERSION -1.6.next (release date 2023-xx-xx) +1.7.next (release date 2025-xx-xx) .SH COPYRIGHT This program is distributed under GPLv2 or later, at your option. .SH "SEE ALSO" diff --git a/rdfind.cc b/rdfind.cc index 84e3f68..71d7588 100644 --- a/rdfind.cc +++ b/rdfind.cc @@ -10,7 +10,6 @@ static_assert(__cplusplus >= 201703L, "this code requires a C++17 capable compiler!"); // std -#include #include #include #include @@ -40,8 +39,9 @@ int current_cmdline_index = 0; static void usage() { + const auto indent = " "; std::cout - << "Usage: " << "rdfind [options] FILE ...\n" + << "Usage: rdfind [options] FILE ...\n" << '\n' << "Finds duplicate files recursively in the given FILEs (directories),\n" << "and takes appropriate action (by default, nothing).\n" @@ -63,8 +63,12 @@ usage() << " -followsymlinks true |(false) follow symlinks\n" << " -removeidentinode (true)| false ignore files with nonunique " "device and inode\n" - << " -checksum md5 |(sha1)| sha256 | sha512\n" - << " checksum type\n" + << " -checksum md5 |(sha1)| sha256 | sha512 | xxh128\n" + << indent << "checksum type\n" + << indent << "xxh128 is very fast, but is noncryptographic.\n" + << " -buffersize N\n" + << indent << "chunksize in bytes when calculating the checksum.\n" + << indent << "The default is 1 
MiB, can be up to 128 MiB.\n" << " -deterministic (true)| false makes results independent of order\n" << " from listing the filesystem\n" << " -makesymlinks true |(false) replace duplicate files with " @@ -75,7 +79,7 @@ usage() << " -outputname name sets the results file name to \"name\" " "(default results.txt)\n" << " -deleteduplicates true |(false) delete duplicate files\n" - << " -sleep Xms sleep for X milliseconds between " + << " -sleep Xms sleep for X milliseconds between " "file reads.\n" << " Default is 0. Only a few values\n" << " are supported; 0,1-5,10,25,50,100\n" @@ -109,7 +113,9 @@ struct Options bool usesha1 = false; // use sha1 checksum to check for similarity bool usesha256 = false; // use sha256 checksum to check for similarity bool usesha512 = false; // use sha512 checksum to check for similarity + bool usexxh128 = false; // use xxh128 checksum to check for similarity bool deterministic = true; // be independent of filesystem order + std::size_t buffersize = 1 << 20; // chunksize to use when reading files long nsecsleep = 0; // number of nanoseconds to sleep between each file read. std::string resultsfile = "results.txt"; // results file name. 
}; @@ -179,11 +185,32 @@ parseOptions(Parser& parser) o.usesha256 = true; } else if (parser.parsed_string_is("sha512")) { o.usesha512 = true; + } else if (parser.parsed_string_is("xxh128")) { +#ifdef HAVE_LIBXXHASH + o.usexxh128 = true; +#else + std::cerr << "not compiled with xxhash, to make use of xxh128 please " + "reconfigure and rebuild '--with-xxhash'\n"; + std::exit(EXIT_FAILURE); +#endif } else { - std::cerr << "expected md5/sha1/sha256/sha512, not \"" + std::cerr << "expected md5/sha1/sha256/sha512/xxh128, not \"" << parser.get_parsed_string() << "\"\n"; std::exit(EXIT_FAILURE); } + } else if (parser.try_parse_string("-buffersize")) { + const long buffersize = std::stoll(parser.get_parsed_string()); + constexpr long max_buffersize = 128 << 20; + if (buffersize <= 0) { + std::cerr << "a negative or zero buffersize is not allowed\n"; + std::exit(EXIT_FAILURE); + } else if (buffersize > max_buffersize) { + std::cerr << "a maximum of " << (max_buffersize >> 20) + << " MiB buffersize is allowed, got " << (buffersize >> 20) + << " MiB\n"; + std::exit(EXIT_FAILURE); + } + o.buffersize = static_cast(buffersize); } else if (parser.try_parse_string("-sleep")) { const auto nextarg = std::string(parser.get_parsed_string()); if (nextarg == "1ms") { @@ -242,7 +269,7 @@ parseOptions(Parser& parser) // done with parsing of options. remaining arguments are files and dirs. // decide what checksum to use - if no checksum is set, force sha1! 
- if (!o.usemd5 && !o.usesha1 && !o.usesha256 && !o.usesha512) { + if (!o.usemd5 && !o.usesha1 && !o.usesha256 && !o.usesha512 && !o.usexxh128) { o.usesha1 = true; } return o; @@ -269,7 +296,7 @@ report(const std::string& path, const std::string& name, int depth) } } } else { - std::cerr << "failed to read file info on file \"" << tmp.name() << '\n'; + std::cerr << "failed to read file info on file \"" << tmp.name() << "\"\n"; return -1; } return 0; @@ -377,13 +404,17 @@ main(int narg, const char* argv[]) modes.emplace_back(Fileinfo::readtobuffermode::CREATE_SHA512_CHECKSUM, "sha512 checksum"); } + if (o.usexxh128) { + modes.emplace_back(Fileinfo::readtobuffermode::CREATE_XXH128_CHECKSUM, + "xxh128 checksum"); + } for (auto it = modes.begin() + 1; it != modes.end(); ++it) { std::cout << dryruntext << "Now eliminating candidates based on " << it->second << ": " << std::flush; // read bytes (destroys the sorting, for disk reading efficiency) - gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep); + gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep, o.buffersize); // remove non-duplicates std::cout << "removed " << gswd.removeUniqSizeAndBuffer() diff --git a/testcases/checksum_buffersize.sh b/testcases/checksum_buffersize.sh new file mode 100755 index 0000000..dfa91df --- /dev/null +++ b/testcases/checksum_buffersize.sh @@ -0,0 +1,45 @@ +#!/bin/sh +# Test that selection of buffersizes works as expected. + +set -e +. 
"$(dirname "$0")/common_funcs.sh" + +reset_teststate + +TEST_DIR=buffersizes_test +mkdir -p "$TEST_DIR" + +make_test_files() { + dbgecho "creating test files in $TEST_DIR" + head -c 1000000 /dev/zero >"$TEST_DIR/a" + cp "$TEST_DIR/a" "$TEST_DIR/b" + cp "$TEST_DIR/a" "$TEST_DIR/c" + cp "$TEST_DIR/a" "$TEST_DIR/d" + cp "$TEST_DIR/a" "$TEST_DIR/e" +} + +dbgecho "check so all buffersizes behave the same" + +# disables only run once shellscheck +# shellcheck disable=SC2043 +for checksumtype in sha256; do + i=1 + while :; do + if [ $i -gt 65536 ]; then + break + fi + i="$((i * 2))" + make_test_files + dbgecho "testing buffersize $((i * 1024))" + dbgecho "testing $checksumtype" + # Fix this properly by making rdfind to array and use "${rdfind[@]}" + # this requires bash not sh + # shellcheck disable=SC2086 + $rdfind -buffersize $((i * 1024)) -checksum "$checksumtype" -deleteduplicates true "$TEST_DIR" >/dev/null + [ -e "$TEST_DIR/a" ] + [ ! -e "$TEST_DIR/b" ] + [ ! -e "$TEST_DIR/c" ] + [ ! -e "$TEST_DIR/d" ] + [ ! -e "$TEST_DIR/e" ] + done +done diff --git a/testcases/checksum_buffersize_speedtest.sh b/testcases/checksum_buffersize_speedtest.sh new file mode 100755 index 0000000..c6d94c5 --- /dev/null +++ b/testcases/checksum_buffersize_speedtest.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# Performance test for checksumming with different buffersizes. Not meant +# to be run for regular testing. + +set -e +. 
"$(dirname "$0")/common_funcs.sh" + +reset_teststate + +TEST_DIR=buffersizes_speedtest +mkdir -p "$TEST_DIR" + +make_test_files() { + dbgecho "creating test files in $TEST_DIR/bigfiles" + mkdir -p "$TEST_DIR/bigfiles" + head -c $((1024 * 1024 * 500)) /dev/zero >"$TEST_DIR/bigfiles/a" + for f in b c d e; do + cp "$TEST_DIR/bigfiles/a" "$TEST_DIR/bigfiles/$f" + done + dbgecho "creating test files in $TEST_DIR/smallfiles" + mkdir -p "$TEST_DIR/smallfiles" + ( + cd "$TEST_DIR/smallfiles" + head -c100000000 /dev/zero | split --bytes 1000 + ) +} + +dbgecho "run speed test for all checksums and buffersizes" + +make_test_files + +cat /dev/null >"$TEST_DIR/results.tsv" +for filesize in big small; do + for checksumtype in sha1 xxh128; do + i=1 + while :; do + if [ $i -gt 4096 ]; then + break + fi + # Fix this properly by making rdfind to array and use "${rdfind[@]}" + # this requires bash not sh + # shellcheck disable=SC2086 + dbgecho "testing $checksumtype $i kB buffersize" + # shellcheck disable=SC2086 + /usr/bin/time --append --output=$TEST_DIR/results.tsv -f "$filesize\t$i\t$checksumtype\t%e\t%M\t%C" $rdfind -buffersize $((i * 1024)) -checksum "$checksumtype" -dryrun true -deleteduplicates true "$TEST_DIR/${filesize}files" >/dev/null 2>&1 + i="$((i * 2))" + done + done +done +cat "$TEST_DIR/results.tsv" diff --git a/testcases/checksum_options.sh b/testcases/checksum_options.sh index 445693c..b72f2c6 100755 --- a/testcases/checksum_options.sh +++ b/testcases/checksum_options.sh @@ -1,43 +1,45 @@ #!/bin/sh # Test that selection of checksum works as expected. - set -e . "$(dirname "$0")/common_funcs.sh" - - -allchecksumtypes="md5 sha1 sha256 sha512" - - for checksumtype in $allchecksumtypes; do - reset_teststate - dbgecho "trying checksum $checksumtype with small files" - echo checksumtest >a - echo checksumtest >b - $rdfind -checksum $checksumtype -deleteduplicates true a b - [ -e a ] - [ ! 
-e b ] + reset_teststate + dbgecho "trying checksum $checksumtype with small files" + echo checksumtest >a + echo checksumtest >b + $rdfind -checksum "$checksumtype" -deleteduplicates true a b + [ -e a ] + [ ! -e b ] done for checksumtype in $allchecksumtypes; do - reset_teststate - dbgecho "trying checksum $checksumtype with large files" - head -c 1000000 /dev/zero >a - head -c 1000000 /dev/zero >b - $rdfind -checksum $checksumtype -deleteduplicates true a b - [ -e a ] - [ ! -e b ] + reset_teststate + dbgecho "trying checksum $checksumtype with large files" + head -c 1000000 /dev/zero >a + head -c 1000000 /dev/zero >b + $rdfind -checksum "$checksumtype" -deleteduplicates true a b + [ -e a ] + [ ! -e b ] done for checksumtype in $allchecksumtypes; do - reset_teststate - dbgecho "trying checksum $checksumtype with large files that differ only in the middle" - ( head -c 1000000 /dev/zero; echo =====a=====; head -c 1000000 /dev/zero) >a - ( head -c 1000000 /dev/zero; echo =====b=====; head -c 1000000 /dev/zero) >b - $rdfind -checksum $checksumtype -deleteduplicates true a b - [ -e a ] - [ -e b ] + reset_teststate + dbgecho "trying checksum $checksumtype with large files that differ only in the middle" + ( + head -c 1000000 /dev/zero + echo =====a===== + head -c 1000000 /dev/zero + ) >a + ( + head -c 1000000 /dev/zero + echo =====b===== + head -c 1000000 /dev/zero + ) >b + $rdfind -checksum "$checksumtype" -deleteduplicates true a b + [ -e a ] + [ -e b ] done dbgecho "all is good in this test!" diff --git a/testcases/checksum_speedtest.sh b/testcases/checksum_speedtest.sh index 0359026..5884e5b 100755 --- a/testcases/checksum_speedtest.sh +++ b/testcases/checksum_speedtest.sh @@ -2,28 +2,25 @@ # Performance test for checksumming. Not meant # to be run for regular testing. - set -e . "$(dirname "$0")/common_funcs.sh" reset_teststate - -if [ ! -d speedtest ] ; then - mkdir -p speedtest +if [ ! -d speedtest ]; then + mkdir -p speedtest fi -if [ ! 
-e speedtest/largefile1 ] ; then - head -c1000000000 /dev/zero >speedtest/largefile1 - cp -al speedtest/largefile1 speedtest/largefile2 - #warm up the cache - md5sum speedtest/largefile1 speedtest/largefile2 +if [ ! -e speedtest/largefile1 ]; then + head -c1000000000 /dev/zero >speedtest/largefile1 + cp -al speedtest/largefile1 speedtest/largefile2 + #warm up the cache + md5sum speedtest/largefile1 speedtest/largefile2 fi - -for checksumtype in md5 sha1 sha256; do - dbgecho "trying checksum $checksumtype" - time $rdfind -removeidentinode false -checksum $checksumtype speedtest/largefile1 speedtest/largefile2 > rdfind.out +for checksumtype in $allchecksumtypes; do + dbgecho "trying checksum $checksumtype" + time $rdfind -removeidentinode false -checksum "$checksumtype" speedtest/largefile1 speedtest/largefile2 >rdfind.out done dbgecho "all is good in this test!" diff --git a/testcases/common_funcs.sh b/testcases/common_funcs.sh index 31c8760..a4787c4 100755 --- a/testcases/common_funcs.sh +++ b/testcases/common_funcs.sh @@ -1,41 +1,44 @@ #!/bin/sh # common functionality for the unit tests - - #bail out on the first error set -e -me=$(basename $0) +me="$(basename "$0")" +if [ "$WITH_XXHASH" = "1" ]; then + export allchecksumtypes="md5 sha1 sha256 sha512 xxh128" +else + export allchecksumtypes="md5 sha1 sha256 sha512" +fi +# shellcheck disable=SC3037 /bin/echo -n "$me: checking for rdfind ..." rdfind=$PWD/rdfind if [ ! -x "$rdfind" ]; then - echo "could not find $rdfind" - exit 1 + echo "could not find $rdfind" + exit 1 fi echo " OK." -/bin/echo -n "checking for valgrind ..." -if [ -z $VALGRIND ] ; then - echo "not used." +printf "checking for valgrind ..." +if [ -z "$VALGRIND" ]; then + echo "not used." else - echo "active! here is the command: $VALGRIND" + echo "active! here is the command: $VALGRIND" fi rdfind="$VALGRIND $rdfind" #where is the test scripts dir? 
-testscriptsdir=$(dirname $(readlink -f $0)) - +testscriptsdir="$(dirname "$(readlink -f "$0")")" +export testscriptsdir dbgecho() { - echo "$0 debug: " "$@" + echo "$0 debug: " "$@" } - -echo -n "checking for mktemp ..." +printf "checking for mktemp ..." which mktemp >/dev/null echo " OK." @@ -44,47 +47,44 @@ echo " OK." datadir=$(mktemp -d -t rdfindtestcases.d.XXXXXXXXXXXX) dbgecho "temp dir is $datadir" - - - -cleanup () { - cd / - rm -rf "$datadir" +cleanup() { + cd / + rm -rf "$datadir" } -if [ -z $KEEPTEMPDIR ] ; then - trap cleanup INT QUIT EXIT +if [ -z "$KEEPTEMPDIR" ]; then + trap cleanup INT QUIT EXIT fi -[ -d $datadir ] -cd $datadir +[ -d "$datadir" ] +cd "$datadir" reset_teststate() { - cd / - rm -rf "$datadir" - mkdir -p $datadir - cd "$datadir" + cd / + rm -rf "$datadir" + mkdir -p "$datadir" + cd "$datadir" } - verify() { - if ! $@ ; then - echo "failed asserting $@" - exit 1 - fi + if ! "$@"; then + echo "failed asserting $*" + exit 1 + fi } # where to mount disorderfs for the determinism tests -DISORDERED_MNT=$datadir/disordered_mnt -DISORDERED_ROOT=$datadir/disordered_root +DISORDERED_MNT="$datadir/disordered_mnt" +DISORDERED_ROOT="$datadir/disordered_root" # do we have a working disorder fs? 
hasdisorderfs=false if which disorderfs fusermount >/dev/null 2>&1; then - mkdir -p $DISORDERED_MNT $DISORDERED_ROOT - if disorderfs $DISORDERED_ROOT $DISORDERED_MNT >/dev/null 2>&1 ; then - # "Sälj inte skinnet förrän björnen är skjuten - Don't count your chickens until they're hatched" - fusermount -z -u $DISORDERED_MNT - hasdisorderfs=true - fi + mkdir -p "$DISORDERED_MNT" "$DISORDERED_ROOT" + if disorderfs "$DISORDERED_ROOT" "$DISORDERED_MNT" >/dev/null 2>&1; then + # "Sälj inte skinnet förrän björnen är skjuten - Don't count your chickens until they're hatched" + fusermount -z -u "$DISORDERED_MNT" + hasdisorderfs=true + fi fi +export hasdisorderfs diff --git a/testcases/hardlink_fails.sh b/testcases/hardlink_fails.sh index 713f806..1e3d68e 100755 --- a/testcases/hardlink_fails.sh +++ b/testcases/hardlink_fails.sh @@ -3,7 +3,6 @@ # # See https://github.com/pauldreik/rdfind/issues/5 - set -e . "$(dirname "$0")/common_funcs.sh" @@ -12,54 +11,54 @@ reset_teststate #make identical files. files="a subdir/b c some/deeply/nested/subdir/d" nfiles=4 -for n in $files ; do - mkdir -p $(dirname $datadir/$n) - echo "hello hardlink" > $datadir/$n +for n in $files; do + mkdir -p "$(dirname "$datadir/$n")" + echo "hello hardlink" >"$datadir/$n" done #eliminate them. -$rdfind -makehardlinks true $datadir/ +$rdfind -makehardlinks true "$datadir/" #make sure one is a hard link to the other. -for n in $files ; do - nhardlinks=$(stat -c %h $datadir/$n) - if [ $nhardlinks -ne $nfiles ] ; then - dbgecho "expected $nfiles hardlinks, got $nhardlinks" - exit 1 - fi +for n in $files; do + nhardlinks=$(stat -c %h "$datadir/$n") + if [ "$nhardlinks" -ne "$nfiles" ]; then + dbgecho "expected $nfiles hardlinks, got $nhardlinks" + exit 1 + fi done dbgecho passed the happy path # try to make a hardlink to somewhere that fails. 
reset_teststate -mkdir -p $datadir/readonly.d/ -echo xxx > $datadir/readonly.d/a -echo xxx > $datadir/readonly.d/b -chmod 500 $datadir/readonly.d/ +mkdir -p "$datadir/readonly.d/" +echo xxx >"$datadir/readonly.d/a" +echo xxx >"$datadir/readonly.d/b" +chmod 500 "$datadir/readonly.d/" if [ "$(id -u)" -eq 0 ]; then - # if running as root, directory rights are not respected. drop the capability - # for doing that (requires capsh from package libcap2-bin) - MAYBEDROP="capsh --drop=CAP_DAC_OVERRIDE -- -c" + # if running as root, directory rights are not respected. drop the capability + # for doing that (requires capsh from package libcap2-bin) + MAYBEDROP="capsh --drop=CAP_DAC_OVERRIDE -- -c" else - MAYBEDROP="/bin/sh -c" + MAYBEDROP="/bin/sh -c" fi -$MAYBEDROP "$rdfind -makehardlinks true $datadir/readonly.d/" 2>&1 |tee rdfind.out -if ! grep -iq "failed" rdfind.out ; then - dbgecho "expected failure when trying to make hardlink on readonly directory" - exit 1 +$MAYBEDROP "$rdfind -makehardlinks true $datadir/readonly.d/" 2>&1 | tee rdfind.out +if ! grep -iq "failed" rdfind.out; then + dbgecho "expected failure when trying to make hardlink on readonly directory" + exit 1 fi #make sure that our own copy is still there -for f in a b ; do - if [ ! -e $datadir/readonly.d/$f ] ; then - dbgecho file $f is missing, rdfind should not have removed it! - exit 1 - fi +for f in a b; do + if [ ! -e "$datadir/readonly.d/$f" ]; then + dbgecho "file $f is missing, rdfind should not have removed it!" + exit 1 + fi done # make sure it can be cleaned up -chmod 700 $datadir/readonly.d/ +chmod 700 "$datadir/readonly.d/" dbgecho "all is good in this test!" diff --git a/testcases/largefilesupport.sh b/testcases/largefilesupport.sh index f7476ac..c487197 100755 --- a/testcases/largefilesupport.sh +++ b/testcases/largefilesupport.sh @@ -7,8 +7,8 @@ set -e reset_teststate #create a large file, sparse. -filesizem1=2147483647 #size, in bytes. This is no problem. 
-filesize=$(($filesizem1+1)) #size, in bytes. This is a problematic value. +filesizem1=2147483647 #size, in bytes. This is no problem. +filesize=$((filesizem1 + 1)) #size, in bytes. This is a problematic value. #below, dd is used and the file is later appended to, to avoid problems #on Hurd which currently (20130619) can not take $filesize as argument to @@ -16,8 +16,8 @@ filesize=$(($filesizem1+1)) #size, in bytes. This is a problematic value. #make two files, which differ at the first byte to make #rdfind return fast after comparing the initial part. -echo "a">sparse-file1 -echo "b">sparse-file2 +echo "a" >sparse-file1 +echo "b" >sparse-file2 dd if=/dev/null of=sparse-file1 bs=1 seek=$filesizem1 count=1 dd if=/dev/null of=sparse-file2 bs=1 seek=$filesizem1 count=1 head -c1 /dev/zero >>sparse-file1 @@ -29,11 +29,11 @@ sync #To prevent rdfind from reading its result file or rdfind.out mkdir subdir mv sparse-file* subdir -$rdfind subdir 2>&1 |tee rdfind.out +$rdfind subdir 2>&1 | tee rdfind.out dbgecho "rdfind ran ok." #make sure rdfind.out contains the right size -grep -q "^Total size is $((filesize*2)) bytes" rdfind.out +grep -q "^Total size is $((filesize * 2)) bytes" rdfind.out #make sure none could be reduced grep -q "^It seems like you have 0 files that are not unique$" rdfind.out diff --git a/testcases/md5collisions.sh b/testcases/md5collisions.sh index 8545761..4030ecf 100755 --- a/testcases/md5collisions.sh +++ b/testcases/md5collisions.sh @@ -1,26 +1,23 @@ #!/bin/sh #This just tests basic operation - set -e . 
"$(dirname "$0")/common_funcs.sh" reset_teststate - #check md5 collision files mkdir md5coll -cp $testscriptsdir/md5collisions/*.ps md5coll +cp "$testscriptsdir/md5collisions/"*.ps md5coll sync #make sure nothing happens when using sha -$rdfind -checksum sha1 -deleteduplicates true md5coll 2>&1 |tee rdfind.out +$rdfind -checksum sha1 -deleteduplicates true md5coll 2>&1 | tee rdfind.out grep -q "^Deleted 0 files.$" rdfind.out dbgecho "using sha1 did not delete any files, as expected" -$rdfind -checksum md5 -deleteduplicates true md5coll 2>&1 |tee rdfind.out +$rdfind -checksum md5 -deleteduplicates true md5coll 2>&1 | tee rdfind.out grep -q "^Deleted 1 files.$" rdfind.out dbgecho "using md5 did delete files, as expected" - dbgecho "all is good in this test!" diff --git a/testcases/sha1collisions.sh b/testcases/sha1collisions.sh index 7c2711e..3c1b817 100755 --- a/testcases/sha1collisions.sh +++ b/testcases/sha1collisions.sh @@ -1,24 +1,21 @@ #!/bin/sh # Test for sha1 vs sha256 - set -e . "$(dirname "$0")/common_funcs.sh" - reset_teststate #unpack collisions example from https://shattered.it/static/shattered.pdf -base64 --decode <$testscriptsdir/sha1collisions/coll.tar.bz2.b64 |tar xvfj - +base64 --decode <"$testscriptsdir/sha1collisions/coll.tar.bz2.b64" | tar xvfj - #make sure nothing happens when using sha256 -$rdfind -checksum sha256 -deleteduplicates true . 2>&1 |tee rdfind.out +$rdfind -checksum sha256 -deleteduplicates true . 2>&1 | tee rdfind.out grep -q "^Deleted 0 files.$" rdfind.out dbgecho "using sha256 did not delete any files, as expected" -$rdfind -checksum sha1 -deleteduplicates true . 2>&1 |tee rdfind.out +$rdfind -checksum sha1 -deleteduplicates true . 2>&1 | tee rdfind.out grep -q "^Deleted 1 files.$" rdfind.out dbgecho "using sha1 did delete the files, as expected" - dbgecho "all is good in this test!" 
diff --git a/testcases/symlinking_action.sh b/testcases/symlinking_action.sh index c6959f5..6f0ad0e 100755 --- a/testcases/symlinking_action.sh +++ b/testcases/symlinking_action.sh @@ -2,7 +2,6 @@ # Investigate what happen when symlinking fails. # - set -e . "$(dirname "$0")/common_funcs.sh" @@ -10,114 +9,111 @@ reset_teststate #make identical files. files="first subdir/b c some/deeply/nested/subdir/d" -nfiles=4 -for n in $files ; do - mkdir -p $(dirname $datadir/$n) - echo "hello symlink" > $datadir/$n +for n in $files; do + mkdir -p "$(dirname "$datadir/$n")" + echo "hello symlink" >"$datadir/$n" done #eliminate them. -$rdfind -makesymlinks true $datadir/first $datadir/ +$rdfind -makesymlinks true "$datadir/first" "$datadir/" #make sure the first one is untouched (it has the highest rank), and the rest are symlinks. export LANG= -for n in $files ; do - if [ $n = "first" ]; then - inodeforfirst=$(stat -c %i "$datadir/first") - if [ x"$(stat -c %F "$datadir/first")" != x"regular file" ] ; then - dbgecho "expected first to be a regular file" - exit 1 - fi - else - if [ x"$(stat -c %F "$datadir/$n")" != x"symbolic link" ] ; then - dbgecho "expected file $n to be a symbolic link" - exit 1 - fi - inodeforn=$(stat --dereference -c %i "$datadir/$n") - if [ $inodeforfirst != $inodeforn ] ; then - dbgecho "$n does not refer to first - inode mismatch $inodeforfirst vs $inodeforn" - exit 1 - fi - - fi +for n in $files; do + if [ "$n" = "first" ]; then + inodeforfirst=$(stat -c %i "$datadir/first") + if [ x"$(stat -c %F "$datadir/first")" != x"regular file" ]; then + dbgecho "expected first to be a regular file" + exit 1 + fi + else + if [ x"$(stat -c %F "$datadir/$n")" != x"symbolic link" ]; then + dbgecho "expected file $n to be a symbolic link" + exit 1 + fi + inodeforn=$(stat --dereference -c %i "$datadir/$n") + if [ "$inodeforfirst" != "$inodeforn" ]; then + dbgecho "$n does not refer to first - inode mismatch $inodeforfirst vs $inodeforn" + exit 1 + fi + + fi done 
dbgecho passed the happy path # try to make a symlink somewhere where it fails. reset_teststate -mkdir -p $datadir/readonly.d/ -echo xxx > $datadir/readonly.d/a -echo xxx > $datadir/readonly.d/b -chmod 500 $datadir/readonly.d/ +mkdir -p "$datadir/readonly.d/" +echo xxx >"$datadir/readonly.d/a" +echo xxx >"$datadir/readonly.d/b" +chmod 500 "$datadir/readonly.d/" if [ "$(id -u)" -eq 0 ]; then - # if running as root, directory rights are not respected. drop the capability - # for doing that (requires capsh from package libcap2-bin) - MAYBEDROP="capsh --drop=CAP_DAC_OVERRIDE -- -c" + # if running as root, directory rights are not respected. drop the capability + # for doing that (requires capsh from package libcap2-bin) + MAYBEDROP="capsh --drop=CAP_DAC_OVERRIDE -- -c" else - MAYBEDROP="/bin/sh -c" + MAYBEDROP="/bin/sh -c" fi -$MAYBEDROP "$rdfind -makesymlinks true $datadir/readonly.d/" 2>&1 |tee rdfind.out -if ! grep -iq "failed" rdfind.out ; then - dbgecho "expected failure when trying to make symlink on readonly directory" - exit 1 +$MAYBEDROP "$rdfind -makesymlinks true $datadir/readonly.d/" 2>&1 | tee rdfind.out +if ! grep -iq "failed" rdfind.out; then + dbgecho "expected failure when trying to make symlink on readonly directory" + exit 1 fi # make sure that our own copy is still there -for f in a b ; do - if [ ! -e $datadir/readonly.d/$f ] ; then - dbgecho file $f is missing, rdfind should not have removed it! - exit 1 - fi +for f in a b; do + if [ ! -e "$datadir/readonly.d/$f" ]; then + dbgecho "file $f is missing, rdfind should not have removed it!" + exit 1 + fi done # make sure it can be cleaned up -chmod 700 $datadir/readonly.d/ - -dbgecho passed the test with trying to write to a readonly directory - +chmod 700 "$datadir/readonly.d/" +dbgecho "passed the test with trying to write to a readonly directory" #This test tries to provoke errors in relative paths, path simplification # etc. # argument 1 is path to file 1. argument 2 is path to file 2. 
pathsimplification() { - reset_teststate - mkdir -p $(dirname $1) && echo "simplification test" >$1 - mkdir -p $(dirname $2) && echo "simplification test" >$2 - - #dbgecho "state before (args $1 $2)" - #tree - - $rdfind -makesymlinks true $1 $2 2>&1 |tee rdfind.out - # $2 should be a symlink to $1 - if [ x"$(stat -c %F "$1")" != x"regular file" ] ; then - dbgecho "expected file $1 to be a regular file" - exit 1 - fi - if [ x"$(stat -c %F "$2")" != x"symbolic link" ] ; then - dbgecho "expected file $1 to be a symbolic link" - exit 1 - fi - inodefor1=$(stat -c %i "$1") - inodefor2=$(stat --dereference -c %i "$2") - if [ $inodefor1 != $inodefor2 ] ; then - dbgecho "inode mismatch $inodefor1 vs $inodefor2" - exit 1 - fi - #switching directory should still give the correct answer - cd $(dirname $2) - inodefor2=$(stat --dereference -c %i $(basename "$2")) - if [ $inodefor1 != $inodefor2 ] ; then - dbgecho "inode mismatch $inodefor1 vs $inodefor2" - exit 1 - fi - #dbgecho "state after $1 $2" - #sync - #tree - echo ----------------------------------------------------------- + reset_teststate + mkdir -p "$(dirname "$1")" && echo "simplification test" >"$1" + mkdir -p "$(dirname "$2")" && echo "simplification test" >"$2" + + #dbgecho "state before (args $1 $2)" + #tree + + $rdfind -makesymlinks true "$1" "$2" 2>&1 | tee rdfind.out + # $2 should be a symlink to $1 + if [ x"$(stat -c %F "$1")" != x"regular file" ]; then + dbgecho "expected file $1 to be a regular file" + exit 1 + fi + if [ x"$(stat -c %F "$2")" != x"symbolic link" ]; then + dbgecho "expected file $2 to be a symbolic link" + exit 1 + fi + inodefor1=$(stat -c %i "$1") + inodefor2=$(stat --dereference -c %i "$2") + if [ "$inodefor1" != "$inodefor2" ]; then + dbgecho "inode mismatch $inodefor1 vs $inodefor2" + exit 1 + fi + #switching directory should still give the correct answer + cd "$(dirname "$2")" + inodefor2="$(stat --dereference -c %i "$(basename "$2")")" + if [ "$inodefor1" != "$inodefor2" ]; then + 
dbgecho "inode mismatch $inodefor1 vs $inodefor2" + exit 1 + fi + #dbgecho "state after $1 $2" + #sync + #tree + echo ----------------------------------------------------------- } pathsimplification a b @@ -128,10 +124,10 @@ pathsimplification subdir1/../a subdir2/b pathsimplification subdir1/../a subdir2/./././b pathsimplification subdir2/./././b subdir1/../a pathsimplification a subdir2/./././b -pathsimplification $(pwd)/a b -pathsimplification a $(pwd)/b -pathsimplification $(pwd)/a $(pwd)/b -pathsimplification $(pwd)/subdir/../a $(pwd)/b +pathsimplification "$(pwd)/a" b +pathsimplification a "$(pwd)/b" +pathsimplification "$(pwd)/a" "$(pwd)/b" +pathsimplification "$(pwd)/subdir/../a" "$(pwd)/b" pathsimplification ./a b pathsimplification ./a ./b pathsimplification a ./b diff --git a/testcases/verify_deterministic_operation.sh b/testcases/verify_deterministic_operation.sh index b74a02b..a894912 100755 --- a/testcases/verify_deterministic_operation.sh +++ b/testcases/verify_deterministic_operation.sh @@ -2,25 +2,24 @@ # Ensures that the deterministic flag works as intended. # - set -e . "$(dirname "$0")/common_funcs.sh" -if $hasdisorderfs ; then - echo "$me: found a working disorderfs setup. unit test will be properly executed" +if $hasdisorderfs; then + echo "$me: found a working disorderfs setup. unit test will be properly executed" else - echo "$me: please install disorderfs to execute this test properly!" - echo "$me: falsely exiting with success now" - exit 0 + echo "$me: please install disorderfs to execute this test properly!" + echo "$me: falsely exiting with success now" + exit 0 fi #unmount disordered unmount_disordered() { - if [ -d $DISORDERED_MNT ]; then - if ! fusermount --quiet -u $DISORDERED_MNT ; then - dbgecho failed unmounting disordered - fi - fi + if [ -d "$DISORDERED_MNT" ]; then + if ! 
fusermount --quiet -u "$DISORDERED_MNT"; then + dbgecho "failed unmounting disordered" + fi + fi } DISORDERED_FLAGS_RANDOM="--shuffle-dirents=yes --sort-dirents=no --reverse-dirents=no" @@ -28,40 +27,41 @@ DISORDERED_FLAGS_ASC="--shuffle-dirents=no --sort-dirents=yes --reverse-dirents= DISORDERED_FLAGS_DESC="--shuffle-dirents=no --sort-dirents=yes --reverse-dirents=yes" DISORDERED_FLAGS=$DISORDERED_FLAGS_RANDOM mount_disordered() { - mkdir -p $DISORDERED_MNT - mkdir -p $DISORDERED_ROOT - disorderfs $DISORDERED_FLAGS $DISORDERED_ROOT $DISORDERED_MNT >/dev/null + mkdir -p "$DISORDERED_MNT" + mkdir -p "$DISORDERED_ROOT" + # shellcheck disable=SC2086 + disorderfs $DISORDERED_FLAGS "$DISORDERED_ROOT" "$DISORDERED_MNT" >/dev/null } #create cr8() { - while [ $# -gt 0 ] ; do - mkdir -p $(dirname $1) - # make sure the file is longer than what fits in the byte buffer - head -c1000 /dev/zero >$1 - shift - done + while [ $# -gt 0 ]; do + mkdir -p "$(dirname "$1")" + # make sure the file is longer than what fits in the byte buffer + head -c1000 /dev/zero >"$1" + shift + done } local_reset() { - unmount_disordered - reset_teststate - mount_disordered - cr8 $@ + unmount_disordered + reset_teststate + mount_disordered + cr8 "$@" } #sets global variable outcome to which file was preserved, a or b. #$1 - value of -deterministic flag (true or false) run_outcome() { - local_reset $DISORDERED_MNT/a $DISORDERED_MNT/b - $rdfind -deterministic $1 -deleteduplicates true $DISORDERED_MNT >rdfind.out - if [ -f $DISORDERED_MNT/a -a ! -e $DISORDERED_MNT/b ] ; then - outcome=a - elif [ ! -e $DISORDERED_MNT/a -a -f $DISORDERED_MNT/b ] ; then - outcome=b - else - dbgecho "bad result! test failed!" - exit 1 - fi + local_reset "$DISORDERED_MNT/a" "$DISORDERED_MNT/b" + $rdfind -deterministic "$1" -deleteduplicates true "$DISORDERED_MNT" >rdfind.out + if [ -f "$DISORDERED_MNT/a" ] && [ ! -e "$DISORDERED_MNT/b" ]; then + outcome=a + elif [ ! 
-e "$DISORDERED_MNT/a" ] && [ -f "$DISORDERED_MNT/b" ]; then + outcome=b + else + dbgecho "bad result! test failed!" + exit 1 + fi } trap "unmount_disordered;cleanup" INT QUIT EXIT @@ -75,9 +75,9 @@ DISORDERED_FLAGS=$DISORDERED_FLAGS_DESC run_outcome false outcome_desc=$outcome -if [ $outcome_desc = $outcome_asc ] ; then - dbgecho "fail! \"-deterministic false\" should have given the same outcome regardless of ordering" - exit 1 +if [ $outcome_desc = $outcome_asc ]; then + dbgecho "fail! \"-deterministic false\" should have given different outcomes depending on ordering" + exit 1 fi dbgecho "tests for deterministic false passed ok (non-randomized)" @@ -86,25 +86,24 @@ dbgecho "tests for deterministic false passed ok (non-randomized)" #depending on the output from the file system DISORDERED_FLAGS=$DISORDERED_FLAGS_RANDOM run_outcome false -last_outcome=$outcome -for i in $(seq 128) ; do - run_outcome false - if [ $last_outcome != $outcome ] ; then - #proved that both outcomes can happen. good! - dbgecho "got a different outcome after $i random tries" - break - else - if [ $i -eq 64 ] ; then - dbgecho "reached max number of iterations without getting different results". - exit 1 - fi - fi - last_outcome=$outcome +last_outcome="$outcome" +for i in $(seq 128); do + run_outcome false + if [ "$last_outcome" != "$outcome" ]; then + #proved that both outcomes can happen. good! + dbgecho "got a different outcome after $i random tries" + break + else + if [ "$i" -eq 64 ]; then + dbgecho "reached max number of iterations without getting different results". 
+ exit 1 + fi + fi + last_outcome=$outcome done dbgecho "tests for \"-deterministic false\" passed ok on randomized filesystem order" - #verify that with deterministic enabled, we get the same results regardless of ordering DISORDERED_FLAGS=$DISORDERED_FLAGS_ASC run_outcome true @@ -114,13 +113,10 @@ DISORDERED_FLAGS=$DISORDERED_FLAGS_DESC run_outcome true outcome_desc=$outcome -if [ $outcome_desc != $outcome_asc ] ; then - dbgecho "fail! \"-deterministic true\" should have given the same outcome regardless of ordering" - exit 1 +if [ $outcome_desc != $outcome_asc ]; then + dbgecho "fail! \"-deterministic true\" should have given the same outcome regardless of ordering" + exit 1 fi dbgecho "tests for deterministic true passed ok" - - - dbgecho "all is good for the ranking tests!" diff --git a/testcases/verify_dryrun_option.sh b/testcases/verify_dryrun_option.sh index 9916ede..4b2d7e2 100755 --- a/testcases/verify_dryrun_option.sh +++ b/testcases/verify_dryrun_option.sh @@ -2,99 +2,94 @@ # Ensures that dryrun does not modify anything # - set -e . "$(dirname "$0")/common_funcs.sh" local_reset() { - reset_teststate - echo "dryrun" >a - echo "dryrun" >b + reset_teststate + echo "dryrun" >a + echo "dryrun" >b } -for dryrunopt in -dryrun -n ; do - local_reset - $rdfind $dryrunopt true -deleteduplicates true a b >rdfind.out - [ -f a ] - [ -f b ] - dbgecho "files still there, good" - - local_reset - $rdfind $dryrunopt false -deleteduplicates true a b >rdfind.out - [ -f a ] - [ ! 
-e b ] - dbgecho "b was removed, good" - - - local_reset - $rdfind $dryrunopt true -makesymlinks true a b >rdfind.out - [ -f a ] - [ -f b ] - [ $(stat -c %i a) != $(stat --dereference -c %i b) ] - dbgecho "files still there, good" - $rdfind $dryrunopt false -makesymlinks true a b >rdfind.out - [ -f a ] - [ -L b ] - [ $(stat -c %i a) = $(stat --dereference -c %i b) ] - dbgecho "b was replaced with a symlink, good" - +for dryrunopt in -dryrun -n; do + local_reset + $rdfind $dryrunopt true -deleteduplicates true a b >rdfind.out + [ -f a ] + [ -f b ] + dbgecho "files still there, good" + local_reset + $rdfind $dryrunopt false -deleteduplicates true a b >rdfind.out + [ -f a ] + [ ! -e b ] + dbgecho "b was removed, good" - local_reset - $rdfind $dryrunopt true -makehardlinks true a b >rdfind.out - [ -f a ] - [ -f b ] - [ $(stat -c %i a) != $(stat -c %i b) ] - [ $(stat -c %h a) -eq 1 ] - dbgecho "files still there, good" - $rdfind $dryrunopt false -makehardlinks true a b >rdfind.out - [ -f a ] - [ -f b ] - [ $(stat -c %i a) = $(stat -c %i b) ] - [ $(stat -c %h a) -eq 2 ] - [ $(stat -c %h b) -eq 2 ] - dbgecho "b was replaced with a hard link, good" + local_reset + $rdfind $dryrunopt true -makesymlinks true a b >rdfind.out + [ -f a ] + [ -f b ] + [ "$(stat -c %i a)" != "$(stat --dereference -c %i b)" ] + dbgecho "files still there, good" + $rdfind $dryrunopt false -makesymlinks true a b >rdfind.out + [ -f a ] + [ -L b ] + [ "$(stat -c %i a)" = "$(stat --dereference -c %i b)" ] + dbgecho "b was replaced with a symlink, good" + local_reset + $rdfind $dryrunopt true -makehardlinks true a b >rdfind.out + [ -f a ] + [ -f b ] + [ "$(stat -c %i a)" != "$(stat -c %i b)" ] + [ "$(stat -c %h a)" -eq 1 ] + dbgecho "files still there, good" + $rdfind $dryrunopt false -makehardlinks true a b >rdfind.out + [ -f a ] + [ -f b ] + [ "$(stat -c %i a)" = "$(stat -c %i b)" ] + [ "$(stat -c %h a)" -eq 2 ] + [ "$(stat -c %h b)" -eq 2 ] + dbgecho "b was replaced with a hard link, good" - #make 
sure users who forget the boolean argument after - #dryrun get something comprehensible. see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=754663 - local_reset - if $rdfind $dryrunopt a b >rdfind.out 2>&1 ; then - dbgecho "this should have failed, but did not!" - exit 1 - fi - dbgecho "rdfind exited with error status after \"rdfind $dryrunopt a b\", good." - if ! grep -iq "^expected true or false after $dryrunopt" rdfind.out ; then - dbgecho "got unexpected response after \"rdfind $dryrunopt a b\":" - tail rdfind.out - exit 1 - fi + #make sure users who forget the boolean argument after + #dryrun get something comprehensible. see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=754663 + local_reset + if $rdfind $dryrunopt a b >rdfind.out 2>&1; then + dbgecho "this should have failed, but did not!" + exit 1 + fi + dbgecho "rdfind exited with error status after \"rdfind $dryrunopt a b\", good." + if ! grep -iq "^expected true or false after $dryrunopt" rdfind.out; then + dbgecho "got unexpected response after \"rdfind $dryrunopt a b\":" + tail rdfind.out + exit 1 + fi - #dryrun on it's own: "rdfind -dryrun" - local_reset - if $rdfind $dryrunopt >rdfind.out 2>&1 ; then - dbgecho "this should have failed, but did not!" - exit 1 - fi - dbgecho "rdfind exited with error status after \"rdfind $dryrunopt\", good." - if grep -iq "^did not understand option 1:" rdfind.out ; then - dbgecho "got the old non-helpful answer:" - tail rdfind.out - exit 1 - fi + #dryrun on its own: "rdfind -dryrun" + local_reset + if $rdfind $dryrunopt >rdfind.out 2>&1; then + dbgecho "this should have failed, but did not!" + exit 1 + fi + dbgecho "rdfind exited with error status after \"rdfind $dryrunopt\", good." + if grep -iq "^did not understand option 1:" rdfind.out; then + dbgecho "got the old non-helpful answer:" + tail rdfind.out + exit 1 + fi - #dryrun with single argument: "rdfind -dryrun ." 
- local_reset - if $rdfind $dryrunopt a >rdfind.out 2>&1 ; then - dbgecho "this should have failed, but did not!" - exit 1 - fi - dbgecho "rdfind exited with error status after \"rdfind $dryrunopt\", good." - if grep -iq "^did not understand option 1:" rdfind.out ; then - dbgecho "got the old non-helpful answer:" - tail rdfind.out - exit 1 - fi + #dryrun with single argument: "rdfind -dryrun ." + local_reset + if $rdfind $dryrunopt a >rdfind.out 2>&1; then + dbgecho "this should have failed, but did not!" + exit 1 + fi + dbgecho "rdfind exited with error status after \"rdfind $dryrunopt\", good." + if grep -iq "^did not understand option 1:" rdfind.out; then + dbgecho "got the old non-helpful answer:" + tail rdfind.out + exit 1 + fi done dbgecho "all is good for the dryrun tests!" diff --git a/testcases/verify_filesize_option.sh b/testcases/verify_filesize_option.sh index 4fb5f27..e2b9c42 100755 --- a/testcases/verify_filesize_option.sh +++ b/testcases/verify_filesize_option.sh @@ -2,29 +2,27 @@ # Ensures the exclusion of empty files work as intended. # - set -e . "$(dirname "$0")/common_funcs.sh" #make pairs of files, with specific sizes makefiles() { - for i in $(seq 0 4) ; do - head -c$i /dev/zero >a$i - head -c$i /dev/zero >b$i - done + for i in $(seq 0 4); do + head -c"$i" /dev/zero >"a$i" + head -c"$i" /dev/zero >"b$i" + done } - reset_teststate makefiles #try eliminate them, but they are correctly ignored. $rdfind -ignoreempty true -deleteduplicates true a* b* -verify [ -e a0 ] -verify [ -e b0 ] -for i in $(seq 1 4) ; do - verify [ -e a$i ] - verify [ ! -e b$i ] +verify [ -e "a0" ] +verify [ -e "b0" ] +for i in $(seq 1 4); do + verify [ -e "a$i" ] + verify [ ! -e "b$i" ] done dbgecho "passed ignoreempty true test case" @@ -35,60 +33,48 @@ makefiles $rdfind -ignoreempty false -deleteduplicates true a* b* verify [ -e a0 ] verify [ ! -e b0 ] -for i in $(seq 1 4) ; do - verify [ -e a$i ] - verify [ ! 
-e b$i ] +for i in $(seq 1 4); do + verify [ -e "a$i" ] + verify [ ! -e "b$i" ] done dbgecho "passed ignoreempty false test case" - - reset_teststate makefiles $rdfind -minsize 0 -deleteduplicates true a* b* verify [ -e a0 ] verify [ ! -e b0 ] -for i in $(seq 1 4) ; do - verify [ -e a$i ] - verify [ ! -e b$i ] +for i in $(seq 1 4); do + verify [ -e "a$i" ] + verify [ ! -e "b$i" ] done dbgecho "passed -minsize 0 test case" - reset_teststate makefiles $rdfind -minsize 1 -deleteduplicates true a* b* verify [ -e a0 ] verify [ -e b0 ] -for i in $(seq 1 4) ; do - verify [ -e a$i ] - verify [ ! -e b$i ] +for i in $(seq 1 4); do + verify [ -e "a$i" ] + verify [ ! -e "b$i" ] done dbgecho "passed -minsize 1 test case" - - - -for cutoff in $(seq 0 4) ; do - reset_teststate - makefiles - $rdfind -minsize $cutoff -deleteduplicates true a* b* - for i in $(seq 0 4) ; do - verify [ -e a$i ] - if [ $i -lt $cutoff ] ; then - verify [ -e b$i ] - else - verify [ ! -e b$i ] - fi - done - dbgecho "passed -minsize $cutoff test case" +for cutoff in $(seq 0 4); do + reset_teststate + makefiles + $rdfind -minsize "$cutoff" -deleteduplicates true a* b* + for i in $(seq 0 4); do + verify [ -e "a$i" ] + if [ "$i" -lt "$cutoff" ]; then + verify [ -e "b$i" ] + else + verify [ ! -e "b$i" ] + fi + done + dbgecho "passed -minsize $cutoff test case" done - - - - - - dbgecho "all is good for the min filesize test!" diff --git a/testcases/verify_maxfilesize_option.sh b/testcases/verify_maxfilesize_option.sh index 999852d..6850fce 100755 --- a/testcases/verify_maxfilesize_option.sh +++ b/testcases/verify_maxfilesize_option.sh @@ -2,40 +2,35 @@ # Ensures the exclusion of empty files work as intended. # - set -e . 
"$(dirname "$0")/common_funcs.sh" - - makefiles() { - #make pairs of files, with specific sizes - for i in $(seq 0 4) ; do - head -c$i /dev/zero >a$i - head -c$i /dev/zero >b$i - done + #make pairs of files, with specific sizes + for i in $(seq 0 4); do + head -c"$i" /dev/zero >"a$i" + head -c"$i" /dev/zero >"b$i" + done } - #negative value should be reported as misusage reset_teststate makefiles -if $rdfind -deleteduplicates true -maxsize -1 a* b* ; then - dbgecho "negative value should have been detected" - exit 1 +if $rdfind -deleteduplicates true -maxsize -1 a* b*; then + dbgecho "negative value should have been detected" + exit 1 fi dbgecho "passed negative value test" #conflict between min and max should be reported as misusage reset_teststate makefiles -if $rdfind -deleteduplicates true -minsize 123 -maxsize 123 a* b* ; then - dbgecho "conflicting values should have been detected" - exit 1 +if $rdfind -deleteduplicates true -minsize 123 -maxsize 123 a* b*; then + dbgecho "conflicting values should have been detected" + exit 1 fi dbgecho "passed conflicting value test" - reset_teststate makefiles #try eliminate them, but they are correctly ignored. @@ -43,11 +38,10 @@ $rdfind -deleteduplicates true -minsize 2 -maxsize 3 a* b* verify [ -e a2 ] verify [ ! -e b2 ] for i in $(seq 0 1) $(seq 3 4); do - verify [ -e a$i ] - verify [ -e b$i ] + verify [ -e "a$i" ] + verify [ -e "b$i" ] done dbgecho "passed specific size test" - dbgecho "all is good for the max filesize test!" diff --git a/testcases/verify_ranking.sh b/testcases/verify_ranking.sh index 60fb19b..8ed1027 100755 --- a/testcases/verify_ranking.sh +++ b/testcases/verify_ranking.sh @@ -3,54 +3,52 @@ # Ensures that ranking works as intended. # - set -e . "$(dirname "$0")/common_funcs.sh" #unmount disordered unmount_disordered() { - if ! $hasdisorderfs ; then - return - fi - if [ -d $DISORDERED_MNT ]; then - if ! 
fusermount --quiet -z -u $DISORDERED_MNT ; then - dbgecho failed unmounting disordered - fi - fi + if ! $hasdisorderfs; then + return + fi + if [ -d "$DISORDERED_MNT" ]; then + if ! fusermount --quiet -z -u "$DISORDERED_MNT"; then + dbgecho failed unmounting disordered + fi + fi } mount_disordered() { - mkdir -p $DISORDERED_MNT $DISORDERED_ROOT - if ! $hasdisorderfs ; then - return - fi - disorderfs --sort-dirents=yes --reverse-dirents=no $DISORDERED_ROOT $DISORDERED_MNT >/dev/null + mkdir -p "$DISORDERED_MNT" "$DISORDERED_ROOT" + if ! $hasdisorderfs; then + return + fi + disorderfs --sort-dirents=yes --reverse-dirents=no "$DISORDERED_ROOT" "$DISORDERED_MNT" >/dev/null } #create cr8() { - while [ $# -gt 0 ] ; do - mkdir -p $(dirname $1) - # make sure the file is longer than what fits in the byte buffer - head -c1000 /dev/zero >$1 - shift - done + while [ $# -gt 0 ]; do + mkdir -p "$(dirname "$1")" + # make sure the file is longer than what fits in the byte buffer + head -c1000 /dev/zero >"$1" + shift + done } local_reset() { - unmount_disordered - reset_teststate - mount_disordered - cr8 $@ + unmount_disordered + reset_teststate + mount_disordered + cr8 "$@" } - #enforce the rules form RANKING in the man page. -if $hasdisorderfs ; then - echo "$me: found a working disorderfs setup. unit test will be properly executed" +if $hasdisorderfs; then + echo "$me: found a working disorderfs setup. unit test will be properly executed" else - echo "$me: no working disorderfs setup, unit test will be partially executed" + echo "$me: no working disorderfs setup, unit test will be partially executed" fi trap "unmount_disordered;cleanup" INT QUIT EXIT @@ -86,7 +84,7 @@ $rdfind -deleteduplicates true sd0 >rdfind.out [ ! -e sd0/sd1/sd2/a ] local_reset sd0/a sd0/sd1/b0 sd0/sd1/b1 sd0/sd1/sd2/c -$rdfind -deleteduplicates true sd0>rdfind.out +$rdfind -deleteduplicates true sd0 >rdfind.out [ -f sd0/a ] [ ! 
-e sd0/sd1/sd2/a ] @@ -95,21 +93,21 @@ dbgecho "tests for rule 2 passed ok" #Rule 3: If A was found earlier than B, A is higher ranked. #We will have to test this using a tool from the reproducible builds project. #apt install disorderfs, and make sure you are member of the fuse group. -if $hasdisorderfs ; then - - local_reset $DISORDERED_MNT/a $DISORDERED_MNT/b - $rdfind -deleteduplicates true $DISORDERED_MNT >rdfind.out - [ -f $DISORDERED_MNT/a ] - [ ! -e $DISORDERED_MNT/b ] - dbgecho "tests for rule 3 passed ok" - - local_reset $DISORDERED_MNT/b $DISORDERED_MNT/a - $rdfind -deleteduplicates true $DISORDERED_MNT >rdfind.out - [ -f $DISORDERED_MNT/a ] - [ ! -e $DISORDERED_MNT/b ] - dbgecho "tests for rule 3 passed ok" +if $hasdisorderfs; then + + local_reset "$DISORDERED_MNT/a" "$DISORDERED_MNT/b" + $rdfind -deleteduplicates true "$DISORDERED_MNT" >rdfind.out + [ -f "$DISORDERED_MNT/a" ] + [ ! -e "$DISORDERED_MNT/b" ] + dbgecho "tests for rule 3 passed ok" + + local_reset "$DISORDERED_MNT/b" "$DISORDERED_MNT/a" + $rdfind -deleteduplicates true "$DISORDERED_MNT" >rdfind.out + [ -f "$DISORDERED_MNT/a" ] + [ ! -e "$DISORDERED_MNT/b" ] + dbgecho "tests for rule 3 passed ok" else - dbgecho "could not execute tests for rule 3 - please install disorderfs" + dbgecho "could not execute tests for rule 3 - please install disorderfs" fi dbgecho "all is good for the ranking tests!"