diff --git a/.cargo/mutants.toml b/.cargo/mutants.toml
new file mode 100644
index 00000000..e1e99382
--- /dev/null
+++ b/.cargo/mutants.toml
@@ -0,0 +1,2 @@
+# Include only files that are currently well-tested.
+examine_globs = ["src/bandid.rs", "src/bin/conserve.rs"]
diff --git a/.codespell.dict b/.codespell.dict
new file mode 100644
index 00000000..764c1698
--- /dev/null
+++ b/.codespell.dict
@@ -0,0 +1 @@
+assertino->assertion
diff --git a/.codespell.words b/.codespell.words
new file mode 100644
index 00000000..d84f1c41
--- /dev/null
+++ b/.codespell.words
@@ -0,0 +1,2 @@
+crate
+ser
diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 00000000..40b9ae00
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,5 @@
+[codespell]
+ignore-words = .codespell.words
+skip = target,.git
+builtin = clear,rare,code
+dictionary = .codespell.dict
diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml
index 32a2d027..a40545a8 100644
--- a/.github/workflows/install.yml
+++ b/.github/workflows/install.yml
@@ -7,8 +7,11 @@ on:

 jobs:
   cargo-install:
+    strategy:
+      matrix:
+        locked: ["", "--locked"]
     runs-on: ubuntu-latest
     steps:
       - name: cargo-install
         run: |
-          cargo install cargo-mutants
+          cargo install cargo-mutants ${{ matrix.locked }}
diff --git a/.github/workflows/mutants.yaml b/.github/workflows/mutants.yaml
new file mode 100644
index 00000000..6418590f
--- /dev/null
+++ b/.github/workflows/mutants.yaml
@@ -0,0 +1,29 @@
+name: cargo-mutants
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+      - ci-*
+
+jobs:
+  cargo-mutants:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Install cargo-mutants
+        uses: baptiste0928/cargo-install@v2
+        with:
+          crate: cargo-mutants
+      - name: Run mutant tests
+        # Can't use --all-features here because BLAKE2 SIMD needs unstable...
+        run: cargo mutants -j2 --no-shuffle -vV
+      - name: Archive results
+        uses: actions/upload-artifact@v3
+        with:
+          name: mutation-report
+          path: mutants.out
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 252e93f8..42ce2481 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -1,11 +1,6 @@
 name: Rust

-on:
-  push:
-    branches:
-      - "main"
-      - "releases/*"
-  pull_request:
+on: [push, pull_request]

 # see https://matklad.github.io/2021/09/04/fast-rust-builds.html
 env:
diff --git a/Cargo.lock b/Cargo.lock
index 13aacafb..92cbdad3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,15 +2,74 @@
 # It is not intended for manual editing.
 version = 3

+[[package]]
+name = "ahash"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+]
+
 [[package]]
 name = "aho-corasick"
-version = "0.7.20"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a"
 dependencies = [
  "memchr",
 ]

+[[package]]
+name = "anstream"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd"
+dependencies = [
+ "anstyle",
+ "windows-sys",
+]
+
 [[package]]
 name = "arrayvec"
 version = "0.4.12"
@@ -22,10 +81,11 @@
 dependencies = [
  "nodrop",
 ]

 [[package]]
 name = "assert_cmd"
-version = "2.0.8"
+version = "2.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9834fcc22e0874394a010230586367d4a3e9f11b560f469262678547e1d2575e"
+checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6"
 dependencies = [
+ "anstyle",
  "bstr",
  "doc-comment",
  "predicates",
@@ -36,10 +96,11 @@
 [[package]]
 name = "assert_fs"
-version = "1.0.10"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d94b2a3f3786ff2996a98afbd6b4e5b7e890d685ccf67577f508ee2342c71cc9"
+checksum = "f070617a68e5c2ed5d06ee8dd620ee18fb72b99f6c094bed34cf8ab07c875b48"
 dependencies = [
+ "anstyle",
  "doc-comment",
  "globwalk",
  "predicates",
@@ -48,6 +109,12 @@
  "tempfile",
 ]

+[[package]]
+name = "assert_matches"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
+
 [[package]]
 name = "atty"
 version = "0.2.14"
@@ -86,6 +153,12 @@
 version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

+[[package]]
+name = "bitflags"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
+
 [[package]]
 name = "blake2-rfc"
 version = "0.2.18"
@@ -98,16 +171,21 @@
 dependencies = [
  "arrayvec",
  "constant_time_eq",
 ]

 [[package]]
 name = "bstr"
-version = "1.1.0"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b45ea9b00a7b3f2988e9a65ad3917e62123c38dba709b666506207be96d1790b"
+checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
 dependencies = [
  "memchr",
- "once_cell",
- "regex-automata",
+ "regex-automata 0.3.6",
  "serde",
 ]

+[[package]]
+name = "bumpalo"
+version = "3.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+
 [[package]]
 name = "byteorder"
 version = "1.4.3"
@@ -116,9 +194,9 @@
 checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

 [[package]]
 name = "bytes"
-version = "1.3.0"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c"
+checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"

 [[package]]
 name = "cachedir"
@@ -131,9 +209,12 @@
 dependencies = [
  "tempfile",
 ]

 [[package]]
 name = "cc"
-version = "1.0.78"
+version = "1.0.83"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
+checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
+dependencies = [
+ "libc",
+]

 [[package]]
 name = "cfg-if"
@@ -143,61 +224,88 @@
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

 [[package]]
 name = "clap"
-version = "4.1.4"
+version = "4.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76"
+checksum = "1d5f1946157a96594eb2d2c10eb7ad9a2b27518cb3000209dec700c35df9197d"
 dependencies = [
- "bitflags",
+ "clap_builder",
  "clap_derive",
- "clap_lex",
- "is-terminal",
  "once_cell",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78116e32a042dd73c2901f0dc30790d20ff3447f3e3472fad359e8c3d282bcd6"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
  "strsim",
- "termcolor",
  "terminal_size",
 ]

 [[package]]
 name = "clap_derive"
-version = "4.1.0"
+version = "4.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8"
+checksum = "c9fd1a5729c4548118d7d70ff234a44868d00489a4b6597b0b020918a0e91a1a"
 dependencies = [
  "heck",
- "proc-macro-error",
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "syn 1.0.107",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
 ]

 [[package]]
 name = "clap_lex"
-version = "0.3.1"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade"
+checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
+
+[[package]]
+name = "clicolors-control"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90082ee5dcdd64dc4e9e0d37fbf3ee325419e39c0092191e0393df65518f741e"
 dependencies = [
- "os_str_bytes",
+ "atty",
+ "lazy_static",
+ "libc",
+ "winapi",
 ]

+[[package]]
+name = "colorchoice"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
+
 [[package]]
 name = "conserve"
-version = "23.1.1"
+version = "23.5.0"
 dependencies = [
  "assert_cmd",
  "assert_fs",
+ "assert_matches",
  "blake2-rfc",
  "bytes",
  "cachedir",
  "clap",
+ "clicolors-control",
  "cp_r",
  "derive_more",
  "dir-assert",
  "filetime",
+ "flatbuffers",
  "globset",
  "hex",
  "indoc",
  "itertools",
  "lazy_static",
+ "metrics",
+ "metrics-util",
  "mutants",
  "nix",
  "nutmeg",
@@ -208,6 +316,7 @@
  "rayon",
  "readahead-iterator",
  "regex",
+ "rstest",
  "semver",
  "serde",
  "serde_json",
@@ -248,9 +357,9 @@ dependencies = [
 [[package]]
 name = "crossbeam-channel"
-version = "0.5.6"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
+checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
 dependencies = [
  "cfg-if",
  "crossbeam-utils",
 ]

 [[package]]
 name = "crossbeam-deque"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
+checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
 dependencies = [
  "cfg-if",
  "crossbeam-epoch",
@@ -269,34 +378,33 @@
 [[package]]
 name = "crossbeam-epoch"
-version = "0.9.13"
+version = "0.9.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
+checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
 dependencies = [
  "autocfg",
  "cfg-if",
  "crossbeam-utils",
- "memoffset",
+ "memoffset 0.9.0",
  "scopeguard",
 ]

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.14"
+version = "0.8.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
+checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
 dependencies = [
  "cfg-if",
 ]

 [[package]]
-name = "ctor"
-version = "0.1.26"
+name = "deranged"
+version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096"
+checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
 dependencies = [
- "quote 1.0.23",
- "syn 1.0.107",
+ "serde",
 ]

@@ -306,10 +414,10 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
 dependencies = [
  "convert_case",
- "proc-macro2 1.0.50",
- "quote 1.0.23",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
  "rustc_version",
- "syn 1.0.107",
+ "syn 1.0.109",
 ]

@@ -338,19 +446,25 @@
 checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

 [[package]]
 name = "either"
-version = "1.8.0"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "endian-type"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
+checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"

 [[package]]
 name = "errno"
-version = "0.2.8"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
+checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
 dependencies = [
  "errno-dragonfly",
  "libc",
- "winapi",
+ "windows-sys",
 ]

 [[package]]
@@ -365,18 +479,15 @@ dependencies = [
 [[package]]
 name = "fastrand"
-version = "1.8.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499"
-dependencies = [
- "instant",
-]
+checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"

 [[package]]
 name = "filetime"
-version = "0.2.19"
+version = "0.2.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9"
+checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0"
 dependencies = [
  "cfg-if",
  "libc",
@@ -384,6 +495,16 @@ dependencies = [
  "windows-sys",
 ]

+[[package]]
+name = "flatbuffers"
+version = "23.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77f5399c2c9c50ae9418e522842ad362f61ee48b346ac106807bd355a8a7c619"
+dependencies = [
+ "bitflags 1.3.2",
+ "rustc_version",
+]
+
 [[package]]
 name = "float-cmp"
 version = "0.9.0"
@@ -401,18 +522,18 @@
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

 [[package]]
 name = "form_urlencoded"
-version = "1.1.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
+checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
 dependencies = [
  "percent-encoding",
 ]

 [[package]]
 name = "getrandom"
-version = "0.2.8"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
 dependencies = [
  "cfg-if",
  "libc",
@@ -421,9 +542,9 @@
 [[package]]
 name = "globset"
-version = "0.4.10"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc"
+checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
 dependencies = [
  "aho-corasick",
  "bstr",
@@ -438,16 +559,31 @@
 version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "ignore",
  "walkdir",
 ]

+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038"
+dependencies = [
+ "ahash",
+]
+
 [[package]]
 name = "heck"
-version = "0.4.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

 [[package]]
 name = "hermit-abi"
@@ -460,12 +596,9 @@
 dependencies = [
  "libc",
 ]

 [[package]]
 name = "hermit-abi"
-version = "0.2.6"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
-dependencies = [
- "libc",
-]
+checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"

 [[package]]
 name = "hex"
@@ -475,9 +608,9 @@
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"

 [[package]]
 name = "idna"
-version = "0.3.0"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
+checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
 dependencies = [
  "unicode-bidi",
  "unicode-normalization",
@@ -501,39 +634,29 @@ dependencies = [
 ]

 [[package]]
-name = "indoc"
-version = "1.0.8"
+name = "indexmap"
+version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da2d6f23ffea9d7e76c53eee25dfb67bcd8fde7f1198b0855350698c9f07c780"
-
-[[package]]
-name = "instant"
-version = "0.1.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
- "cfg-if",
+ "autocfg",
+ "hashbrown 0.12.3",
 ]

 [[package]]
-name = "io-lifetimes"
-version = "1.0.4"
+name = "indoc"
+version = "2.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e"
-dependencies = [
- "libc",
- "windows-sys",
-]
+checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4"

 [[package]]
-name = "is-terminal"
-version = "0.4.2"
+name = "io-lifetimes"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189"
+checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
 dependencies = [
- "hermit-abi 0.2.6",
- "io-lifetimes",
- "rustix",
+ "hermit-abi 0.3.2",
+ "libc",
  "windows-sys",
 ]

@@ -548,9 +671,18 @@ dependencies = [
 [[package]]
 name = "itoa"
-version = "1.0.5"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+
+[[package]]
+name = "js-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+dependencies = [
+ "wasm-bindgen",
+]

 [[package]]
 name = "lazy_static"
@@ -560,21 +692,33 @@
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

 [[package]]
 name = "libc"
-version = "0.2.139"
+version = "0.2.147"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
+checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
+
+[[package]]
+name = "libm"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"

 [[package]]
 name = "linux-raw-sys"
-version = "0.1.4"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
+checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"

 [[package]]
 name = "lock_api"
-version = "0.4.9"
+version = "0.4.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
+checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16"
 dependencies = [
  "autocfg",
  "scopeguard",
@@ -582,11 +726,17 @@
 [[package]]
 name = "log"
-version = "0.4.17"
+version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+
+[[package]]
+name = "mach2"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8"
 dependencies = [
- "cfg-if",
+ "libc",
 ]

 [[package]]
@@ -595,7 +745,7 @@
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
 dependencies = [
- "regex-automata",
+ "regex-automata 0.1.10",
 ]

@@ -613,22 +763,81 @@
 dependencies = [
  "autocfg",
 ]

+[[package]]
+name = "memoffset"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "metrics"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5"
+dependencies = [
+ "ahash",
+ "metrics-macros",
+ "portable-atomic",
+]
+
+[[package]]
+name = "metrics-macros"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
+dependencies = [
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
+]
+
+[[package]]
+name = "metrics-util"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e"
+dependencies = [
+ "aho-corasick",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+ "hashbrown 0.13.1",
+ "indexmap",
+ "metrics",
+ "num_cpus",
+ "ordered-float",
+ "quanta",
+ "radix_trie",
+ "sketches-ddsketch",
+]
+
 [[package]]
 name = "mutants"
 version = "0.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bc0287524726960e07b119cebd01678f852f147742ae0d925e6a520dca956126"

+[[package]]
+name = "nibble_vec"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43"
+dependencies = [
+ "smallvec",
+]
+
 [[package]]
 name = "nix"
 version = "0.26.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "cfg-if",
  "libc",
- "memoffset",
+ "memoffset 0.7.1",
  "pin-utils",
  "static_assertions",
 ]

@@ -657,20 +866,21 @@ dependencies = [
 [[package]]
 name = "num-traits"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
 dependencies = [
  "autocfg",
+ "libm",
 ]

 [[package]]
 name = "num_cpus"
-version = "1.15.0"
+version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
+checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
 dependencies = [
- "hermit-abi 0.2.6",
+ "hermit-abi 0.3.2",
  "libc",
 ]

@@ -685,9 +895,8 @@
 dependencies = [

 [[package]]
 name = "nutmeg"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4cf1f0653933873dfd8eccc0ac30b6e12d1db895a4d0dd868d81ce4105400ea"
+version = "0.1.3"
+source = "git+https://github.com/sourcefrog/nutmeg#ebee48a9b964271e0668d60aa57f05190aba3cce"
 dependencies = [
  "atty",
  "parking_lot",
@@ -697,23 +906,17 @@
 [[package]]
 name = "once_cell"
-version = "1.17.0"
+version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"

 [[package]]
-name = "os_str_bytes"
-version = "6.4.1"
+name = "ordered-float"
+version = "3.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
-
-[[package]]
-name = "output_vt100"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
+checksum = "2a54938017eacd63036332b4ae5c8a49fc8c0c1d6d629893057e4f13609edd06"
 dependencies = [
- "winapi",
+ "num-traits",
 ]

 [[package]]
@@ -734,28 +937,28 @@
 dependencies = [

 [[package]]
 name = "parking_lot_core"
-version = "0.9.6"
+version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba1ef8814b5c993410bb3adfad7a5ed269563e4a2f90c41f5d85be7fb47133bf"
+checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
 dependencies = [
  "cfg-if",
  "libc",
  "redox_syscall",
  "smallvec",
- "windows-sys",
+ "windows-targets",
 ]

 [[package]]
 name = "percent-encoding"
-version = "2.2.0"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
+checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"

 [[package]]
 name = "pin-project-lite"
-version = "0.2.9"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
+checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"

 [[package]]
 name = "pin-utils"
@@ -763,6 +966,12 @@
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

+[[package]]
+name = "portable-atomic"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f32154ba0af3a075eefa1eda8bb414ee928f62303a54ea85b8d6638ff1a6ee9e"
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"
@@ -771,10 +980,11 @@
 checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"

 [[package]]
 name = "predicates"
-version = "2.1.5"
+version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd"
+checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9"
 dependencies = [
+ "anstyle",
  "difflib",
  "float-cmp",
  "itertools",
@@ -785,15 +995,15 @@
 [[package]]
 name = "predicates-core"
-version = "1.0.5"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72f883590242d3c6fc5bf50299011695fa6590c2c70eac95ee1bdb9a733ad1a2"
+checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174"

 [[package]]
 name = "predicates-tree"
-version = "1.0.7"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54ff541861505aabf6ea722d2131ee980b8276e10a1297b94e896dd8b621850d"
+checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf"
 dependencies = [
  "predicates-core",
  "termtree",
@@ -801,40 +1011,14 @@
 [[package]]
 name = "pretty_assertions"
-version = "1.3.0"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755"
+checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
 dependencies = [
- "ctor",
  "diff",
- "output_vt100",
  "yansi",
 ]

-[[package]]
-name = "proc-macro-error"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
-dependencies = [
- "proc-macro-error-attr",
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "syn 1.0.107",
- "version_check",
-]
-
-[[package]]
-name = "proc-macro-error-attr"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
-dependencies = [
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "version_check",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "0.4.30"
@@ -846,31 +1030,31 @@
 [[package]]
 name = "proc-macro2"
-version = "1.0.50"
+version = "1.0.66"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
 dependencies = [
  "unicode-ident",
 ]

 [[package]]
 name = "proptest"
-version = "1.0.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5"
+checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65"
 dependencies = [
  "bit-set",
- "bitflags",
+ "bitflags 1.3.2",
  "byteorder",
  "lazy_static",
  "num-traits",
- "quick-error 2.0.1",
  "rand",
  "rand_chacha",
  "rand_xorshift",
- "regex-syntax",
+ "regex-syntax 0.6.29",
  "rusty-fork",
  "tempfile",
+ "unarray",
 ]

@@ -885,16 +1069,26 @@
 [[package]]
-name = "quick-error"
-version = "1.2.3"
+name = "quanta"
+version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab"
+dependencies = [
+ "crossbeam-utils",
+ "libc",
+ "mach2",
+ "once_cell",
+ "raw-cpuid",
+ "wasi",
+ "web-sys",
+ "winapi",
+]

 [[package]]
 name = "quick-error"
-version = "2.0.1"
+version = "1.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"

 [[package]]
 name = "quote"
@@ -907,11 +1101,21 @@
 [[package]]
 name = "quote"
-version = "1.0.23"
+version = "1.0.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
 dependencies = [
- "proc-macro2 1.0.50",
+ "proc-macro2 1.0.66",
+]
+
+[[package]]
+name = "radix_trie"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd"
+dependencies = [
+ "endian-type",
+ "nibble_vec",
 ]

 [[package]]
@@ -953,11 +1157,20 @@
 dependencies = [
  "rand_core",
 ]

+[[package]]
+name = "raw-cpuid"
+version = "10.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
 [[package]]
 name = "rayon"
-version = "1.6.1"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
+checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
 dependencies = [
  "either",
  "rayon-core",
@@ -965,9 +1178,9 @@
 [[package]]
 name = "rayon-core"
-version = "1.10.2"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
+checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
 dependencies = [
  "crossbeam-channel",
  "crossbeam-deque",
@@ -983,22 +1196,23 @@
 checksum = "73ea134c32fe12df286020949d57d052a90c4001f2dbec4c1c074f39bcb7fc8c"

 [[package]]
 name = "redox_syscall"
-version = "0.2.16"
+version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
 ]

 [[package]]
 name = "regex"
-version = "1.7.1"
+version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
+checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a"
 dependencies = [
  "aho-corasick",
  "memchr",
- "regex-syntax",
+ "regex-automata 0.3.6",
+ "regex-syntax 0.7.4",
 ]

 [[package]]
@@ -1007,22 +1221,54 @@
 version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
 dependencies = [
- "regex-syntax",
+ "regex-syntax 0.6.29",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax 0.7.4",
 ]

 [[package]]
 name = "regex-syntax"
-version = "0.6.28"
+version = "0.6.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"

 [[package]]
-name = "remove_dir_all"
-version = "0.5.3"
+name = "regex-syntax"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
+
+[[package]]
+name = "rstest"
+version = "0.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
 dependencies = [
- "winapi",
+checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962"
+ "rstest_macros",
+ "rustc_version",
+]
+
+[[package]]
+name = "rstest_macros"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8"
+dependencies = [
+ "cfg-if",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "rustc_version",
+ "syn 1.0.109",
+ "unicode-ident",
 ]

 [[package]]
@@ -1036,15 +1282,28 @@
 dependencies = [

 [[package]]
 name = "rustix"
-version = "0.36.7"
+version = "0.37.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03"
+checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "errno",
  "io-lifetimes",
  "libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.3.8",
+ "windows-sys",
+]
+
+[[package]]
+name = "rustix"
+version = "0.38.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f"
+dependencies = [
+ "bitflags 2.4.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.5",
  "windows-sys",
 ]

@@ -1055,16 +1314,16 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
 dependencies = [
  "fnv",
- "quick-error 1.2.3",
+ "quick-error",
  "tempfile",
  "wait-timeout",
 ]

 [[package]]
 name = "ryu"
-version = "1.0.12"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"

 [[package]]
 name = "same-file"
@@ -1077,41 +1336,41 @@
 dependencies = [
  "winapi-util",
 ]

 [[package]]
 name = "scopeguard"
-version = "1.1.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

 [[package]]
 name = "semver"
-version = "1.0.16"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a"
+checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"

 [[package]]
 name = "serde"
-version = "1.0.152"
+version = "1.0.186"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
+checksum = "9f5db24220c009de9bd45e69fb2938f4b6d2df856aa9304ce377b3180f83b7c1"
 dependencies = [
  "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.152"
+version = "1.0.186"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
+checksum = "5ad697f7e0b65af4983a4ce8f56ed5b357e8d3c36651bf6a7e13639c17b8e670"
 dependencies = [
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "syn 1.0.107",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
 ]

 [[package]]
 name = "serde_json"
-version = "1.0.91"
+version = "1.0.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
+checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360"
 dependencies = [
  "itoa",
  "ryu",
@@ -1127,11 +1386,17 @@
 dependencies = [
  "lazy_static",
 ]

+[[package]]
+name = "sketches-ddsketch"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1"
+
 [[package]]
 name = "smallvec"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
+checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"

 [[package]]
 name = "snap"
@@ -1164,72 +1429,73 @@
 dependencies = [

 [[package]]
 name = "syn"
-version = "1.0.107"
+version = "1.0.109"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
 dependencies = [
- "proc-macro2 1.0.50",
- "quote 1.0.23",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
  "unicode-ident",
 ]

 [[package]]
-name = "tempfile"
-version = "3.3.0"
+name = "syn"
+version = "2.0.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
+checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
 dependencies = [
- "cfg-if",
- "fastrand",
- "libc",
- "redox_syscall",
- "remove_dir_all",
- "winapi",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "unicode-ident",
 ]

 [[package]]
-name = "termcolor"
-version = "1.2.0"
+name = "tempfile"
+version = "3.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
+checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef"
 dependencies = [
- "winapi-util",
+ "cfg-if",
+ "fastrand",
+ "redox_syscall",
+ "rustix 0.38.8",
+ "windows-sys",
 ]

 [[package]]
 name = "terminal_size"
-version = "0.2.3"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cb20089a8ba2b69debd491f8d2d023761cbf196e999218c591fa1e7e15a21907"
+checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237"
 dependencies = [
- "rustix",
+ "rustix 0.37.23",
  "windows-sys",
 ]

 [[package]]
 name = "termtree"
-version = "0.4.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95059e91184749cb66be6dc994f67f182b6d897cb3df74a5bf66b5e709295fd8"
+checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"

 [[package]]
 name = "thiserror"
-version = "1.0.38"
+version = "1.0.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
+checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f"
 dependencies = [
  "thiserror-impl",
 ]

 [[package]]
 name = "thiserror-impl"
-version = "1.0.38"
+version = "1.0.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
+checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b"
 dependencies = [
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "syn 1.0.107",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
 ]

 [[package]]
@@ -1240,19 +1506,21 @@
 checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"

 [[package]]
 name = "thread_local"
-version = "1.1.4"
+version = "1.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
+checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
 dependencies = [
+ "cfg-if",
  "once_cell",
 ]

 [[package]]
 name = "time"
-version = "0.3.17"
+version = "0.3.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376"
+checksum = "0bb39ee79a6d8de55f48f2293a830e040392f1c5f16e336bdd1788cd0aadce07"
 dependencies = [
+ "deranged",
  "itoa",
  "libc",
  "num_threads",
@@ -1263,15 +1531,15 @@
 [[package]]
 name = "time-core"
-version = "0.1.0"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"
+checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"

 [[package]]
 name = "time-macros"
-version = "0.2.6"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2"
+checksum = "733d258752e9303d392b94b75230d07b0b9c489350c69b851fc6c065fde3e8f9"
 dependencies = [
  "time-core",
 ]

@@ -1287,9 +1555,9 @@
 [[package]]
 name = "tinyvec_macros"
-version = "0.1.0"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"

 [[package]]
 name = "tracing"
@@ -1316,20 +1584,20 @@
 [[package]]
 name = "tracing-attributes"
-version = "0.1.23"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
+checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
 dependencies = [
- "proc-macro2 1.0.50",
- "quote 1.0.23",
- "syn 1.0.107",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
 ]

 [[package]]
 name = "tracing-core"
-version = "0.1.30"
+version = "0.1.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
+checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
 dependencies = [
  "once_cell",
  "valuable",
@@ -1346,29 +1614,43 @@
 dependencies = [
  "tracing-core",
 ]

+[[package]]
+name = "tracing-serde"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1"
+dependencies = [
+ "serde",
+ "tracing-core",
+]
+
 [[package]]
 name = "tracing-subscriber"
-version = "0.3.16"
+version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70"
+checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
 dependencies = [
  "matchers",
  "nu-ansi-term",
  "once_cell",
  "regex",
+ "serde",
+ "serde_json",
  "sharded-slab",
  "smallvec",
  "thread_local",
+ "time",
  "tracing",
  "tracing-core",
  "tracing-log",
+ "tracing-serde",
 ]

 [[package]]
 name = "tracing-test"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e3d272c44878d2bbc9f4a20ad463724f03e19dbc667c6e84ac433ab7ffcc70b"
+checksum = "3a2c0ff408fe918a94c428a3f2ad04e4afd5c95bbc08fcf868eff750c15728a4"
 dependencies = [
  "lazy_static",
  "tracing-core",
@@ -1378,26 +1660,32 @@
 [[package]]
 name = "tracing-test-macro"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "744324b12d69a9fc1edea4b38b7b1311295b662d161ad5deac17bb1358224a08"
+checksum = "258bc1c4f8e2e73a977812ab339d503e6feeb92700f6d07a6de4d321522d5c08"
 dependencies = [
  "lazy_static",
- "quote 1.0.23",
- "syn 1.0.107",
+ "quote 1.0.33",
+ "syn 1.0.109",
 ]

+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
 [[package]]
 name = "unicode-bidi"
-version = "0.3.10"
+version = "0.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58"
+checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"

 [[package]]
 name = "unicode-ident"
-version = "1.0.6"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

 [[package]]
 name = "unicode-normalization"
@@ -1416,15 +1704,15 @@
 checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"

 [[package]]
 name = "unix_mode"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35abed4630bb800f02451a7428205d1f37b8e125001471bfab259beee6a587ed"
+checksum = "b55eedc365f81a3c32aea49baf23fa965e3cd85bcc28fb8045708c7388d124ef"

 [[package]]
 name = "url"
-version = "2.3.1"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
+checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
 dependencies = [
  "form_urlencoded",
  "idna",
@@ -1441,6 +1729,12 @@
 dependencies = [
  "log",
 ]

+[[package]]
+name = "utf8parse"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
+
 [[package]]
 name = "valuable"
 version = "0.1.0"
@@ -1464,12 +1758,11 @@
 [[package]]
 name = "walkdir"
-version = "2.3.2"
+version = "2.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
+checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
 dependencies = [
  "same-file",
- "winapi",
  "winapi-util",
 ]

@@ -1479,6 +1772,70 @@
 version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

+[[package]]
+name = "wasm-bindgen"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+dependencies = [
+ "quote 1.0.33",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+dependencies = [
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+
+[[package]]
+name = "web-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -1512,9 +1869,18 @@
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

 [[package]]
 name = "windows-sys"
-version = "0.42.0"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
 dependencies = [
  "windows_aarch64_gnullvm",
  "windows_aarch64_msvc",
@@ -1527,45 +1893,45 @@
 [[package]]
 name = "windows_aarch64_gnullvm"
-version = "0.42.1"
+version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"

 [[package]]
 name = "windows_aarch64_msvc"
-version = "0.42.1"
+version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"

 [[package]]
 name = "windows_i686_gnu"
-version = "0.42.1"
+version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"

 [[package]]
 name = "windows_i686_msvc"
-version = "0.42.1"
+version = "0.48.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
"bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "yansi" diff --git a/Cargo.toml b/Cargo.toml index 16edd0d7..03b78849 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "GPL-2.0" name = "conserve" readme = "README.md" repository = "https://github.com/sourcefrog/conserve/" -version = "23.1.1" +version = "23.5.0" rust-version = "1.63" [[bin]] @@ -16,17 +16,22 @@ doc = false name = "conserve" [dependencies] +assert_matches = "1.5.0" blake2-rfc = "0.2.18" bytes = "1.1.0" cachedir = "0.3" +clicolors-control = "1.0" derive_more = "0.99" filetime = "0.2" +flatbuffers = "23.1.21" globset = "0.4.5" hex = "0.4.2" +indoc = "2.0" itertools = "0.10" lazy_static = "1.4.0" +metrics = "0.21" +metrics-util = "0.15" mutants = "0.0.3" -nutmeg = "0.1" rayon = "1.3.0" readahead-iterator = "0.1.1" regex = "1.3.9" @@ -37,32 +42,43 @@ snap = "1.0.0" tempfile = "3" thiserror = "1.0.19" thousands = "0.2.0" -time = { version = "0.3", features = ["local-offset"] } +time = { version = "0.3", features = [ + "local-offset", + "serde", + "serde-human-readable", +] } tracing = "0.1" tracing-appender = "0.2" -tracing-subscriber = { version = "0.3.11", features = ["env-filter", "fmt"] } unix_mode = "0.1" url = "2.2.2" -indoc = "1.0.8" [target.'cfg(unix)'.dependencies] users = "0.11" nix = "0.26" [dependencies.clap] -version = "4.0" +version = "4.3" features = ["derive", "deprecated", "wrap_help"] +[dependencies.nutmeg] +version = "0.1.3-pre" +git = "https://github.com/sourcefrog/nutmeg" + +[dependencies.tracing-subscriber] +version = "0.3.16" +features = ["env-filter", "fmt", "json", "local-time", "time"] + [dev-dependencies] assert_cmd = "2.0" assert_fs = "1.0" cp_r = "0.5" dir-assert = "0.2" -predicates = "2" +predicates = "3" pretty_assertions = "1.0" proptest = "1.0" proptest-derive = "0.3" -tracing-test = "0.2" +rstest = { version = "0.17", default-features = false } +tracing-test = { version = "0.2", features = ["no-env-filter"] } [features] blake2_simd_asm = ["blake2-rfc/simd_asm"] diff --git a/NEWS.md b/NEWS.md index 542aec66..d5b97a0f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,23 @@ # Conserve release history -## Unreleased +## 23.5.0 + +- Better progress bars for various operations including `validate`. - Don't complain if unable to chown during restore; this is normal when not run as root. +- New `--log-json` global option to capture all logs, and `--metrics-json` to write out counters. 
+
+- New internal non-breaking format change: backups (in the band header) can now declare some
+  format flags needed to read the backup correctly. If any format flags are set then at least
+  Conserve 23.2.0 is needed to read the backup.
+
+- New `--changes-json` option to `restore` and `backup`.
+
+- `diff` output format has changed slightly to be the same as `backup`.
+
+- New `diff --json` and `ls --json`.
+
 ## 23.1.1

 - Fixed: User and group mappings are now cached in memory. This fixes a performance regression in
diff --git a/doc/design.md b/doc/design.md
index a6e18107..d9f42947 100644
--- a/doc/design.md
+++ b/doc/design.md
@@ -98,7 +98,7 @@
 the filesystem behavior, they should notice the band has already been created,
 and abort.

 Index blocks are written by atomically renaming them in to place. If the block
-already exists, the new version (with identical contents) is simpy discarded.
+already exists, the new version (with identical contents) is simply discarded.
 So, concurrent writes of blocks are safe, and indeed can happen from multiple
 threads in the same process.
@@ -196,7 +196,7 @@ well as to the terminal, and at a different level of detail.

 This implies:

 - Since the terminal UI is a log target, it must be constructed just once near
   program startup, and therefore it cannot be on during in-process tests.

-Progess bars are drawn only for the small number of main loops that are expected
+Progress bars are drawn only for the small number of main loops that are expected
 to take a long time, and don't implicitly pop up due to IO.

 ## Diff
diff --git a/doc/format.md b/doc/format.md
index 7dac4b00..3a0f3de5 100644
--- a/doc/format.md
+++ b/doc/format.md
@@ -119,11 +119,6 @@
 In Conserve 0.6, only bands with a single integer, called a _top level band_,
 are generated or supported. Top level bands contain an index listing every
 entry present in that tree. Top level bands are numbered sequentially from
 `b0000`.

-Bands that are not top-level are _child bands_, and their _parent band_ is the
-band with the last component of their name removed. Child bands' index contains
-only the changes relative to their parent band's index. (Child bands are not
-implemented as of Conserve 0.6.)
-
 A band can be _incomplete_, while it is receiving data, or _complete_ when
 everything from the source has been written. Bands may remain incomplete
 indefinitely, across multiple Conserve invocations, until they are finished.
@@ -158,6 +153,9 @@
 The head file contains:

 - `start_time`: The Unix time, in seconds, when the band was started.
 - `band_format_version`: The minimum program version to correctly read this
   band.
+- `format_flags`: A list of strings indicating capabilities required to read
+  this band correctly. If this is set and non-empty, then the `band_format_version`
+  must be at least 23.2.0.

 ### Band tail file
@@ -170,6 +168,10 @@
 Band footer contains:

 - `index_hunk_count`: The number of index hunks that should be present for
   this band. (Since 0.6.4.)

+## Format flags
+
+(None are defined yet.)
+
 ## Data block directory

 An archive contains a single data block directory, which stores the compressed
diff --git a/doc/manifesto.md b/doc/manifesto.md
index c0a7f948..58c30411 100644
--- a/doc/manifesto.md
+++ b/doc/manifesto.md
@@ -96,32 +96,32 @@
 Storage to cloud object stores, local disks, and removable media are all
 important. Conserve should rely on only features common across all of them.

-- You can write whole files, but not update in place.
+* You can write whole files, but not update in place. -- May have relatively long per-file latency on both read and write. +* May have relatively long per-file latency on both read and write. -- Storage bandwidth may be relatively limited relative to the source tree +* Storage bandwidth may be relatively limited relative to the source tree size. -- No filesystem metadata (ownership etc) can be stored directly; it must +* No filesystem metadata (ownership etc) can be stored directly; it must be encoded -- You can list directories (or, "list files starting with a certain prefix") +* You can list directories (or, "list files starting with a certain prefix") -- May or may not be case sensitive. +* May or may not be case sensitive. -- Can't detect whether an empty directory exists or not, and might not have a +* Can't detect whether an empty directory exists or not, and might not have a strong concept of directories, perhaps only ordered names. -- Do not assume that renaming over an existing file is allowed or disallowed. +* Do not assume that renaming over an existing file is allowed or disallowed. -- Conserve can cache information onto the source machine's local disk, but of +* Conserve can cache information onto the source machine's local disk, but of course this cache may be lost or may need to be disabled. (We don't currently do this, and it would keep things simpler and more robust not to.) -- Connection may be lost and the backup terminated at any point. +* Connection may be lost and the backup terminated at any point. -- No guarantee of read-after-write consistency. (In practice, perhaps several +* No guarantee of read-after-write consistency. (In practice, perhaps several seconds after writing the change will be visible.) We cannot assume a remote smart server: the only network calls are @@ -181,7 +181,7 @@ archive. It's very possible that the size of the source relative to the IO bandwidth of the destination means writing all the new data will take hours. This can most -easily happen on the first backup, but als on incremental backups. +easily happen on the first backup, but also on incremental backups. In that case the backup may be interrupted - by the user interrupting it, machine going to sleep, or losing connectivity, or rebooting. @@ -208,7 +208,6 @@ This excludes a few design options taken by other programs: Remembering a save-point on the source machine seems more dangerous than looking in the archive to see what's been stored. - ## Validation Test restores of the whole tree take a long time and users don't do them @@ -224,14 +223,12 @@ to the source directory, to catch corruption or Conserve bugs. These can flag false-positive if there have been intended changes to the source directory after the backup, so the results need to be understandable. - ## Hands-off Conserve will let you set up cron jobs to do daily backups, verification, and retrenchment, and it should then run hands off and entirely unattended. (Users should also do a black-box restore test, which should never fail.) - ## UI Conserve will have a human oriented text UI, and a machine UI that can diff --git a/doc/style.md b/doc/style.md index 776e8343..a1c114d5 100644 --- a/doc/style.md +++ b/doc/style.md @@ -69,7 +69,7 @@ Code in Conserve can be tested in any of three ways: 1. Key features and behaviors accessible through the command-line interface should be tested in `tests/cli`, which runs the `conserve` binary as a - subprocess and examines its output. Since Conserve is + subprocess and examines its output. 
Since Conserve is primarily intended for use as a command-line tool these are the most important tests to add. @@ -97,7 +97,7 @@ tree) won't have deterministic permissions or mtimes. Use `use crate::xyz` rather than `use super::xyz` to import other things from the Conserve implementation. (Either is valid and they seem just as good, but -let's pick `crate` to be consisent.) +let's pick `crate` to be consistent.) Conserve implementation code and integration tests can say `use crate::*` to include every re-exported symbol, although this isn't recommended for external diff --git a/doc/unimplemented/encryption.md b/doc/unimplemented/encryption.md index 0234394f..16d1476a 100644 --- a/doc/unimplemented/encryption.md +++ b/doc/unimplemented/encryption.md @@ -21,7 +21,7 @@ But we assume that attackers can: - For example, because they control the server to which encrypted archives are written - Or because they can gain physical access to a drive holding backups - See how the encrypted archive changes over time - - For example, if they control the server hosting archives they can collect a trace of + - For example, if they control the server hosting archives they can collect a trace of file reads and writes - Tamper with the encrypted archive content: - Including deleting and modifying files @@ -66,17 +66,17 @@ some separate stable storage, a record of when backups were made. An attacker should not be able to: -- Silently corrupt, change, or remove data, other than reverting to a previous version of the +- Silently corrupt, change, or remove data, other than reverting to a previous version of the archive, including: - Copying files to different names - Removing some files from the tree (other than by reverting to a moment when the backup was incomplete) -- Prevent a machine making a new correct backup. In other words, after +- Prevent a machine making a new correct backup. In other words, after tampering, newly written files in a new backup will still be correct. -- Execute downgrade attacks that manipulate the backup program into +- Execute downgrade attacks that manipulate the backup program into writing unencrypted content or using an attacker-influenced key. -For performance reasons Conserve does not throughly validate all existing +For performance reasons Conserve does not thoroughly validate all existing blocks when it writes a new archive, so corruption of existing blocks may go unnoticed for some time. However `conserve validate` should detect this corruption. @@ -87,7 +87,7 @@ The key can be stored in a file for noninteractive scheduled backups. The key can optionally be stored in some kind of system keyring, so that it is somewhat harder to steal, e.g. so that it is only unlocked when the user is logged in. (At the price of only being available to make backups when the user is logged in, in that case.) -It's important that users keep a copy of the key in a place where it will not be lost if the backup source is lost, e.g. typically not on the same machine. The key should be concisely representable as text. These backups of the key must also be stored somewhere that the user feels is significantly less likely to be compromised than the backup storage itself, otherwise the encrytion is adding no value. +It's important that users keep a copy of the key in a place where it will not be lost if the backup source is lost, e.g. typically not on the same machine. The key should be concisely representable as text. 
These backups of the key must also be stored somewhere that the user feels is significantly less likely to be compromised than the backup storage itself, otherwise the encryption is adding no value. Test restores or validation should allow the user to try presenting the key as if they were doing a recovery, e.g. by typing it in or using a non-default file, even if it is normally read from a file or keyring. @@ -164,7 +164,7 @@ When the keys are rotated, existing blocks in unchanged files can still match ag ### Block encryption To write a block, it is first hashed, with the hash key. If the hash is already present, that's -enough, and the keyed hash can be used to refer to the block content from the index or +enough, and the keyed hash can be used to refer to the block content from the index or meta-index. Otherwise, the block content is encrypted. (In unencrypted archives the block would be compressed at this point; in encrypted archives it is not.) @@ -217,8 +217,8 @@ Since Tink generates a random IV for each block, IVs are never reused. By the same logic as for Eve, Mallory cannot decrypt block content. -If Mallory blindly changes the content of a block file or truncates it, then -when decrypted it will be discovered to have the wrong keyed hash, which +If Mallory blindly changes the content of a block file or truncates it, then +when decrypted it will be discovered to have the wrong keyed hash, which will be detected as corruption. If Mallory copies one block file in place of another the IV will be wrong, so @@ -236,11 +236,11 @@ It is important that the backup client must not trust the archive's assertion wh ### Assessment: chosen-plaintext attacks -An attacker who can both inject chosen plaintext and observe writes to the archive +An attacker who can both inject chosen plaintext and observe writes to the archive may be able to determine whether the plaintext is already present in the archive. For example, if the attacker injects a 1MB file (which will be written as a single block) and observes that no new large blocks are written, then they can infer -that an identical block was already present at some point in the archive. +that an identical block was already present at some point in the archive. (It does not necessarily prove that the content is present in the most recent tree, only that the block was still present.) @@ -257,13 +257,13 @@ fairly large and hold multiple files, but in some cases Conserve will emit only small data blocks, most obviously when only one small file has changed, but also when changes have to be flushed out to finalize an index block. -The most favorable case for an attacker is if they're trying to guess whether +The most favorable case for an attacker is if they're trying to guess whether a particular single-byte file is present, and they can inject new single-byte files into an otherwise-quiescent archive. The simplest attack is to guess one file at a time, in which case they will likely find the answer after 255 guesses. -Potentially the attacker could make multiple guesses per backup cycle, but -they then face the risk that their small files will be combined into a single -larger block, yielding inconclusive results. +Potentially the attacker could make multiple guesses per backup cycle, but +they then face the risk that their small files will be combined into a single +larger block, yielding inconclusive results. 
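+
+As a rough illustrative sketch (the function and parameter names here are
+assumptions for illustration, not Conserve's actual code), the reason
+multiple simultaneous guesses are inconclusive is that the number of new
+blocks an observer sees is not proportional to the number of injected
+small files:
+
+```rust
+/// Model how many new blocks an observer would see for a set of injected
+/// file sizes, assuming files at or below `small_file_cap` are coalesced
+/// into one combined block.
+fn observable_new_blocks(guess_sizes: &[u64], small_file_cap: u64) -> usize {
+    let large = guess_sizes.iter().filter(|&&s| s > small_file_cap).count();
+    let any_small = guess_sizes.iter().any(|&s| s <= small_file_cap);
+    large + usize::from(any_small)
+}
+```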
Interestingly, this attack can only be done once per archive, since after each byte is guessed it will then be present in the blockdir and future guesses will @@ -277,10 +277,8 @@ If, as is planned, small files are stored inline in the index then this attack becomes infeasible for any file small enough to make guessing even remotely feasible. -If blocks were compressed, it might be possible for an attacker to inject a -series of chosen plaintexts and gradually measure whether they compress well +If blocks were compressed, it might be possible for an attacker to inject a +series of chosen plaintexts and gradually measure whether they compress well against other files nearby in the tree. Because compression is disabled in encrypted archives the attacker is limited to guessing at whether whole blocks are present, which seems much less tractable. - - diff --git a/doc/unimplemented/format7.md b/doc/unimplemented/format7.md index c2ac1c7f..a3cf83e2 100644 --- a/doc/unimplemented/format7.md +++ b/doc/unimplemented/format7.md @@ -20,11 +20,19 @@ Band ids currently support a dashed-decimal syntax and are internally a `Vec) -> Result { - transport - .create_dir("") - .map_err(|source| Error::CreateArchiveDirectory { source })?; - let names = transport.list_dir_names("").map_err(Error::from)?; + transport.create_dir("")?; + let names = transport.list_dir("")?; if !names.files.is_empty() || !names.dirs.is_empty() { return Err(Error::NewArchiveDirectoryNotEmpty); } @@ -95,14 +91,7 @@ impl Archive { pub fn open(transport: Box) -> Result { let header: ArchiveHeader = - read_json(&transport, HEADER_FILENAME).map_err(|err| match err { - Error::MetadataNotFound { .. } => Error::NotAnArchive {}, - Error::IOError { source } if source.kind() == ErrorKind::NotFound => { - Error::NotAnArchive {} - } - Error::IOError { source } => Error::ReadArchiveHeader { source }, - other => other, - })?; + read_json(&transport, HEADER_FILENAME)?.ok_or(Error::NotAnArchive)?; if header.conserve_archive_version != ARCHIVE_VERSION { return Err(Error::UnsupportedArchiveVersion { version: header.conserve_archive_version, @@ -119,18 +108,29 @@ impl Archive { &self.block_dir } - pub fn band_exists(&self, band_id: &BandId) -> Result { + pub fn band_exists(&self, band_id: BandId) -> Result { self.transport .is_file(&format!("{}/{}", band_id, crate::BAND_HEAD_FILENAME)) .map_err(Error::from) } - pub fn band_is_closed(&self, band_id: &BandId) -> Result { + pub fn band_is_closed(&self, band_id: BandId) -> Result { self.transport .is_file(&format!("{}/{}", band_id, crate::BAND_TAIL_FILENAME)) .map_err(Error::from) } + /// Return an iterator of entries in a selected version. + pub fn iter_entries( + &self, + band_selection: BandSelectionPolicy, + subtree: Apath, + exclude: Exclude, + ) -> Result> { + self.open_stored_tree(band_selection)? + .iter_entries(subtree, exclude) + } + /// Returns a vector of band ids, in sorted order from first to last. pub fn list_band_ids(&self) -> Result> { let mut band_ids: Vec = self.iter_band_ids_unsorted()?.collect(); @@ -146,15 +146,15 @@ impl Archive { match band_selection { BandSelectionPolicy::LatestClosed => self .last_complete_band()? 
- .map(|band| band.id().clone()) - .ok_or(Error::ArchiveEmpty), + .map(|band| band.id()) + .ok_or(Error::NoCompleteBands), BandSelectionPolicy::Specified(band_id) => Ok(band_id), BandSelectionPolicy::Latest => self.last_band_id()?.ok_or(Error::ArchiveEmpty), } } pub fn open_stored_tree(&self, band_selection: BandSelectionPolicy) -> Result { - StoredTree::open(self, &self.resolve_band_id(band_selection)?) + StoredTree::open(self, self.resolve_band_id(band_selection)?) } /// Return an iterator of valid band ids in this archive, in arbitrary order. @@ -165,8 +165,7 @@ impl Archive { // problem. Validate does. Ok(self .transport - .list_dir_names("") - .map_err(|source| Error::ListBands { source })? + .list_dir("")? .dirs .into_iter() .filter(|dir_name| dir_name != BLOCK_DIR) @@ -181,8 +180,8 @@ impl Archive { /// Return the last completely-written band id, if any. pub fn last_complete_band(&self) -> Result> { - for id in self.list_band_ids()?.iter().rev() { - let b = Band::open(self, id)?; + for band_id in self.list_band_ids()?.into_iter().rev() { + let b = Band::open(self, band_id)?; if b.is_closed()? { return Ok(Some(b)); } @@ -195,17 +194,29 @@ impl Archive { /// Shows a progress bar as they're collected. pub fn referenced_blocks(&self, band_ids: &[BandId]) -> Result> { let archive = self.clone(); - let progress = nutmeg::View::new( - LinearModel::new("Find referenced blocks in band", band_ids.len()), - ui::nutmeg_options(), - ); + // TODO: Percentage completion based on how many bands have been checked so far. + let bar = Bar::new(); + let references_found = AtomicUsize::new(0); + let bands_started = AtomicUsize::new(0); + let total_bands = band_ids.len(); + let start = Instant::now(); Ok(band_ids .par_iter() - .inspect(move |_| progress.update(|model| model.increment(1))) - .map(move |band_id| Band::open(&archive, band_id).expect("Failed to open band")) + .inspect(|_| { + bands_started.fetch_add(1, Ordering::Relaxed); + }) + .map(move |band_id| Band::open(&archive, *band_id).expect("Failed to open band")) .flat_map_iter(|band| band.index().iter_entries()) .flat_map_iter(|entry| entry.addrs) .map(|addr| addr.hash) + .inspect(|_hash| { + bar.post(Progress::ReferencedBlocks { + references_found: references_found.fetch_add(1, Ordering::Relaxed), + bands_started: bands_started.load(Ordering::Relaxed), + total_bands, + start, + }) + }) .collect()) } @@ -214,7 +225,7 @@ impl Archive { let referenced = self.referenced_blocks(&self.list_band_ids()?)?; Ok(self .block_dir() - .block_names()? + .iter_block_names()? .filter(move |h| !referenced.contains(h))) } @@ -236,58 +247,68 @@ impl Archive { } else { gc_lock::GarbageCollectionLock::new(self)? }; + debug!("Got gc lock"); let block_dir = self.block_dir(); + debug!("List band ids..."); let mut keep_band_ids = self.list_band_ids()?; keep_band_ids.retain(|b| !delete_band_ids.contains(b)); + debug!("List referenced blocks..."); let referenced = self.referenced_blocks(&keep_band_ids)?; - let progress = nutmeg::View::new( - UnboundedModel::new("Find present blocks"), - ui::nutmeg_options(), - ); - let unref = self - .block_dir() - .block_names()? 
- .inspect(|_| progress.update(|model| model.increment(1))) - .filter(|bh| !referenced.contains(bh)) - .collect_vec(); - drop(progress); + debug!(referenced.len = referenced.len()); + + debug!("Find present blocks..."); + let present = self.block_dir.block_names_set()?; + debug!(present.len = present.len()); + + debug!("Find unreferenced blocks..."); + let unref = present.difference(&referenced).collect_vec(); let unref_count = unref.len(); + debug!(unref_count); stats.unreferenced_block_count = unref_count; - let progress = nutmeg::View::new( - LinearModel::new("Measure unreferenced blocks", unref.len()), - ui::nutmeg_options(), - ); + debug!("Measure unreferenced blocks..."); + let measure_bar = Bar::new(); let total_bytes = unref .par_iter() - .inspect(|_| progress.update(|model| model.increment(1))) - .map(|block_id| block_dir.compressed_size(block_id).unwrap_or_default()) + .enumerate() + .inspect(|(i, _)| { + measure_bar.post(Progress::MeasureUnreferenced { + blocks_done: *i, + blocks_total: unref_count, + }) + }) + .map(|(_i, block_id)| block_dir.compressed_size(block_id).unwrap_or_default()) .sum(); + drop(measure_bar); stats.unreferenced_block_bytes = total_bytes; if !options.dry_run { delete_guard.check()?; + let bar = Bar::new(); - let progress = nutmeg::View::new( - LinearModel::new("Delete bands", delete_band_ids.len()), - ui::nutmeg_options(), - ); - for band_id in delete_band_ids { - Band::delete(self, band_id)?; + for (bands_done, band_id) in delete_band_ids.iter().enumerate() { + Band::delete(self, *band_id)?; stats.deleted_band_count += 1; - progress.update(|model| model.increment(1)); + bar.post(Progress::DeleteBands { + bands_done, + total_bands: delete_band_ids.len(), + }); } - let progress = nutmeg::View::new( - LinearModel::new("Delete blocks", unref_count), - ui::nutmeg_options(), - ); + let blocks_done: AtomicUsize = AtomicUsize::new(0); + let start = Instant::now(); let error_count = unref .par_iter() - .inspect(|_| progress.update(|model| model.increment(1))) - .filter(|block_hash| block_dir.delete_block(block_hash).is_err()) + .filter(|block_hash| { + bar.post(Progress::DeleteBlocks { + blocks_done: blocks_done.fetch_add(1, Ordering::Relaxed), + start, + total_blocks: unref_count, + }); + block_dir.delete_block(block_hash).is_err() + }) .count(); stats.deletion_errors += error_count; stats.deleted_block_count += unref_count - error_count; @@ -297,225 +318,84 @@ impl Archive { Ok(stats) } - pub fn validate(&self, options: &ValidateOptions) -> Result { - let start = Instant::now(); - let mut stats = self.validate_archive_dir()?; + /// Walk the archive to check all invariants. + /// + /// If problems are found, they are emitted as `warn` or `error` level + /// tracing messages. This function only returns an error if validation + /// stops due to a fatal error. + pub fn validate(&self, options: &ValidateOptions) -> Result<()> { + self.validate_archive_dir()?; - ui::println("Count indexes..."); + debug!("List bands..."); let band_ids = self.list_band_ids()?; - ui::println(&format!("Checking {} indexes...", band_ids.len())); + debug!("Check {} bands...", band_ids.len()); // 1. Walk all indexes, collecting a list of (block_hash6, min_length) // values referenced by all the indexes. - let (referenced_lens, ref_stats) = validate::validate_bands(self, &band_ids); - stats += ref_stats; + let referenced_lens = validate::validate_bands(self, &band_ids)?; if options.skip_block_hashes { // 3a. 
Check that all referenced blocks are present, without spending time reading their // content. - ui::println("List present blocks..."); - // TODO: Just validate blockdir structure. + debug!("List blocks..."); + // TODO: Check for unexpected files or directories in the blockdir. let present_blocks: HashSet = self.block_dir.block_names_set()?; - for block_hash in referenced_lens - .keys() - .filter(|&bh| !present_blocks.contains(bh)) - { - ui::problem(&format!("Block {block_hash:?} is missing")); - stats.block_missing_count += 1; + for block_hash in referenced_lens.keys() { + if !present_blocks.contains(block_hash) { + error!(%block_hash, "Referenced block missing"); + } } } else { // 2. Check the hash of all blocks are correct, and remember how long // the uncompressed data is. - ui::println("Check blockdir..."); - let block_lengths: HashMap = self.block_dir.validate(&mut stats)?; + let block_lengths: HashMap = self.block_dir.validate()?; // 3b. Check that all referenced ranges are inside the present data. for (block_hash, referenced_len) in referenced_lens { - if let Some(actual_len) = block_lengths.get(&block_hash) { - if referenced_len > (*actual_len as u64) { - ui::problem(&format!("Block {block_hash:?} is too short",)); - // TODO: A separate counter; this is worse than just being missing - stats.block_missing_count += 1; + if let Some(&actual_len) = block_lengths.get(&block_hash) { + if referenced_len > actual_len as u64 { + error!( + %block_hash, + referenced_len, + actual_len, + "Block is shorter than referenced length" + ); } } else { - ui::problem(&format!("Block {block_hash:?} is missing")); - stats.block_missing_count += 1; + error!(%block_hash, "Referenced block missing"); } } } - - stats.elapsed = start.elapsed(); - Ok(stats) + Ok(()) } - fn validate_archive_dir(&self) -> Result { + fn validate_archive_dir(&self) -> Result<()> { // TODO: More tests for the problems detected here. - let mut stats = ValidateStats::default(); - ui::println("Check archive top-level directory..."); + debug!("Check archive directory..."); let mut seen_bands = HashSet::::new(); - for entry_result in self - .transport - .iter_dir_entries("") - .map_err(|source| Error::ListBands { source })? - { - match entry_result { - Ok(DirEntry { - kind: Kind::Dir, - name, - .. - }) => { - if name.eq_ignore_ascii_case(BLOCK_DIR) { - } else if let Ok(band_id) = name.parse() { - if !seen_bands.insert(band_id) { - stats.structure_problems += 1; - ui::problem(&format!( - "Duplicated band directory in {:?}: {name:?}", - self.transport, - )); - } - } else { - stats.unexpected_files += 1; - ui::problem(&format!( - "Unexpected directory in {:?}: {name:?}", - self.transport, - )); - } - } - Ok(DirEntry { - kind: Kind::File, - name, - .. - }) => { - if !name.eq_ignore_ascii_case(HEADER_FILENAME) - && !name.eq_ignore_ascii_case(crate::gc_lock::GC_LOCK) - && !name.eq_ignore_ascii_case(".DS_Store") - { - stats.unexpected_files += 1; - ui::problem(&format!( - "Unexpected file in archive directory {:?}: {name:?}", - self.transport, - )); - } - } - Ok(DirEntry { kind, name, .. 
}) => { - ui::problem(&format!( - "Unexpected file kind in archive directory: {name:?} of kind {kind:?}" - )); - stats.unexpected_files += 1; - } - Err(source) => { - ui::problem(&format!("Error listing archive directory: {source:?}")); - stats.io_errors += 1; + let list_dir = self.transport.list_dir("")?; + for dir_name in list_dir.dirs { + if let Ok(band_id) = dir_name.parse::() { + if !seen_bands.insert(band_id) { + // TODO: Test this + error!(%band_id, "Duplicated band directory"); } + } else if !dir_name.eq_ignore_ascii_case(BLOCK_DIR) { + // TODO: The whole path not just the filename + warn!( + path = dir_name, + "Unexpected subdirectory in archive directory" + ); } } - Ok(stats) - } -} - -#[cfg(test)] -mod tests { - use std::fs; - use std::io::Read; - - use assert_fs::prelude::*; - use assert_fs::TempDir; - - use crate::test_fixtures::ScratchArchive; - - use super::*; - - #[test] - fn create_then_open_archive() { - let testdir = TempDir::new().unwrap(); - let arch_path = testdir.path().join("arch"); - let arch = Archive::create_path(&arch_path).unwrap(); - - assert!(arch.list_band_ids().unwrap().is_empty()); - - // We can re-open it. - Archive::open_path(&arch_path).unwrap(); - assert!(arch.list_band_ids().unwrap().is_empty()); - assert!(arch.last_complete_band().unwrap().is_none()); - } - - #[test] - fn fails_on_non_empty_directory() { - let temp = TempDir::new().unwrap(); - - temp.child("i am already here").touch().unwrap(); - - let result = Archive::create_path(temp.path()); - assert!(result.is_err()); - if let Err(Error::NewArchiveDirectoryNotEmpty) = result { - } else { - panic!("expected an error for a non-empty new archive directory") + for name in list_dir.files { + if !name.eq_ignore_ascii_case(HEADER_FILENAME) + && !name.eq_ignore_ascii_case(crate::gc_lock::GC_LOCK) + && !name.eq_ignore_ascii_case(".DS_Store") + { + // TODO: The whole path not just the filename + warn!(path = name, "Unexpected file in archive directory"); + } } - - temp.close().unwrap(); - } - - /// A new archive contains just one header file. - /// The header is readable json containing only a version number. - #[test] - fn empty_archive() { - let af = ScratchArchive::new(); - - assert!(af.path().is_dir()); - assert!(af.path().join("CONSERVE").is_file()); - assert!(af.path().join("d").is_dir()); - - let header_path = af.path().join("CONSERVE"); - let mut header_file = fs::File::open(header_path).unwrap(); - let mut contents = String::new(); - header_file.read_to_string(&mut contents).unwrap(); - assert_eq!(contents, "{\"conserve_archive_version\":\"0.6\"}\n"); - - assert!( - af.last_band_id().unwrap().is_none(), - "Archive should have no bands yet" - ); - assert!( - af.last_complete_band().unwrap().is_none(), - "Archive should have no bands yet" - ); - assert_eq!( - af.referenced_blocks(&af.list_band_ids().unwrap()) - .unwrap() - .len(), - 0 - ); - assert_eq!(af.block_dir.block_names().unwrap().count(), 0); - } - - #[test] - fn create_bands() { - let af = ScratchArchive::new(); - assert!(af.path().join("d").is_dir()); - - // Make one band - let _band1 = Band::create(&af).unwrap(); - let band_path = af.path().join("b0000"); - assert!(band_path.is_dir()); - assert!(band_path.join("BANDHEAD").is_file()); - assert!(band_path.join("i").is_dir()); - - assert_eq!(af.list_band_ids().unwrap(), vec![BandId::new(&[0])]); - assert_eq!(af.last_band_id().unwrap(), Some(BandId::new(&[0]))); - - // Try creating a second band. 
- let _band2 = Band::create(&af).unwrap(); - assert_eq!( - af.list_band_ids().unwrap(), - vec![BandId::new(&[0]), BandId::new(&[1])] - ); - assert_eq!(af.last_band_id().unwrap(), Some(BandId::new(&[1]))); - - assert_eq!( - af.referenced_blocks(&af.list_band_ids().unwrap()) - .unwrap() - .len(), - 0 - ); - assert_eq!(af.block_dir.block_names().unwrap().count(), 0); + Ok(()) } } diff --git a/src/backup.rs b/src/backup.rs index 3ed2cff3..6d70f916 100644 --- a/src/backup.rs +++ b/src/backup.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -14,40 +14,45 @@ //! Make a backup by walking a source directory and copying the contents //! into an archive. +use std::convert::TryInto; +use std::fmt; use std::io::prelude::*; -use std::{convert::TryInto, time::Instant}; +use std::path::Path; +use std::time::{Duration, Instant}; +use derive_more::{Add, AddAssign}; use itertools::Itertools; +use tracing::error; use crate::blockdir::Address; +use crate::change::Change; +use crate::entry::EntryValue; use crate::io::read_with_retries; -use crate::stats::BackupStats; +use crate::progress::{Bar, Progress}; +use crate::stats::{ + write_compressed_size, write_count, write_duration, write_size, IndexWriterStats, +}; use crate::stitch::IterStitchedIndexHunks; use crate::tree::ReadTree; use crate::*; /// Configuration of how to make a backup. -#[derive(Debug, Clone)] -pub struct BackupOptions { - /// Print filenames to the UI as they're copied. - pub print_filenames: bool, - +pub struct BackupOptions<'cb> { /// Exclude these globs from the backup. pub exclude: Exclude, - /// If printing filenames, include metadata such as file permissions - pub long_listing: bool, - pub max_entries_per_hunk: usize, + + // Call this callback as each entry is successfully stored. + pub change_callback: Option>, } -impl Default for BackupOptions { - fn default() -> BackupOptions { +impl Default for BackupOptions<'_> { + fn default() -> BackupOptions<'static> { BackupOptions { - print_filenames: false, exclude: Exclude::nothing(), max_entries_per_hunk: crate::index::MAX_ENTRIES_PER_HUNK, - long_listing: false, + change_callback: None, } } } @@ -64,89 +69,63 @@ impl Default for BackupOptions { // progress_bar.set_bytes_total(source.size()?.file_bytes as u64); // } -#[derive(Default)] -struct ProgressModel { - filename: String, - scanned_file_bytes: u64, - scanned_dirs: usize, - scanned_files: usize, - entries_new: usize, - entries_changed: usize, - entries_unchanged: usize, - entries_deleted: usize, -} - -impl nutmeg::Model for ProgressModel { - fn render(&mut self, _width: usize) -> String { - format!( - "Scanned {} directories, {} files, {} MB\n{} new entries, {} changed, {} deleted, {} unchanged\n{}", - self.scanned_dirs, - self.scanned_files, - self.scanned_file_bytes / 1_000_000, - self.entries_new, self.entries_changed, self.entries_deleted, self.entries_unchanged, - self.filename - ) - } -} - /// Backup a source directory into a new band in the archive. /// /// Returns statistics about what was copied. +// TODO: Maybe this should take a Path and the LiveTree should be an implementation detail? 
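+//
+// An illustrative usage sketch (assumptions, not part of this change: the
+// archive already exists, and `open_transport` accepts a local path):
+//
+//     let archive = Archive::open(open_transport("/backups/example")?)?;
+//     let stats = backup(&archive, Path::new("/home/user"), &BackupOptions::default())?;
+//     println!("{stats}");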
pub fn backup( archive: &Archive, - source: &LiveTree, + source_path: &Path, options: &BackupOptions, ) -> Result { let start = Instant::now(); let mut writer = BackupWriter::begin(archive)?; let mut stats = BackupStats::default(); - let mut view = nutmeg::View::new(ProgressModel::default(), ui::nutmeg_options()); + let bar = Bar::new(); + let source_tree = LiveTree::open(source_path)?; + + let mut scanned_file_bytes = 0; + let mut entries_new = 0; + let mut entries_changed = 0; + let mut entries_unchanged = 0; - let entry_iter = source.iter_entries(Apath::root(), options.exclude.clone())?; + let entry_iter = source_tree.iter_entries(Apath::root(), options.exclude.clone())?; for entry_group in entry_iter.chunks(options.max_entries_per_hunk).into_iter() { for entry in entry_group { - view.update(|model| { - model.filename = entry.apath().to_string(); - match entry.kind() { - Kind::Dir => model.scanned_dirs += 1, - Kind::File => model.scanned_files += 1, - _ => (), - } - }); - match writer.copy_entry(&entry, source) { - Err(e) => { - writeln!(view, "{}", ui::format_error_causes(&e))?; + match writer.copy_entry(&entry, &source_tree) { + Err(err) => { + error!(?entry, ?err, "Error copying entry to backup"); stats.errors += 1; continue; } - Ok(Some(diff_kind)) => { - if options.print_filenames && diff_kind != DiffKind::Unchanged { - if options.long_listing { - writeln!( - view, - "{} {} {} {}", - diff_kind.as_sigil(), - entry.unix_mode(), - entry.owner(), - entry.apath() - )?; - } else { - writeln!(view, "{} {}", diff_kind.as_sigil(), entry.apath())?; - } + Ok(Some(entry_change)) => { + match entry_change.change { + Change::Changed { .. } => entries_changed += 1, + Change::Added { .. } => entries_new += 1, + Change::Unchanged { .. } => entries_unchanged += 1, + // Deletions are not produced at the moment. + Change::Deleted { .. } => (), // model.entries_deleted += 1, + } + if let Some(cb) = &options.change_callback { + cb(&entry_change)?; } - view.update(|model| match diff_kind { - DiffKind::Changed => model.entries_changed += 1, - DiffKind::New => model.entries_new += 1, - DiffKind::Unchanged => model.entries_unchanged += 1, - DiffKind::Deleted => model.entries_deleted += 1, - }) } Ok(_) => {} } - if let Some(bytes) = entry.size() { - if bytes > 0 { - view.update(|model| model.scanned_file_bytes += bytes) + match entry.size() { + Some(bytes) if bytes > 0 => { + scanned_file_bytes += bytes; + bar.post(Progress::Backup { + filename: entry.apath().to_string(), + scanned_file_bytes, + scanned_dirs: stats.directories, + scanned_files: stats.files, + entries_new, + entries_changed, + entries_unchanged, + }); } + _ => (), } } writer.flush_group()?; @@ -217,7 +196,8 @@ impl BackupWriter { /// Return an indication of whether it changed (if it's a file), or /// None for non-plain-file types where that information is not currently /// calculated. - fn copy_entry(&mut self, entry: &LiveEntry, source: &LiveTree) -> Result> { + fn copy_entry(&mut self, entry: &EntryValue, source: &LiveTree) -> Result> { + // TODO: Emit deletions for entries in the basis not present in the source. match entry.kind() { Kind::Dir => self.copy_dir(entry), Kind::File => self.copy_file(entry, source), @@ -232,51 +212,52 @@ impl BackupWriter { } } - fn copy_dir(&mut self, source_entry: &E) -> Result> { + fn copy_dir(&mut self, source_entry: &EntryValue) -> Result> { self.stats.directories += 1; self.index_builder .push_entry(IndexEntry::metadata_from(source_entry)); - Ok(None) // TODO: See if it changed from the basis? 
+        Ok(None) // TODO: Emit the actual change.
     }
 
     /// Copy in the contents of a file from another tree.
     fn copy_file(
         &mut self,
-        source_entry: &LiveEntry,
+        source_entry: &EntryValue,
         from_tree: &LiveTree,
-    ) -> Result<Option<DiffKind>> {
+    ) -> Result<Option<EntryChange>> {
         self.stats.files += 1;
         let apath = source_entry.apath();
         let result;
         if let Some(basis_entry) = self.basis_index.advance_to(apath) {
-            if source_entry.is_unchanged_from(&basis_entry) {
+            if entry_metadata_unchanged(source_entry, &basis_entry) {
                 self.stats.unmodified_files += 1;
+                let change = Some(EntryChange::unchanged(&basis_entry));
                 self.index_builder.push_entry(basis_entry);
-                return Ok(Some(DiffKind::Unchanged));
+                return Ok(change);
             } else {
                 self.stats.modified_files += 1;
-                result = Some(DiffKind::Changed);
+                result = Some(EntryChange::changed(&basis_entry, source_entry));
             }
         } else {
             self.stats.new_files += 1;
-            result = Some(DiffKind::New);
+            result = Some(EntryChange::added(source_entry));
         }
-        let mut read_source = from_tree.file_contents(source_entry)?;
-        let size = source_entry.size().expect("LiveEntry has a size");
+        let size = source_entry.size().expect("source entry has a size");
         if size == 0 {
             self.index_builder
                 .push_entry(IndexEntry::metadata_from(source_entry));
             self.stats.empty_files += 1;
             return Ok(result);
         }
+        let mut source_file = from_tree.open_file(source_entry)?;
         if size <= SMALL_FILE_CAP {
             self.file_combiner
-                .push_file(source_entry, &mut read_source)?;
+                .push_file(source_entry, &mut source_file)?;
             return Ok(result);
         }
         let addrs = store_file_content(
             apath,
-            &mut read_source,
+            &mut source_file,
             &mut self.block_dir,
             &mut self.stats,
         )?;
@@ -287,12 +268,13 @@ impl BackupWriter {
         Ok(result)
     }
 
-    fn copy_symlink<E: Entry>(&mut self, source_entry: &E) -> Result<Option<DiffKind>> {
-        let target = source_entry.symlink_target().clone();
+    fn copy_symlink(&mut self, source_entry: &EntryValue) -> Result<Option<EntryChange>> {
+        let target = source_entry.symlink_target();
         self.stats.symlinks += 1;
         assert!(target.is_some());
         self.index_builder
             .push_entry(IndexEntry::metadata_from(source_entry));
+        // TODO: Emit the actual change.
         Ok(None)
     }
 }
@@ -307,8 +289,8 @@ fn store_file_content(
     let mut addresses = Vec::<Address>::with_capacity(1);
     loop {
         read_with_retries(&mut buffer, MAX_BLOCK_SIZE, from_file).map_err(|source| {
-            Error::StoreFile {
-                apath: apath.to_owned(),
+            Error::ReadSourceFile {
+                path: apath.to_string().into(),
                 source,
             }
         })?;
@@ -412,26 +394,27 @@ impl FileCombiner {
     /// Add the contents of a small file into this combiner.
     ///
     /// `entry` should be an IndexEntry that's complete apart from the block addresses.
-    fn push_file(&mut self, live_entry: &LiveEntry, from_file: &mut dyn Read) -> Result<()> {
+    fn push_file(&mut self, entry: &EntryValue, from_file: &mut dyn Read) -> Result<()> {
         let start = self.buf.len();
-        let expected_len: usize = live_entry
+        let expected_len: usize = entry
             .size()
             .expect("small file has no length")
             .try_into()
             .unwrap();
-        let index_entry = IndexEntry::metadata_from(live_entry);
+        let index_entry = IndexEntry::metadata_from(entry);
         if expected_len == 0 {
             self.stats.empty_files += 1;
             self.finished.push(index_entry);
             return Ok(());
         }
         self.buf.resize(start + expected_len, 0);
-        let len = from_file
-            .read(&mut self.buf[start..])
-            .map_err(|source| Error::StoreFile {
-                apath: live_entry.apath().to_owned(),
-                source,
-            })?;
+        let len =
+            from_file
+                .read(&mut self.buf[start..])
+                .map_err(|source| Error::ReadSourceFile {
+                    path: entry.apath.to_string().into(),
+                    source,
+                })?;
         self.buf.truncate(start + len);
         if len == 0 {
             self.stats.empty_files += 1;
@@ -454,3 +437,90 @@ impl FileCombiner {
         }
     }
 }
+
+/// True if the metadata supports an assumption the file contents have
+/// not changed, without reading the file content.
+///
+/// Caution: this does not check the symlink target.
+fn entry_metadata_unchanged<E: EntryTrait, O: EntryTrait>(new_entry: &E, basis_entry: &O) -> bool {
+    basis_entry.kind() == new_entry.kind()
+        && basis_entry.mtime() == new_entry.mtime()
+        && basis_entry.size() == new_entry.size()
+        && basis_entry.unix_mode() == new_entry.unix_mode()
+        && basis_entry.owner() == new_entry.owner()
+}
+
+#[derive(Add, AddAssign, Debug, Default, Eq, PartialEq, Clone)]
+pub struct BackupStats {
+    // TODO: Have separate more-specific stats for backup and restore, and then
+    // each can have a single Display method.
+    // TODO: Include source file bytes, including unmodified files.
+    pub files: usize,
+    pub symlinks: usize,
+    pub directories: usize,
+    pub unknown_kind: usize,
+
+    pub unmodified_files: usize,
+    pub modified_files: usize,
+    pub new_files: usize,
+
+    /// Bytes that matched an existing block.
+    pub deduplicated_bytes: u64,
+    /// Bytes that were stored as new blocks, before compression.
+    pub uncompressed_bytes: u64,
+    pub compressed_bytes: u64,
+
+    pub deduplicated_blocks: usize,
+    pub written_blocks: usize,
+    /// Blocks containing combined small files.
+ pub combined_blocks: usize, + + pub empty_files: usize, + pub small_combined_files: usize, + pub single_block_files: usize, + pub multi_block_files: usize, + + pub errors: usize, + + pub index_builder_stats: IndexWriterStats, + pub elapsed: Duration, +} + +impl fmt::Display for BackupStats { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + write_count(w, "files:", self.files); + write_count(w, " unmodified files", self.unmodified_files); + write_count(w, " modified files", self.modified_files); + write_count(w, " new files", self.new_files); + write_count(w, "symlinks", self.symlinks); + write_count(w, "directories", self.directories); + write_count(w, "unsupported file kind", self.unknown_kind); + writeln!(w).unwrap(); + + write_count(w, "files stored:", self.new_files + self.modified_files); + write_count(w, " empty files", self.empty_files); + write_count(w, " small combined files", self.small_combined_files); + write_count(w, " single block files", self.single_block_files); + write_count(w, " multi-block files", self.multi_block_files); + writeln!(w).unwrap(); + + write_count(w, "data blocks deduplicated:", self.deduplicated_blocks); + write_size(w, " saved", self.deduplicated_bytes); + writeln!(w).unwrap(); + + write_count(w, "new data blocks written:", self.written_blocks); + write_count(w, " blocks of combined files", self.combined_blocks); + write_compressed_size(w, self.compressed_bytes, self.uncompressed_bytes); + writeln!(w).unwrap(); + + let idx = &self.index_builder_stats; + write_count(w, "new index hunks", idx.index_hunks); + write_compressed_size(w, idx.compressed_index_bytes, idx.uncompressed_index_bytes); + writeln!(w).unwrap(); + + write_count(w, "errors", self.errors); + write_duration(w, "elapsed", self.elapsed)?; + + Ok(()) + } +} diff --git a/src/band.rs b/src/band.rs index 4e7fa688..79a948a7 100644 --- a/src/band.rs +++ b/src/band.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -21,19 +21,30 @@ //! To read a consistent tree possibly composed from several incremental backups, use //! StoredTree rather than the Band itself. +use std::borrow::Cow; + +use itertools::Itertools; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use tracing::{debug, error, warn}; use crate::jsonio::{read_json, write_json}; use crate::misc::remove_item; -use crate::transport::{ListDirNames, Transport}; +use crate::transport::{ListDir, Transport}; use crate::*; static INDEX_DIR: &str = "i"; -/// Band format-compatibility. Bands written out by this program, can only be -/// read correctly by versions equal or later than the stated version. -pub const BAND_FORMAT_VERSION: &str = "0.6.3"; +/// Per-band format flags. +pub mod flags { + use std::borrow::Cow; + + /// Default flags for newly created bands. + pub static DEFAULT: &[Cow<'static, str>] = &[]; + + /// All the flags understood by this version of Conserve. + pub static SUPPORTED: &[&str] = &[]; +} /// Describes how to select a band from an archive. 
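+/// For example (illustrative only): `Latest` selects the most recent band
+/// even if it is still incomplete, while `LatestClosed` selects the most
+/// recent band whose tail file has been written.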
 #[derive(Debug, Clone, Eq, PartialEq)]
@@ -47,13 +58,13 @@ pub enum BandSelectionPolicy {
 }
 
 fn band_version_requirement() -> semver::VersionReq {
-    semver::VersionReq::parse("<=0.6.3").unwrap()
+    semver::VersionReq::parse(&format!("<={}", crate::VERSION)).unwrap()
 }
 
 fn band_version_supported(version: &str) -> bool {
     semver::Version::parse(version)
         .map(|sv| band_version_requirement().matches(&sv))
-        .unwrap_or(false)
+        .unwrap()
 }
 
 /// Each backup makes a new `band` containing an index directory.
@@ -76,6 +87,11 @@ struct Head {
     /// Semver string for the minimum Conserve version to read this band
     /// correctly.
     band_format_version: Option<String>,
+
+    /// Format flags that must be understood to read this band and the
+    /// referenced data correctly.
+    #[serde(default)]
+    format_flags: Vec<Cow<'static, str>>,
 }
 
 /// Format of the on-disk tail file.
@@ -112,17 +128,31 @@ impl Band {
     ///
     /// The Band gets the next id after those that already exist.
     pub fn create(archive: &Archive) -> Result<Band> {
+        Band::create_with_flags(archive, flags::DEFAULT)
+    }
+
+    pub fn create_with_flags(
+        archive: &Archive,
+        format_flags: &[Cow<'static, str>],
+    ) -> Result<Band> {
+        format_flags
+            .iter()
+            .for_each(|f| assert!(flags::SUPPORTED.contains(&f.as_ref()), "unknown flag {f:?}"));
         let band_id = archive
             .last_band_id()?
             .map_or_else(BandId::zero, |b| b.next_sibling());
         let transport: Box<dyn Transport> = archive.transport().sub_transport(&band_id.to_string());
-        transport
-            .create_dir("")
-            .and_then(|()| transport.create_dir(INDEX_DIR))
-            .map_err(|source| Error::CreateBand { source })?;
+        transport.create_dir("")?;
+        transport.create_dir(INDEX_DIR)?;
+        let band_format_version = if format_flags.is_empty() {
+            Some("0.6.3".to_owned())
+        } else {
+            Some("23.2.0".to_owned())
+        };
         let head = Head {
             start_time: OffsetDateTime::now_utc().unix_timestamp(),
-            band_format_version: Some(BAND_FORMAT_VERSION.to_owned()),
+            band_format_version,
+            format_flags: format_flags.into(),
         };
         write_json(&transport, BAND_HEAD_FILENAME, &head)?;
         Ok(Band {
@@ -142,23 +172,39 @@
             index_hunk_count: Some(index_hunk_count),
             },
         )
+        .map_err(Error::from)
     }
 
     /// Open the band with the given id.
-    pub fn open(archive: &Archive, band_id: &BandId) -> Result<Band> {
+    pub fn open(archive: &Archive, band_id: BandId) -> Result<Band> {
         let transport: Box<dyn Transport> = archive.transport().sub_transport(&band_id.to_string());
-        let head: Head = read_json(&transport, BAND_HEAD_FILENAME)?;
+        let head: Head =
+            read_json(&transport, BAND_HEAD_FILENAME)?.ok_or(Error::BandHeadMissing { band_id })?;
         if let Some(version) = &head.band_format_version {
             if !band_version_supported(version) {
                 return Err(Error::UnsupportedBandVersion {
-                    band_id: band_id.to_owned(),
+                    band_id,
                     version: version.to_owned(),
                 });
             }
         } else {
+            debug!("Old(?) band {band_id} has no format version");
             // Unmarked, old bands, are accepted for now. In the next archive
             // version, band version markers ought to become mandatory.
         }
+
+        let unsupported_flags = head
+            .format_flags
+            .iter()
+            .filter(|f| !flags::SUPPORTED.contains(&f.as_ref()))
+            .cloned()
+            .collect_vec();
+        if !unsupported_flags.is_empty() {
+            return Err(Error::UnsupportedBandFormatFlags {
+                band_id,
+                unsupported_flags,
+            });
+        }
         Ok(Band {
             band_id: band_id.to_owned(),
             head,
@@ -167,14 +213,17 @@
     }
 
     /// Delete a band.
-    pub fn delete(archive: &Archive, band_id: &BandId) -> Result<()> {
+    pub fn delete(archive: &Archive, band_id: BandId) -> Result<()> {
         // TODO: Count how many files were deleted, and the total size?
         archive
             .transport()
             .remove_dir_all(&band_id.to_string())
-            .map_err(|source| Error::BandDeletion {
-                band_id: band_id.clone(),
-                source,
+            .map_err(|err| {
+                if err.is_not_found() {
+                    Error::BandNotFound { band_id }
+                } else {
+                    Error::from(err)
+                }
             })
     }
 
@@ -184,8 +233,18 @@
             .map_err(Error::from)
     }
 
-    pub fn id(&self) -> &BandId {
-        &self.band_id
+    pub fn id(&self) -> BandId {
+        self.band_id
+    }
+
+    /// Get the minimum supported version for this band.
+    pub fn band_format_version(&self) -> Option<&str> {
+        self.head.band_format_version.as_deref()
+    }
+
+    /// Get the format flags in this band, from [flags].
+    pub fn format_flags(&self) -> &[Cow<'static, str>] {
+        &self.head.format_flags
     }
 
     pub fn index_builder(&self) -> IndexWriter {
@@ -199,18 +258,25 @@
     /// Return info about the state of this band.
     pub fn get_info(&self) -> Result<Info> {
-        let tail_option: Option<Tail> = match read_json(&self.transport, BAND_TAIL_FILENAME) {
-            Ok(tail) => Some(tail),
-            Err(Error::MetadataNotFound { .. }) => None,
-            Err(err) => return Err(err),
-        };
-        let start_time = OffsetDateTime::from_unix_timestamp(self.head.start_time)
-            .expect("invalid band start timestamp");
-        let end_time = tail_option.as_ref().map(|tail| {
-            OffsetDateTime::from_unix_timestamp(tail.end_time).expect("invalid end timestamp")
-        });
+        let tail_option: Option<Tail> = read_json(&self.transport, BAND_TAIL_FILENAME)?;
+        let start_time =
+            OffsetDateTime::from_unix_timestamp(self.head.start_time).map_err(|_| {
+                Error::InvalidMetadata {
+                    details: format!("Invalid band start timestamp {:?}", self.head.start_time),
+                }
+            })?;
+        let end_time = tail_option
+            .as_ref()
+            .map(|tail| {
+                OffsetDateTime::from_unix_timestamp(tail.end_time).map_err(|_| {
+                    Error::InvalidMetadata {
+                        details: format!("Invalid band end timestamp {:?}", tail.end_time),
+                    }
+                })
+            })
+            .transpose()?;
         Ok(Info {
-            id: self.band_id.clone(),
+            id: self.band_id,
             is_closed: tail_option.is_some(),
             start_time,
             end_time,
@@ -218,32 +284,19 @@
         })
     }
 
-    pub fn validate(&self, stats: &mut ValidateStats) -> Result<()> {
-        let ListDirNames { mut files, dirs } =
-            self.transport.list_dir_names("").map_err(Error::from)?;
+    pub fn validate(&self) -> Result<()> {
+        let ListDir { mut files, dirs } = self.transport.list_dir("")?;
         if !files.contains(&BAND_HEAD_FILENAME.to_string()) {
-            ui::problem(&format!("No band head file in {:?}", self.transport));
-            stats.missing_band_heads += 1;
+            error!(band_id = ?self.band_id, "Band head file missing");
        }
         remove_item(&mut files, &BAND_HEAD_FILENAME);
         remove_item(&mut files, &BAND_TAIL_FILENAME);
-
-        if !files.is_empty() {
-            ui::problem(&format!(
-                "Unexpected files in band directory {:?}: {:?}",
-                self.transport, files
-            ));
-            stats.unexpected_files += 1;
+        for unexpected in files {
+            warn!(path = ?unexpected, "Unexpected file in band directory");
        }
-
-        if dirs != [INDEX_DIR.to_string()] {
-            ui::problem(&format!(
-                "Incongruous directories in band directory {:?}: {:?}",
-                self.transport, dirs
-            ));
-            stats.unexpected_files += 1;
+        for unexpected in dirs.iter().filter(|n| n != &INDEX_DIR) {
+            warn!(path = ?unexpected, "Unexpected subdirectory in band directory");
        }
-
         Ok(())
     }
 }
@@ -279,7 +332,7 @@ mod tests {
         assert!(band.is_closed().unwrap());
 
         let band_id = BandId::from_str("b0000").unwrap();
-        let band2 = Band::open(&af, &band_id).expect("failed to re-open band");
+        let band2 = Band::open(&af, band_id).expect("failed to re-open band");
         assert!(band2.is_closed().unwrap());
 
         // Try get_info
@@ -297,10 +350,12 @@ fn
delete_band() { let af = ScratchArchive::new(); let _band = Band::create(&af).unwrap(); - Band::delete(&af, &BandId::new(&[0])).expect("delete band"); + assert!(af.transport().is_file("b0000/BANDHEAD").unwrap()); + + Band::delete(&af, BandId::new(&[0])).expect("delete band"); assert!(!af.transport().is_file("b0000").unwrap()); - assert!(!af.transport().is_dir("b0000").unwrap()); + assert!(!af.transport().is_file("b0000/BANDHEAD").unwrap()); } #[test] @@ -309,7 +364,7 @@ mod tests { fs::create_dir(af.path().join("b0000")).unwrap(); let head = json!({ "start_time": 0, - "band_format_version": "0.8.8", + "band_format_version": "8888.8.8", }); fs::write( af.path().join("b0000").join(BAND_HEAD_FILENAME), @@ -317,10 +372,10 @@ mod tests { ) .unwrap(); - let e = Band::open(&af, &BandId::zero()); + let e = Band::open(&af, BandId::zero()); let e_str = e.unwrap_err().to_string(); assert!( - e_str.contains("Band version \"0.8.8\" in"), + e_str.contains("Unsupported band version \"8888.8.8\" in b0000"), "bad band version: {e_str:#?}" ); } diff --git a/src/bandid.rs b/src/bandid.rs index ec606b04..4357dbc8 100644 --- a/src/bandid.rs +++ b/src/bandid.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2022 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -11,43 +11,36 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. -//! Bands are identified by a string like `b0001-0023`, represented by a `BandId` object. +//! Bands are identified by a string like `b0001`, represented by a [BandId] object. -use std::fmt::{self, Write}; +use std::fmt; use std::str::FromStr; +use serde::Serialize; + use crate::errors::Error; -/// Identifier for a band within an archive, eg 'b0001' or 'b0001-0020'. -/// -/// `BandId`s implement a total ordering `std::cmp::Ord`. -#[derive(Debug, PartialEq, Clone, Eq, PartialOrd, Ord, Hash)] -pub struct BandId { - /// The sequence numbers at each tier. - seqs: Vec, -} +/// Identifier for a band within an archive, eg 'b0001'. +#[derive(Debug, PartialEq, Clone, Copy, Eq, PartialOrd, Ord, Hash, Serialize)] +pub struct BandId(u32); impl BandId { /// Makes a new BandId from a sequence of integers. pub fn new(seqs: &[u32]) -> BandId { - assert!(!seqs.is_empty()); - BandId { - seqs: seqs.to_vec(), - } + assert_eq!(seqs.len(), 1, "Band id should have a single element"); + BandId(seqs[0]) } /// Return the origin BandId. #[must_use] pub fn zero() -> BandId { - BandId::new(&[0]) + BandId(0) } /// Return the next BandId at the same level as self. #[must_use] pub fn next_sibling(&self) -> BandId { - let mut next_seqs = self.seqs.clone(); - next_seqs[self.seqs.len() - 1] += 1; - BandId::new(&next_seqs) + BandId(self.0 + 1) } /// Return the previous band, unless this is zero. @@ -57,13 +50,10 @@ impl BandId { /// Currently only implemented for top-level bands. #[must_use] pub fn previous(&self) -> Option { - if self.seqs.len() != 1 { - unimplemented!("BandId::previous only supported on len 1") - } - if self.seqs[0] == 0 { + if self.0 == 0 { None } else { - Some(BandId::new(&[self.seqs[0] - 1])) + Some(BandId(self.0 - 1)) } } } @@ -73,22 +63,18 @@ impl FromStr for BandId { /// Make a new BandId from a string form. 
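+    /// For example (illustrative): `"b0001".parse::<BandId>()` returns the
+    /// band with sequence number 1; strings without the leading `b` or with
+    /// a non-numeric suffix are rejected as `Error::InvalidVersion`.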
fn from_str(s: &str) -> std::result::Result { - let nope = || Err(Error::InvalidVersion { version: s.into() }); - if !s.starts_with('b') { - return nope(); - } - let mut seqs = Vec::::new(); - for num_part in s[1..].split('-') { - match num_part.parse::() { - Ok(num) => seqs.push(num), - Err(..) => return nope(), + if let Some(num) = s.strip_prefix('b') { + if let Ok(num) = num.parse::() { + return Ok(BandId(num)); } } - if seqs.is_empty() { - nope() - } else { - Ok(BandId::new(&seqs)) - } + Err(Error::InvalidVersion { version: s.into() }) + } +} + +impl From for BandId { + fn from(value: u32) -> Self { + BandId(value) } } @@ -102,100 +88,6 @@ impl fmt::Display for BandId { /// Numbers are zero-padded to what should normally be a reasonable length, /// but they can be longer. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut result = String::with_capacity(self.seqs.len() * 5); - result.push('b'); - for s in &self.seqs { - let _ = write!(result, "{s:04}-"); - } - result.pop(); // remove the last dash - result.shrink_to_fit(); - f.pad(&result) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - #[should_panic] - fn empty_id_not_allowed() { - BandId::new(&[]); - } - - #[test] - fn equality() { - assert_eq!(BandId::new(&[1]), BandId::new(&[1])) - } - - #[test] - fn zero() { - assert_eq!(BandId::zero().to_string(), "b0000"); - } - - #[test] - fn zero_has_no_previous() { - assert_eq!(BandId::zero().previous(), None); - } - - #[test] - fn previous_of_one_is_zero() { - assert_eq!( - BandId::zero().next_sibling().previous(), - Some(BandId::zero()) - ); - } - - #[test] - fn next() { - assert_eq!(BandId::zero().next_sibling().to_string(), "b0001"); - assert_eq!( - BandId::new(&[2, 3]).next_sibling().to_string(), - "b0002-0004" - ); - } - - #[test] - fn to_string() { - let band_id = BandId::new(&[1, 10, 20]); - assert_eq!(band_id.to_string(), "b0001-0010-0020"); - assert_eq!( - BandId::new(&[1_000_000, 2_000_000]).to_string(), - "b1000000-2000000" - ) - } - - #[test] - fn from_string_detects_invalid() { - assert!(BandId::from_str("").is_err()); - assert!(BandId::from_str("hello").is_err()); - assert!(BandId::from_str("b").is_err()); - assert!(BandId::from_str("b-").is_err()); - assert!(BandId::from_str("b2-").is_err()); - assert!(BandId::from_str("b-2").is_err()); - assert!(BandId::from_str("b2-1-").is_err()); - assert!(BandId::from_str("b2--1").is_err()); - assert!(BandId::from_str("beta").is_err()); - assert!(BandId::from_str("b-eta").is_err()); - assert!(BandId::from_str("b-1eta").is_err()); - assert!(BandId::from_str("b-1-eta").is_err()); - } - - #[test] - fn from_string_valid() { - assert_eq!(BandId::from_str("b0001").unwrap().to_string(), "b0001"); - assert_eq!(BandId::from_str("b123456").unwrap().to_string(), "b123456"); - assert_eq!( - BandId::from_str("b0001-0100-0234").unwrap().to_string(), - "b0001-0100-0234" - ); - } - - #[test] - fn format() { - let a_bandid = BandId::from_str("b0001-0234").unwrap(); - assert_eq!(format!("{a_bandid}"), "b0001-0234"); - // Implements padding correctly - assert_eq!(format!("{a_bandid:<15}"), "b0001-0234 "); + f.pad(&format!("b{:0>4}", self.0)) } } diff --git a/src/bin/conserve.rs b/src/bin/conserve.rs index 27e7f056..d23d7749 100644 --- a/src/bin/conserve.rs +++ b/src/bin/conserve.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2015-2023 Martin Pool. 
// This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -13,13 +13,22 @@ //! Command-line entry point for Conserve backups. +use std::cell::RefCell; +use std::fs::OpenOptions; use std::io::{BufWriter, Write}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; +use std::time::Instant; use clap::{Parser, Subcommand}; -use tracing::trace; +use conserve::change::Change; +use conserve::progress::ProgressImpl; +use conserve::trace_counter::{global_error_count, global_warn_count}; +use metrics::increment_counter; +#[allow(unused_imports)] +use tracing::{debug, error, info, trace, warn, Level}; use conserve::backup::BackupOptions; +use conserve::ui::termui::TraceTimeStyle; use conserve::ReadTree; use conserve::RestoreOptions; use conserve::*; @@ -37,6 +46,18 @@ struct Args { /// Show debug trace to stdout. #[arg(long, short = 'D', global = true)] debug: bool, + + /// Control timestamps prefixes on stderr. + #[arg(long, value_enum, global = true, default_value_t = TraceTimeStyle::None)] + trace_time: TraceTimeStyle, + + /// Append a json formatted log to this file. + #[arg(long, global = true)] + log_json: Option, + + /// Write metrics to this file. + #[arg(long, global = true)] + metrics_json: Option, } #[derive(Debug, Subcommand)] @@ -47,13 +68,18 @@ enum Command { archive: String, /// Source directory to copy from. source: PathBuf, + /// Write a list of changes to this file. + #[arg(long)] + changes_json: Option, /// Print copied file names. #[arg(long, short)] verbose: bool, #[arg(long, short)] exclude: Vec, + /// Read a list of globs to exclude from this file. #[arg(long, short = 'E')] exclude_from: Vec, + /// Don't print statistics after the backup completes. #[arg(long)] no_stats: bool, /// Show permissions, owner, and group in verbose output. @@ -93,6 +119,10 @@ enum Command { exclude_from: Vec, #[arg(long)] include_unchanged: bool, + + /// Print the diff as json. + #[arg(long, short)] + json: bool, }, /// Create a new archive. @@ -102,8 +132,6 @@ enum Command { }, /// Delete blocks unreferenced by any index. - /// - /// CAUTION: Do not gc while a backup is underway. Gc { /// Archive to delete from. archive: String, @@ -124,9 +152,14 @@ enum Command { #[arg(long, short)] exclude: Vec, + #[arg(long, short = 'E')] exclude_from: Vec, + /// Print entries as json. + #[arg(long, short)] + json: bool, + /// Show permissions, owner, and group. #[arg(short = 'l')] long_listing: bool, @@ -138,6 +171,9 @@ enum Command { destination: PathBuf, #[arg(long, short)] backup: Option, + /// Write a list of restored files to this json file. + #[arg(long)] + changes_json: Option, #[arg(long, short)] force_overwrite: bool, #[arg(long, short)] @@ -231,20 +267,37 @@ enum Debug { backup: Option, }, + WriteIndexFlatbuf { + archive: String, + out: PathBuf, + }, + /// List all blocks. - Blocks { archive: String }, + Blocks { + archive: String, + }, /// List all blocks referenced by any band. - Referenced { archive: String }, + Referenced { + archive: String, + }, /// List garbage blocks referenced by no band. 
-    Unreferenced { archive: String },
+    Unreferenced {
+        archive: String,
+    },
 }
 
 enum ExitCode {
-    Ok = 0,
-    Failed = 1,
-    PartialCorruption = 2,
+    Success = 0,
+    Failure = 1,
+    NonFatalErrors = 2,
+}
+
+impl std::process::Termination for ExitCode {
+    fn report(self) -> std::process::ExitCode {
+        (self as u8).into()
+    }
 }
 
 impl Command {
@@ -253,30 +306,37 @@
         match self {
             Command::Backup {
                 archive,
-                source,
-                verbose,
+                changes_json,
                 exclude,
                 exclude_from,
-                no_stats,
                 long_listing,
+                no_stats,
+                source,
+                verbose,
             } => {
-                let source = &LiveTree::open(source)?;
                 let options = BackupOptions {
-                    print_filenames: *verbose,
                     exclude: Exclude::from_patterns_and_files(exclude, exclude_from)?,
-                    long_listing: *long_listing,
+                    change_callback: make_change_callback(
+                        *verbose,
+                        *long_listing,
+                        &changes_json.as_deref(),
+                    )?,
                     ..Default::default()
                 };
+                if *long_listing || *verbose {
+                    // TODO(CON-23): Really Nutmeg should coordinate stdout and stderr...
+                    ProgressImpl::Null.activate()
+                }
                 let stats = backup(&Archive::open(open_transport(archive)?)?, source, &options)?;
                 if !no_stats {
-                    ui::println(&format!("Backup complete.\n{stats}"));
+                    info!("Backup complete.\n{stats}");
                 }
             }
             Command::Debug(Debug::Blocks { archive }) => {
                 let mut bw = BufWriter::new(stdout);
                 for hash in Archive::open(open_transport(archive)?)?
                     .block_dir()
-                    .block_names()?
+                    .iter_block_names()?
                 {
                     writeln!(bw, "{hash}")?;
                 }
@@ -285,6 +345,15 @@
                 let st = stored_tree_from_opt(archive, backup)?;
                 show::show_index_json(st.band(), &mut stdout)?;
             }
+            Command::Debug(Debug::WriteIndexFlatbuf { archive, out }) => {
+                let st = stored_tree_from_opt(archive, &None)?;
+                let out_file = OpenOptions::new()
+                    .write(true)
+                    .create(true)
+                    .truncate(true)
+                    .open(out)?;
+                conserve::fbs::write_index(&st, out_file)?;
+            }
             Command::Debug(Debug::Referenced { archive }) => {
                 let mut bw = BufWriter::new(stdout);
                 let archive = Archive::open(open_transport(archive)?)?;
@@ -313,7 +382,7 @@
                     },
                 )?;
                 if !no_stats {
-                    ui::println(&format!("{stats}"));
+                    println!("{stats}");
                 }
             }
             Command::Diff {
@@ -323,6 +392,7 @@
                 exclude,
                 exclude_from,
                 include_unchanged,
+                json,
             } => {
                 let st = stored_tree_from_opt(archive, backup)?;
                 let lt = LiveTree::open(source)?;
@@ -330,7 +400,14 @@
                     exclude: Exclude::from_patterns_and_files(exclude, exclude_from)?,
                     include_unchanged: *include_unchanged,
                 };
-                show_diff(diff(&st, &lt, &options)?, &mut stdout)?;
+                let mut bw = BufWriter::new(stdout);
+                for change in diff(&st, &lt, &options)? {
+                    if *json {
+                        serde_json::to_writer(&mut bw, &change)?;
+                    } else {
+                        writeln!(bw, "{change}")?;
+                    }
+                }
             }
             Command::Gc {
                 archive,
@@ -347,41 +424,48 @@
                     },
                 )?;
                 if !no_stats {
-                    ui::println(&format!("{stats}"));
+                    info!(%stats);
                 }
             }
             Command::Init { archive } => {
                 Archive::create(open_transport(archive)?)?;
-                ui::println(&format!("Created new archive in {:?}", &archive));
+                debug!("Created new archive in {archive:?}");
             }
             Command::Ls {
+                json,
                 stos,
                 exclude,
                 exclude_from,
                 long_listing,
             } => {
                 let exclude = Exclude::from_patterns_and_files(exclude, exclude_from)?;
-                if let Some(archive) = &stos.archive {
-                    // TODO: Option for subtree.
-                    show::show_entry_names(
-                        stored_tree_from_opt(archive, &stos.backup)?
-                            .iter_entries(Apath::root(), exclude)?,
-                        &mut stdout,
-                        *long_listing,
-                    )?;
+                let entry_iter: Box<dyn Iterator<Item = EntryValue>> =
+                    if let Some(archive) = &stos.archive {
+                        // TODO: Option for subtree.
+                        Box::new(
+                            stored_tree_from_opt(archive, &stos.backup)?
+                                .iter_entries(Apath::root(), exclude)?
+                                .map(|it| it.into()),
+                        )
+                    } else {
+                        Box::new(
+                            LiveTree::open(stos.source.clone().unwrap())?
+                                .iter_entries(Apath::root(), exclude)?,
+                        )
+                    };
+                if *json {
+                    for entry in entry_iter {
+                        println!("{}", serde_json::ser::to_string(&entry)?);
+                    }
                 } else {
-                    show::show_entry_names(
-                        LiveTree::open(stos.source.clone().unwrap())?
-                            .iter_entries(Apath::root(), exclude)?,
-                        &mut stdout,
-                        *long_listing,
-                    )?;
+                    show::show_entry_names(entry_iter, &mut stdout, *long_listing)?;
                 }
             }
             Command::Restore {
                 archive,
                 destination,
                 backup,
+                changes_json,
                 verbose,
                 force_overwrite,
                 exclude,
@@ -393,17 +477,23 @@
                 let band_selection = band_selection_policy_from_opt(backup);
                 let archive = Archive::open(open_transport(archive)?)?;
                 let options = RestoreOptions {
-                    print_filenames: *verbose,
                     exclude: Exclude::from_patterns_and_files(exclude, exclude_from)?,
                     only_subtree: only_subtree.clone(),
                     band_selection,
                     overwrite: *force_overwrite,
-                    long_listing: *long_listing,
+                    change_callback: make_change_callback(
+                        *verbose,
+                        *long_listing,
+                        &changes_json.as_deref(),
+                    )?,
                 };
-
+                if *verbose || *long_listing {
+                    ProgressImpl::Null.activate();
+                }
                 let stats = restore(&archive, destination, &options)?;
+                debug!("Restore complete");
                 if !no_stats {
-                    ui::println(&format!("Restore complete.\n{stats}"));
+                    debug!(%stats);
                 }
             }
             Command::Size {
@@ -423,28 +513,20 @@
                     .file_bytes
                 };
                 if *bytes {
-                    ui::println(&format!("{size}"));
+                    println!("{size}");
                 } else {
-                    ui::println(&conserve::bytes_to_human_mb(size));
+                    println!("{}", conserve::bytes_to_human_mb(size));
                 }
             }
-            Command::Validate {
-                archive,
-                quick,
-                no_stats,
-            } => {
+            Command::Validate { archive, quick, .. } => {
                 let options = ValidateOptions {
                     skip_block_hashes: *quick,
                 };
-                let stats = Archive::open(open_transport(archive)?)?.validate(&options)?;
-                if !no_stats {
-                    println!("{stats}");
-                }
-                if stats.has_problems() {
-                    ui::problem("Archive has some problems.");
-                    return Ok(ExitCode::PartialCorruption);
+                Archive::open(open_transport(archive)?)?.validate(&options)?;
+                if global_error_count() > 0 || global_warn_count() > 0 {
+                    warn!("Archive has some problems.");
                 } else {
-                    ui::println("Archive is OK.");
+                    info!("Archive is OK.");
                 }
             }
             Command::Versions {
@@ -454,7 +536,6 @@
                 sizes,
                 utc,
             } => {
-                ui::enable_progress(false);
                 let archive = Archive::open(open_transport(archive)?)?;
                 let options = ShowVersionsOptions {
                     newest_first: *newest,
@@ -466,7 +547,7 @@
                 conserve::show_versions(&archive, &options, &mut stdout)?;
             }
         }
-        Ok(ExitCode::Ok)
+        Ok(ExitCode::Success)
     }
 }
 
@@ -478,35 +559,94 @@ fn stored_tree_from_opt(archive_location: &str, backup: &Option<BandId>) -> Resu
 
 fn band_selection_policy_from_opt(backup: &Option<BandId>) -> BandSelectionPolicy {
     if let Some(band_id) = backup {
-        BandSelectionPolicy::Specified(band_id.clone())
+        BandSelectionPolicy::Specified(*band_id)
     } else {
         BandSelectionPolicy::Latest
     }
 }
 
-fn main() {
+fn make_change_callback<'a>(
+    print_changes: bool,
+    ls_long: bool,
+    changes_json: &Option<&Path>,
+) -> Result<Option<ChangeCallback<'a>>> {
+    if !print_changes && !ls_long && changes_json.is_none() {
+        return Ok(None);
+    };
+
+    let changes_json_writer = if let Some(path) = changes_json {
+        Some(RefCell::new(BufWriter::new(
+            OpenOptions::new()
+                .create(true)
+                .write(true)
+                .truncate(true)
+                .open(path)?,
+        )))
+    } else {
+        None
+    };
+    Ok(Some(Box::new(move |entry_change| {
+        if matches!(entry_change.change, Change::Unchanged { .. }) {
+            return Ok(());
+        }
+        if ls_long {
+            let change_meta = entry_change.change.primary_metadata();
+            println!(
+                "{} {} {} {}",
+                entry_change.change.sigil(),
+                change_meta.unix_mode,
+                change_meta.owner,
+                entry_change.apath
+            );
+        } else if print_changes {
+            println!("{} {}", entry_change.change.sigil(), entry_change.apath);
+        }
+        if let Some(w) = &changes_json_writer {
+            let mut w = w.borrow_mut();
+            writeln!(
+                w,
+                "{}",
+                serde_json::to_string(entry_change).expect("Failed to serialize change")
+            )?;
+        }
+        Ok(())
+    })))
+}
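
Since make_change_callback writes exactly one serde_json object per line, the --changes-json output is newline-delimited JSON and can be consumed line by line. A hedged sketch of a consumer (hypothetical helper, not part of the diff; assumes clap's default kebab-case flag name and a serde_json dependency):

    use std::fs::File;
    use std::io::{BufRead, BufReader};

    /// Parse a file written via `--changes-json`: one JSON object per change.
    fn read_changes(path: &str) -> std::io::Result<Vec<serde_json::Value>> {
        let mut changes = Vec::new();
        for line in BufReader::new(File::open(path)?).lines() {
            let change: serde_json::Value =
                serde_json::from_str(&line?).expect("each line should be one JSON object");
            changes.push(change);
        }
        Ok(changes)
    }
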
+
+fn main() -> Result<ExitCode> {
     let args = Args::parse();
-    ui::enable_progress(!args.no_progress && !args.debug);
-    if args.debug {
-        tracing_subscriber::fmt::Subscriber::builder()
-            .with_max_level(tracing::Level::TRACE)
-            .init();
-        trace!("tracing enabled");
+    let start_time = Instant::now();
+    if !args.no_progress {
+        progress::ProgressImpl::Terminal.activate();
     }
+    let trace_level = if args.debug {
+        Level::TRACE
+    } else {
+        Level::INFO
+    };
+    let _flush_guard = ui::termui::enable_tracing(&args.trace_time, trace_level, &args.log_json);
+    ::metrics::set_recorder(&conserve::metric_recorder::IN_MEMORY)
+        .expect("Failed to install recorder");
+    increment_counter!("conserve.start");
     let result = args.command.run();
+    metric_recorder::emit_to_trace();
+    debug!(elapsed = ?start_time.elapsed());
+    let error_count = global_error_count();
+    let warn_count = global_warn_count();
+    if let Some(metrics_json_path) = args.metrics_json {
+        metric_recorder::write_json_metrics(&metrics_json_path)?;
+    }
     match result {
-        Err(ref e) => {
-            ui::show_error(e);
-            // // TODO: Perhaps always log the traceback to a log file.
-            // if let Some(bt) = e.backtrace() {
-            //     if std::env::var("RUST_BACKTRACE") == Ok("1".to_string()) {
-            //         println!("{}", bt);
-            //     }
-            // }
-            // Avoid Rust redundantly printing the error.
-            std::process::exit(ExitCode::Failed as i32)
+        Err(err) => {
+            error!("{err:#}");
+            debug!(error_count, warn_count,);
+            Ok(ExitCode::Failure)
+        }
+        Ok(ExitCode::Success) if error_count > 0 || warn_count > 0 => {
+            debug!(error_count, warn_count,);
+            Ok(ExitCode::NonFatalErrors)
        }
-        Ok(code) => std::process::exit(code as i32),
+        Ok(exit_code) => Ok(exit_code),
     }
 }
diff --git a/src/blockdir.rs b/src/blockdir.rs
index d125f00f..bb2ecf02 100644
--- a/src/blockdir.rs
+++ b/src/blockdir.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
// This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -23,24 +23,26 @@ use std::collections::{HashMap, HashSet}; use std::convert::TryInto; -use std::io; use std::path::Path; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Instant; +use ::metrics::{counter, histogram, increment_counter}; use blake2_rfc::blake2b; use blake2_rfc::blake2b::Blake2b; -use nutmeg::models::UnboundedModel; use rayon::prelude::*; use serde::{Deserialize, Serialize}; -use thousands::Separable; +#[allow(unused_imports)] +use tracing::{debug, error, info, warn}; +use crate::backup::BackupStats; use crate::blockhash::BlockHash; use crate::compress::snappy::{Compressor, Decompressor}; -use crate::kind::Kind; -use crate::stats::{BackupStats, Sizes, ValidateStats}; +use crate::progress::{Bar, Progress}; +use crate::stats::Sizes; use crate::transport::local::LocalTransport; -use crate::transport::{DirEntry, ListDirNames, Transport}; +use crate::transport::{ListDir, Transport}; use crate::*; const BLOCKDIR_FILE_NAME_LEN: usize = crate::BLAKE_HASH_SIZE_BYTES * 2; @@ -100,9 +102,7 @@ impl BlockDir { } pub fn create(transport: Box) -> Result { - transport - .create_dir("") - .map_err(|source| Error::CreateBlockDir { source })?; + transport.create_dir("")?; Ok(BlockDir { transport: Arc::from(transport), }) @@ -112,25 +112,26 @@ impl BlockDir { pub(crate) fn compress_and_store(&mut self, in_buf: &[u8], hash: &BlockHash) -> Result { // TODO: Move this to a BlockWriter, which can hold a reusable buffer. let mut compressor = Compressor::new(); + let uncomp_len = in_buf.len() as u64; let compressed = compressor.compress(in_buf)?; let comp_len: u64 = compressed.len().try_into().unwrap(); let hex_hash = hash.to_string(); let relpath = block_relpath(hash); self.transport.create_dir(subdir_relpath(&hex_hash))?; + increment_counter!("conserve.block.writes"); + counter!("conserve.block.write_uncompressed_bytes", uncomp_len); + histogram!("conserve.block.write_uncompressed_bytes", uncomp_len as f64); + counter!("conserve.block.write_compressed_bytes", comp_len); + histogram!("conserve.block.write_compressed_bytes", comp_len as f64); self.transport .write_file(&relpath, compressed) - .or_else(|io_err| { - if io_err.kind() == io::ErrorKind::AlreadyExists { + .or_else(|err| { + if err.kind() == transport::ErrorKind::AlreadyExists { // Perhaps it was simultaneously created by another thread or process. - ui::problem(&format!( - "Unexpected late detection of existing block {hex_hash:?}" - )); + debug!("Unexpected late detection of existing block {hex_hash:?}"); Ok(()) } else { - Err(Error::WriteBlock { - hash: hex_hash, - source: io_err, - }) + Err(err) } })?; Ok(comp_len) @@ -142,11 +143,16 @@ impl BlockDir { stats: &mut BackupStats, ) -> Result { let hash = self.hash_bytes(block_data); + let len = block_data.len() as u64; if self.contains(&hash)? 
{ + increment_counter!("conserve.block.matches"); stats.deduplicated_blocks += 1; - stats.deduplicated_bytes += block_data.len() as u64; + counter!("conserve.block.matched_bytes", len); + stats.deduplicated_bytes += len; } else { + let start = Instant::now(); let comp_len = self.compress_and_store(block_data, &hash)?; + histogram!("conserve.block.compress_and_store_seconds", start.elapsed()); stats.written_blocks += 1; stats.uncompressed_bytes += block_data.len() as u64; stats.compressed_bytes += comp_len; @@ -176,7 +182,7 @@ impl BlockDir { let actual_len = decompressed.len(); if (start + len) > actual_len { return Err(Error::AddressTooLong { - address: address.to_owned(), + address: address.clone(), actual_len, }); } @@ -199,60 +205,49 @@ impl BlockDir { /// /// Errors, other than failure to open the directory at all, are logged and discarded. fn subdirs(&self) -> Result> { - let ListDirNames { mut dirs, .. } = self.transport.list_dir_names("")?; + let ListDir { mut dirs, .. } = self.transport.list_dir("")?; dirs.retain(|dirname| { if dirname.len() == SUBDIR_NAME_CHARS { true } else { - ui::problem(&format!("Unexpected subdirectory in blockdir: {dirname:?}")); + warn!("Unexpected subdirectory in blockdir: {dirname:?}"); false } }); Ok(dirs) } - fn iter_block_dir_entries(&self) -> Result> { + /// Return all the blocknames in the blockdir, in arbitrary order. + pub fn iter_block_names(&self) -> Result> { + // TODO: Read subdirs in parallel. let transport = self.transport.clone(); Ok(self .subdirs()? .into_iter() - .map(move |subdir_name| transport.iter_dir_entries(&subdir_name)) - .filter_map(|iter_or| { - if let Err(ref err) = iter_or { - ui::problem(&format!("Error listing block directory: {:?}", &err)); - } - iter_or.ok() - }) - .flatten() + .map(move |subdir_name| transport.list_dir(&subdir_name)) .filter_map(|iter_or| { if let Err(ref err) = iter_or { - ui::problem(&format!("Error listing block subdirectory: {:?}", &err)); + error!(%err, "Error listing block subdirectory"); } iter_or.ok() }) - .filter(|DirEntry { name, kind, .. }| { - *kind == Kind::File - && name.len() == BLOCKDIR_FILE_NAME_LEN - && !name.starts_with(TMP_PREFIX) - })) - } - - /// Return all the blocknames in the blockdir, in arbitrary order. - pub fn block_names(&self) -> Result> { - let progress = nutmeg::View::new("List blocks", ui::nutmeg_options()); - progress.update(|_| ()); - Ok(self - .iter_block_dir_entries()? - .filter_map(|de| de.name.parse().ok())) + .flat_map(|ListDir { files, .. }| files) + .filter(|name| name.len() == BLOCKDIR_FILE_NAME_LEN && !name.starts_with(TMP_PREFIX)) + .filter_map(|name| name.parse().ok())) } - /// Return all the blocknames in the blockdir. + /// Return all the blocknames in the blockdir, while showing progress. pub fn block_names_set(&self) -> Result> { - let progress = nutmeg::View::new(UnboundedModel::new("List blocks"), ui::nutmeg_options()); + // TODO: We could estimate time remaining by accounting for how + // many prefixes are present and how many have been read. + let bar = Bar::new(); Ok(self - .iter_block_dir_entries()? - .filter_map(|de| de.name.parse().ok()) - .inspect(|_| progress.update(|model| model.increment(1))) + .iter_block_names()? + .enumerate() + .map(|(count, hash)| { + bar.post(Progress::ListBlocks { count }); + hash + }) .collect()) } @@ -260,68 +255,40 @@ impl BlockDir { /// /// Return a dict describing which blocks are present, and the length of their uncompressed /// data. 
- pub fn validate(&self, stats: &mut ValidateStats) -> Result> { + pub fn validate(&self) -> Result> { // TODO: In the top-level directory, no files or directories other than prefix // directories of the right length. // TODO: Test having a block with the right compression but the wrong contents. - ui::println("Count blocks..."); + // TODO: Warn on blocks in the wrong subdir. + debug!("Start list blocks"); let blocks = self.block_names_set()?; - crate::ui::println(&format!( - "Check {} blocks...", - blocks.len().separate_with_commas() - )); - stats.block_read_count = blocks.len().try_into().unwrap(); - struct ProgressModel { - total_blocks: usize, - blocks_done: usize, - bytes_done: usize, - start: Instant, - } - impl nutmeg::Model for ProgressModel { - fn render(&mut self, _width: usize) -> String { - format!( - "Check block {}/{}: {} done, {} MB checked, {} remaining", - self.blocks_done, - self.total_blocks, - nutmeg::percent_done(self.blocks_done, self.total_blocks), - self.bytes_done / 1_000_000, - nutmeg::estimate_remaining(&self.start, self.blocks_done, self.total_blocks) - ) - } - } - let progress_bar = nutmeg::View::new( - ProgressModel { - total_blocks: blocks.len(), - blocks_done: 0, - bytes_done: 0, - start: Instant::now(), - }, - ui::nutmeg_options(), - ); - // Make a vec of Some(usize) if the block could be read, or None if it - // failed, where the usize gives the uncompressed data size. - let results: Vec> = blocks + let total_blocks = blocks.len(); + debug!("Check {total_blocks} blocks"); + let blocks_done = AtomicUsize::new(0); + let bytes_done = AtomicU64::new(0); + let start = Instant::now(); + let task = Bar::new(); + let block_lens = blocks .into_par_iter() - // .into_iter() - .map(|hash| { - let r = self - .get_block_content(&hash) - .map(|(bytes, _sizes)| (hash, bytes.len())) - .ok(); - let bytes = r.as_ref().map(|x| x.1).unwrap_or_default(); - progress_bar.update(|model| { - model.blocks_done += 1; - model.bytes_done += bytes - }); - r + .flat_map(|hash| match self.get_block_content(&hash) { + Ok((bytes, _sizes)) => { + let len = bytes.len(); + let len64 = len as u64; + task.post(Progress::ValidateBlocks { + blocks_done: blocks_done.fetch_add(1, Ordering::Relaxed) + 1, + total_blocks, + bytes_done: bytes_done.fetch_add(len64, Ordering::Relaxed) + len64, + start, + }); + Some((hash, len)) + } + Err(err) => { + error!(%err, %hash, "Error reading block content"); + None + } }) .collect(); - stats.block_error_count += results.iter().filter(|o| o.is_none()).count(); - let len_map: HashMap = results - .into_iter() - .flatten() // keep only Some values - .collect(); - Ok(len_map) + Ok(block_lens) } /// Return the entire contents of the block. @@ -330,15 +297,11 @@ impl BlockDir { pub fn get_block_content(&self, hash: &BlockHash) -> Result<(Vec, Sizes)> { // TODO: Reuse decompressor buffer. // TODO: Reuse read buffer. + // TODO: Most importantly, cache decompressed blocks! 
+        increment_counter!("conserve.block.read");
         let mut decompressor = Decompressor::new();
         let block_relpath = block_relpath(hash);
-        let compressed_bytes =
-            self.transport
-                .read_file(&block_relpath)
-                .map_err(|source| Error::ReadBlock {
-                    source,
-                    hash: hash.to_string(),
-                })?;
+        let compressed_bytes = self.transport.read_file(&block_relpath)?;
         let decompressed_bytes = decompressor.decompress(&compressed_bytes)?;
         let actual_hash = BlockHash::from(blake2b::blake2b(
             BLAKE_HASH_SIZE_BYTES,
             &[],
             decompressed_bytes,
         ));
         if actual_hash != *hash {
-            ui::problem(&format!(
-                "Block file {:?} has actual decompressed hash {}",
-                &block_relpath, actual_hash
-            ));
-            return Err(Error::BlockCorrupt {
-                hash: hash.to_string(),
-                actual_hash: actual_hash.to_string(),
-            });
+            error!(%hash, %actual_hash, %block_relpath, "Block file has wrong hash");
+            return Err(Error::BlockCorrupt { hash: hash.clone() });
         }
         let sizes = Sizes {
             uncompressed: decompressed_bytes.len() as u64,
diff --git a/src/blockhash.rs b/src/blockhash.rs
index 7194cc73..9d2f8c86 100644
--- a/src/blockhash.rs
+++ b/src/blockhash.rs
@@ -37,6 +37,12 @@ pub struct BlockHash {
     bin: [u8; BLAKE_HASH_SIZE_BYTES],
 }
 
+impl BlockHash {
+    pub fn as_slice(&self) -> &[u8] {
+        &self.bin
+    }
+}
+
 #[derive(Debug)]
 pub struct BlockHashParseError {
     rejected_string: String,
diff --git a/src/change.rs b/src/change.rs
new file mode 100644
index 00000000..5f792a1f
--- /dev/null
+++ b/src/change.rs
@@ -0,0 +1,182 @@
+// Conserve backup system.
+// Copyright 2015-2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+//! A change to an entry during backup, diff, restore, etc.
+
+use std::fmt;
+
+use serde::Serialize;
+use time::OffsetDateTime;
+
+use crate::{Apath, EntryTrait, Kind, Owner, Result, UnixMode};
+
+/// Summary of some kind of change to an entry from backup, diff, restore, etc.
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+pub struct EntryChange {
+    pub apath: Apath,
+    #[serde(flatten)]
+    pub change: Change<EntryMetadata>,
+}
+
+impl EntryChange {
+    pub fn is_unchanged(&self) -> bool {
+        self.change.is_unchanged()
+    }
+
+    pub(crate) fn diff_metadata<AE: EntryTrait, BE: EntryTrait>(a: &AE, b: &BE) -> Self {
+        debug_assert_eq!(a.apath(), b.apath());
+        let ak = a.kind();
+        // mtime is only treated as a significant change for files, because
+        // the behavior on directories is not consistent between Unix and
+        // Windows (and maybe not across filesystems even on Unix.)
+        if ak != b.kind()
+            || a.owner() != b.owner()
+            || a.unix_mode() != b.unix_mode()
+            || (ak == Kind::File && (a.size() != b.size() || a.mtime() != b.mtime()))
+            || (ak == Kind::Symlink && (a.symlink_target() != b.symlink_target()))
+        {
+            EntryChange::changed(a, b)
+        } else {
+            EntryChange::unchanged(a)
+        }
+    }
+
+    pub(crate) fn added(entry: &dyn EntryTrait) -> Self {
+        EntryChange {
+            apath: entry.apath().clone(),
+            change: Change::Added {
+                added: EntryMetadata::from(entry),
+            },
+        }
+    }
+
+    #[allow(unused)] // Never generated in backups at the moment
+    pub(crate) fn deleted(entry: &dyn EntryTrait) -> Self {
+        EntryChange {
+            apath: entry.apath().clone(),
+            change: Change::Deleted {
+                deleted: EntryMetadata::from(entry),
+            },
+        }
+    }
+
+    pub(crate) fn unchanged(entry: &dyn EntryTrait) -> Self {
+        EntryChange {
+            apath: entry.apath().clone(),
+            change: Change::Unchanged {
+                unchanged: EntryMetadata::from(entry),
+            },
+        }
+    }
+
+    pub(crate) fn changed(old: &dyn EntryTrait, new: &dyn EntryTrait) -> Self {
+        debug_assert_eq!(old.apath(), new.apath());
+        EntryChange {
+            apath: old.apath().clone(),
+            change: Change::Changed {
+                old: EntryMetadata::from(old),
+                new: EntryMetadata::from(new),
+            },
+        }
+    }
+}
+
+impl fmt::Display for EntryChange {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} {}", self.change.sigil(), self.apath)
+    }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+#[serde(tag = "change")]
+pub enum Change<E> {
+    Unchanged { unchanged: E },
+    Added { added: E },
+    Deleted { deleted: E },
+    Changed { old: E, new: E },
+}
+
+impl<E> Change<E> {
+    pub fn is_unchanged(&self) -> bool {
+        matches!(self, Change::Unchanged { .. })
+    }
+
+    /// Return the primary metadata: the new version, unless this entry was
+    /// deleted in which case the old version.
+    pub fn primary_metadata(&self) -> &E {
+        match self {
+            Change::Unchanged { unchanged } => unchanged,
+            Change::Added { added } => added,
+            Change::Deleted { deleted } => deleted,
+            Change::Changed { new, .. } => new,
+        }
+    }
+
+    pub fn sigil(&self) -> char {
+        match self {
+            Change::Unchanged { .. } => '.',
+            Change::Added { .. } => '+',
+            Change::Deleted { .. } => '-',
+            Change::Changed { .. } => '*',
+        }
+    }
+}
+
+/// Metadata about a changed entry other than its apath.
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+pub struct EntryMetadata {
+    // TODO: Eventually unify with EntryValue or Entry?
+    #[serde(flatten)]
+    pub kind: KindMetadata,
+    pub mtime: OffsetDateTime,
+    #[serde(flatten)]
+    pub owner: Owner,
+    pub unix_mode: UnixMode,
+}
+
+impl From<&dyn EntryTrait> for EntryMetadata {
+    fn from(entry: &dyn EntryTrait) -> Self {
+        EntryMetadata {
+            kind: KindMetadata::from(entry),
+            mtime: entry.mtime(),
+            owner: entry.owner().clone(),
+            unix_mode: entry.unix_mode(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+#[serde(tag = "kind")]
+pub enum KindMetadata {
+    File { size: u64 },
+    Dir,
+    Symlink { target: String },
+}
+
+impl From<&dyn EntryTrait> for KindMetadata {
+    fn from(entry: &dyn EntryTrait) -> Self {
+        match entry.kind() {
+            Kind::File => KindMetadata::File {
+                size: entry.size().unwrap(),
+            },
+            Kind::Dir => KindMetadata::Dir,
+            Kind::Symlink => KindMetadata::Symlink {
+                target: entry.symlink_target().unwrap().to_owned(),
+            },
+            Kind::Unknown => panic!("unexpected Kind::Unknown on {:?}", entry.apath()),
+        }
+    }
+}
+
+/// A callback when a changed entry is visited, e.g. during a backup.
+pub type ChangeCallback<'cb> = Box<dyn Fn(&EntryChange) -> Result<()> + 'cb>;
diff --git a/src/diff.rs b/src/diff.rs
index aa561502..ec5c08a8 100644
--- a/src/diff.rs
+++ b/src/diff.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -15,20 +15,16 @@
 //!
 //! See also [conserve::show_diff] to format the diff as text.
 
-use std::fmt;
-
 use readahead_iterator::IntoReadahead;
 
 use crate::*;
 
-use DiffKind::*;
-use Kind::*;
-use MergedEntryKind::*;
-
 #[derive(Debug)]
 pub struct DiffOptions {
     pub exclude: Exclude,
     pub include_unchanged: bool,
+    // TODO: An option to filter to a subtree?
+    // TODO: Optionally compare all the content?
 }
 
 impl Default for DiffOptions {
@@ -40,95 +36,22 @@ impl Default for DiffOptions {
     }
 }
 
-/// The overall state of change of an entry.
-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
-pub enum DiffKind {
-    Unchanged,
-    New,
-    Deleted,
-    Changed,
-}
-
-impl DiffKind {
-    pub fn as_sigil(self) -> char {
-        match self {
-            Unchanged => '.',
-            New => '+',
-            Deleted => '-',
-            Changed => '*',
-        }
-    }
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub struct DiffEntry {
-    pub apath: Apath,
-    pub kind: DiffKind,
-}
-
-impl fmt::Display for DiffEntry {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}\t{}", self.kind.as_sigil(), self.apath)
-    }
-}
-
 /// Generate an iter of per-entry diffs between two trees.
 pub fn diff(
     st: &StoredTree,
     lt: &LiveTree,
     options: &DiffOptions,
-) -> Result<impl Iterator<Item = DiffEntry>> {
+) -> Result<impl Iterator<Item = EntryChange>> {
     let readahead = 1000;
-    let include_unchanged: bool = options.include_unchanged;
-    // TODO: Take an option for the subtree?
+    let include_unchanged: bool = options.include_unchanged; // Copy out to avoid lifetime problems in the callback
     let ait = st
         .iter_entries(Apath::root(), options.exclude.clone())?
         .readahead(readahead);
     let bit = lt
         .iter_entries(Apath::root(), options.exclude.clone())?
-        .filter(|le| le.kind() != Unknown)
+        .filter(|le| le.kind() != Kind::Unknown)
         .readahead(readahead);
     Ok(MergeTrees::new(ait, bit)
-        .map(diff_merged_entry)
-        .filter(move |de: &DiffEntry| include_unchanged || de.kind != DiffKind::Unchanged))
-}
-
-fn diff_merged_entry<AE, BE>(me: merge::MergedEntry<AE, BE>) -> DiffEntry
-where
-    AE: Entry,
-    BE: Entry,
-{
-    let apath = me.apath;
-    match me.kind {
-        Both(ae, be) => diff_common_entry(ae, be, apath),
-        LeftOnly(_) => DiffEntry {
-            kind: Deleted,
-            apath,
-        },
-        RightOnly(_) => DiffEntry { kind: New, apath },
-    }
-}
-
-fn diff_common_entry<AE, BE>(ae: AE, be: BE, apath: Apath) -> DiffEntry
-where
-    AE: Entry,
-    BE: Entry,
-{
-    // TODO: Actually compare content, if requested.
-    // TODO: Skip Kind::Unknown.
-    let ak = ae.kind();
-    if ak != be.kind()
-        || (ak == File && (ae.mtime() != be.mtime() || ae.size() != be.size()))
-        || (ak == Symlink && (ae.symlink_target() != be.symlink_target()))
-    {
-        DiffEntry {
-            kind: Changed,
-            apath,
-        }
-    } else {
-        DiffEntry {
-            kind: Unchanged,
-            apath,
-        }
-    }
+        .map(|me| me.to_entry_change())
+        .filter(move |c: &EntryChange| include_unchanged || !c.is_unchanged()))
 }
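
The EntryChange and Change types above get their wire format from serde's internally tagged representation: #[serde(tag = "change")] puts the variant name in a "change" field, and #[serde(flatten)] merges that object into the enclosing one next to apath. A self-contained sketch of the same pattern with hypothetical Demo types (not from this diff):

    use serde::Serialize;

    #[derive(Serialize)]
    #[serde(tag = "change")]
    enum DemoChange {
        Added { added: u64 },
        Deleted { deleted: u64 },
    }

    #[derive(Serialize)]
    struct DemoEntry {
        apath: String,
        #[serde(flatten)]
        change: DemoChange,
    }

    fn main() {
        let entry = DemoEntry {
            apath: "/new-file".into(),
            change: DemoChange::Added { added: 123 },
        };
        // Prints: {"apath":"/new-file","change":"Added","added":123}
        println!("{}", serde_json::to_string(&entry).unwrap());
    }
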
diff --git a/src/entry.rs b/src/entry.rs
index 9a20ef6f..1fea0be6 100644
--- a/src/entry.rs
+++ b/src/entry.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -14,30 +14,102 @@
 //! An entry representing a file, directory, etc, in either a
 //! stored tree or local tree.
 
+use std::borrow::Borrow;
 use std::fmt::Debug;
 
+use serde::Serialize;
+use time::OffsetDateTime;
+
 use crate::kind::Kind;
 use crate::owner::Owner;
 use crate::unix_mode::UnixMode;
-use crate::unix_time::UnixTime;
 use crate::*;
 
-pub trait Entry: Debug + Eq + PartialEq {
+/// A description of a file, directory, or symlink in a tree, independent
+/// of whether it's recorded in an archive (an [IndexEntry]), or
+/// in a source tree.
+// TODO: Maybe keep this entirely in memory and explicitly look things
+// up when needed.
+pub trait EntryTrait: Debug {
     fn apath(&self) -> &Apath;
     fn kind(&self) -> Kind;
-    fn mtime(&self) -> UnixTime;
+    fn mtime(&self) -> OffsetDateTime;
     fn size(&self) -> Option<u64>;
-    fn symlink_target(&self) -> &Option<String>;
+    fn symlink_target(&self) -> Option<&str>;
     fn unix_mode(&self) -> UnixMode;
-    fn owner(&self) -> Owner;
-
-    /// True if the metadata supports an assumption the file contents have
-    /// not changed.
-    fn is_unchanged_from<O: Entry>(&self, basis_entry: &O) -> bool {
-        basis_entry.kind() == self.kind()
-            && basis_entry.mtime() == self.mtime()
-            && basis_entry.size() == self.size()
-            && basis_entry.unix_mode() == self.unix_mode()
-            && basis_entry.owner() == self.owner()
+    fn owner(&self) -> &Owner;
+}
+
+/// Per-kind metadata.
+#[derive(Debug, Clone, Eq, PartialEq, Serialize)]
+#[serde(tag = "kind")]
+pub enum KindMeta {
+    File { size: u64 },
+    Dir,
+    Symlink { target: String },
+    Unknown,
+}
+
+impl From<&KindMeta> for Kind {
+    fn from(from: &KindMeta) -> Kind {
+        match from {
+            KindMeta::Dir => Kind::Dir,
+            KindMeta::File { .. } => Kind::File,
+            KindMeta::Symlink { .. } => Kind::Symlink,
+            KindMeta::Unknown => Kind::Unknown,
+        }
+    }
+}
+
+/// An in-memory [Entry] describing a file/dir/symlink, with no addresses.
+#[derive(Debug, Serialize, Clone, Eq, PartialEq)]
+pub struct EntryValue {
+    pub(crate) apath: Apath,
+
+    /// Is it a file, dir, or symlink, and for files the size and for symlinks the target.
+    #[serde(flatten)]
+    pub(crate) kind_meta: KindMeta,
+
+    /// Modification time.
+    pub(crate) mtime: OffsetDateTime,
+    pub(crate) unix_mode: UnixMode,
+    #[serde(flatten)]
+    pub(crate) owner: Owner,
+}
+
+impl<B: Borrow<EntryValue> + Debug> EntryTrait for B {
+    fn apath(&self) -> &Apath {
+        &self.borrow().apath
+    }
+
+    fn kind(&self) -> Kind {
+        Kind::from(&self.borrow().kind_meta)
+    }
+
+    fn mtime(&self) -> OffsetDateTime {
+        self.borrow().mtime
+    }
+
+    fn size(&self) -> Option<u64> {
+        if let KindMeta::File { size } = self.borrow().kind_meta {
+            Some(size)
+        } else {
+            None
+        }
+    }
+
+    fn symlink_target(&self) -> Option<&str> {
+        match &self.borrow().kind_meta {
+            KindMeta::Symlink { target } => Some(target),
+            _ => None,
+        }
+    }
+
+    fn unix_mode(&self) -> UnixMode {
+        self.borrow().unix_mode
+    }
+
+    fn owner(&self) -> &Owner {
+        &self.borrow().owner
     }
 }
diff --git a/src/errors.rs b/src/errors.rs
index 962dde23..1a052596 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2022 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -13,6 +13,8 @@
 //! Conserve error types.
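
The reworked error enum below leans on thiserror: #[error(transparent)] delegates Display and source to the wrapped error, and #[from] derives the From impl that lets `?` convert automatically. A minimal sketch of that pattern with a hypothetical DemoError (not the real enum):

    use thiserror::Error;

    #[derive(Debug, Error)]
    #[non_exhaustive]
    enum DemoError {
        /// Delegates Display/source to the inner io::Error; `#[from]`
        /// makes `?` convert std::io::Error into DemoError::Io.
        #[error(transparent)]
        Io {
            #[from]
            source: std::io::Error,
        },
    }

    fn read(path: &str) -> Result<String, DemoError> {
        Ok(std::fs::read_to_string(path)?)
    }
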
+use std::borrow::Cow; +use std::io; use std::path::PathBuf; use thiserror::Error; @@ -20,31 +22,18 @@ use thiserror::Error; use crate::blockdir::Address; use crate::*; -type IOError = std::io::Error; - /// Conserve specific error. +#[non_exhaustive] #[derive(Debug, Error)] pub enum Error { - #[error("Block file {hash:?} corrupt; actual hash {actual_hash:?}")] - BlockCorrupt { hash: String, actual_hash: String }, + #[error("Block file {hash:?} corrupt: does not have the expected hash")] + BlockCorrupt { hash: BlockHash }, #[error("{address:?} extends beyond decompressed block length {actual_len:?}")] AddressTooLong { address: Address, actual_len: usize }, - #[error("Failed to write block {hash:?}")] - WriteBlock { hash: String, source: IOError }, - - #[error("Failed to read block {hash:?}")] - ReadBlock { hash: String, source: IOError }, - - #[error("Failed to list block files")] - ListBlocks { source: IOError }, - - #[error("Not a Conserve archive")] - NotAnArchive {}, - - #[error("Failed to read archive header")] - ReadArchiveHeader { source: std::io::Error }, + #[error("Not a Conserve archive (no CONSERVE header found)")] + NotAnArchive, #[error( "Archive version {:?} is not supported by Conserve {}", @@ -53,35 +42,32 @@ pub enum Error { )] UnsupportedArchiveVersion { version: String }, - #[error( - "Band version {version:?} in {band_id} is not supported by Conserve {}", - crate::version() - )] + #[error("Unsupported band version {version:?} in {band_id}")] UnsupportedBandVersion { band_id: BandId, version: String }, - #[error("Destination directory not empty: {:?}", path)] - DestinationNotEmpty { path: PathBuf }, - - #[error("Archive has no bands")] + #[error("Archive is empty")] ArchiveEmpty, + #[error("Archive has no complete bands")] + NoCompleteBands, + + #[error("Unsupported band format flags {unsupported_flags:?} in {band_id}")] + UnsupportedBandFormatFlags { + band_id: BandId, + unsupported_flags: Vec>, + }, + + #[error("Destination directory is not empty")] + DestinationNotEmpty, + #[error("Directory for new archive is not empty")] NewArchiveDirectoryNotEmpty, #[error("Invalid backup version number {:?}", version)] InvalidVersion { version: String }, - #[error("Failed to create band")] - CreateBand { source: std::io::Error }, - - #[error("Failed to create block directory")] - CreateBlockDir { source: std::io::Error }, - - #[error("Failed to create archive directory")] - CreateArchiveDirectory { source: std::io::Error }, - - #[error("Band {} is incomplete", band_id)] - BandIncomplete { band_id: BandId }, + #[error("Band {band_id} head file missing")] + BandHeadMissing { band_id: BandId }, #[error( "Can't delete blocks because the last band ({}) is incomplete and may be in use", @@ -95,91 +81,100 @@ pub enum Error { #[error("Archive is locked for garbage collection")] GarbageCollectionLockHeld, + #[error("A backup was created while the garbage collection lock was held; CHECK ARCHIVE NOW")] + GarbageCollectionLockHeldDuringBackup, + #[error(transparent)] ParseGlob { #[from] source: globset::Error, }, - #[error("Failed to write index hunk {:?}", path)] - WriteIndex { path: String, source: IOError }, - - #[error("Failed to read index hunk {:?}", path)] - ReadIndex { path: String, source: IOError }, - - #[error("Failed to serialize index")] - SerializeIndex { source: serde_json::Error }, - - #[error("Failed to deserialize index hunk {:?}", path)] - DeserializeIndex { - path: String, - source: serde_json::Error, - }, - #[error("Failed to write metadata file {:?}", path)] - 
WriteMetadata { - path: String, - source: std::io::Error, - }, + WriteMetadata { path: String, source: io::Error }, #[error("Failed to deserialize json from {:?}", path)] DeserializeJson { - path: PathBuf, + path: String, + #[source] source: serde_json::Error, }, - #[error("Failed to serialize json to {:?}", path)] + #[error("Failed to serialize json")] SerializeJson { - path: String, + #[from] source: serde_json::Error, }, #[error("Metadata file not found: {:?}", path)] - MetadataNotFound { - path: String, - source: std::io::Error, - }, + MetadataNotFound { path: String, source: io::Error }, + + #[error("Invalid metadata: {details}")] + InvalidMetadata { details: String }, + + #[error("Band not found: {band_id}")] + BandNotFound { band_id: BandId }, #[error("Failed to list bands")] - ListBands { source: std::io::Error }, + ListBands { source: io::Error }, #[error("Failed to read source file {:?}", path)] - ReadSourceFile { - path: PathBuf, - source: std::io::Error, - }, + ReadSourceFile { path: PathBuf, source: io::Error }, - #[error("Failed to read source tree {:?}", path)] - ListSourceTree { path: PathBuf, source: IOError }, + #[error("Unsupported source file kind: {path:?}")] + UnsupportedSourceKind { path: PathBuf }, + + #[error("Unsupported symlink encoding: {path:?}")] + UnsupportedTargetEncoding { path: PathBuf }, - #[error("Failed to store file {:?}", apath)] - StoreFile { apath: Apath, source: IOError }, + #[error("Failed to read source tree {:?}", path)] + ListSourceTree { path: PathBuf, source: io::Error }, #[error("Failed to restore {:?}", path)] - Restore { path: PathBuf, source: IOError }, + Restore { path: PathBuf, source: io::Error }, #[error("Failed to restore modification time on {:?}", path)] - RestoreModificationTime { path: PathBuf, source: IOError }, - - #[error("Failed to delete band {}", band_id)] - BandDeletion { band_id: BandId, source: IOError }, + RestoreModificationTime { path: PathBuf, source: io::Error }, #[error("Unsupported URL scheme {:?}", scheme)] UrlScheme { scheme: String }, + #[error("Unexpected file {path:?} in archive directory")] + UnexpectedFile { path: String }, + /// Generic IO error. #[error(transparent)] IOError { #[from] - source: IOError, + source: io::Error, }, #[error("Failed to set owner of {path:?}")] - SetOwner { source: IOError, path: PathBuf }, + SetOwner { source: io::Error, path: PathBuf }, #[error(transparent)] SnapCompressionError { + // TODO: Maybe say in which file, etc. #[from] source: snap::Error, }, + + #[error(transparent)] + Transport { + #[from] + source: transport::Error, + }, +} + +impl From for Error { + fn from(value: jsonio::Error) -> Self { + match value { + jsonio::Error::Io { source } => Error::IOError { source }, + jsonio::Error::Json { source, path } => Error::DeserializeJson { + source, + path: path.to_string_lossy().into_owned(), + }, // conflates serialize/deserialize + jsonio::Error::Transport { source } => Error::Transport { source }, + } + } } diff --git a/src/fbs.rs b/src/fbs.rs new file mode 100644 index 00000000..345d0ca7 --- /dev/null +++ b/src/fbs.rs @@ -0,0 +1,124 @@ +//! Experimental support for storing indexes as flatbuffers. 
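
Reading the flatbuffer back is the mirror of write_index below: the generated root_as_index verifier (defined later in index_generated.rs) checks the buffer before handing out accessors. A hedged in-crate sketch (hypothetical helper, not part of the diff; it must live inside the crate because index_generated is pub(crate)):

    use crate::fbs::index_generated::conserve::index as gen;

    /// Read a file written by `write_index` and print each entry.
    fn dump_index(path: &std::path::Path) -> crate::Result<()> {
        let buf = std::fs::read(path)?;
        let index = gen::root_as_index(&buf).expect("buffer should verify as an Index");
        for entry in index.entries().into_iter().flatten() {
            println!("{:?} {:?}", entry.kind(), entry.apath());
        }
        Ok(())
    }
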
+ +#[allow(dead_code, unused_imports, clippy::all)] +pub(crate) mod index_generated; + +use std::collections::HashMap; +use std::{fs::File, io::Write}; + +use tracing::{debug, trace}; + +use crate::*; + +use index_generated::conserve::index as gen; + +pub fn write_index(st: &StoredTree, mut out_file: File) -> Result<()> { + let all_entries: Vec<_> = st + .iter_entries(Apath::root(), Exclude::nothing())? + .collect(); + debug!("Loaded {} entries", all_entries.len()); + + // Map from hash to serialized location, so that hashes are stored only once. + let mut hash_to_fb: HashMap = HashMap::new(); + let mut name_to_pb: HashMap = HashMap::new(); + + // TODO: Possibly, we should have the serialized layout have all the apaths together, + // all the hashes, all the user/group names, and then all the structs. That seems + // possible and would probably help bytewise compression. + + let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(200 * all_entries.len()); + trace!("Allocated builder"); + let fb_entries: Vec<_> = all_entries + .into_iter() + .map(|entry| { + let addrs = entry + .addrs + .iter() + .map(|addr| { + let hash = *hash_to_fb + .entry(addr.hash.clone()) + .or_insert_with(|| builder.create_vector(addr.hash.as_slice())); + gen::Addr::create( + &mut builder, + &gen::AddrArgs { + hash: Some(hash), + start: addr.start, + len: addr.len, + }, + ) + }) + .collect::>(); + let addrs = if addrs.is_empty() { + None + } else { + Some(builder.create_vector(&addrs)) + }; + let user = entry.owner.user.as_ref().map(|user| { + name_to_pb + .entry(user.to_owned()) + .or_insert_with(|| builder.create_string(user)) + .to_owned() + }); + let group = entry.owner.group.as_ref().map(|group| { + name_to_pb + .entry(group.to_owned()) + .or_insert_with(|| builder.create_string(group)) + .to_owned() + }); + let apath = Some(builder.create_string(entry.apath())); + let target = entry + .target + .as_ref() + .map(|target| builder.create_string(target)); + let unix_mode = entry + .unix_mode + .as_u32() + .map(|mode| gen::UnixMode::new(mode.try_into().expect("unix mode too large"))); + gen::Entry::create( + &mut builder, + &gen::EntryArgs { + apath, + addrs, + kind: entry.kind().into(), + target, + mtime: entry.mtime, + mtime_nanos: entry.mtime_nanos, + unix_mode: unix_mode.as_ref(), + user, + group, + }, + ) + }) + .collect(); + let n_entries = fb_entries.len(); + let fb_entries = builder.create_vector(&fb_entries); + + let index = gen::Index::create( + &mut builder, + &gen::IndexArgs { + entries: Some(fb_entries), + }, + ); + builder.finish(index, None); + + let buf = builder.finished_data(); + let mean_size = buf.len() / n_entries; + debug!( + serialized_len = buf.len(), + n_entries, mean_size, "serialized index to flatbuf" + ); + out_file.write_all(buf)?; + debug!("wrote to out file"); + Ok(()) +} + +impl From for gen::Kind { + fn from(value: Kind) -> Self { + match value { + Kind::Dir => Self::Dir, + Kind::File => Self::File, + Kind::Symlink => Self::Symlink, + _ => panic!("Can't serialize kind {value:?} to flatbuffers"), + } + } +} diff --git a/src/fbs/index_generated.rs b/src/fbs/index_generated.rs new file mode 100644 index 00000000..7f8c76c9 --- /dev/null +++ b/src/fbs/index_generated.rs @@ -0,0 +1,744 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +// @generated + +use core::mem; +use core::cmp::Ordering; + +extern crate flatbuffers; +use self::flatbuffers::{EndianScalar, Follow}; + +#[allow(unused_imports, dead_code)] +pub mod conserve { + + use core::mem; + use 
core::cmp::Ordering; + + extern crate flatbuffers; + use self::flatbuffers::{EndianScalar, Follow}; +#[allow(unused_imports, dead_code)] +pub mod index { + + use core::mem; + use core::cmp::Ordering; + + extern crate flatbuffers; + use self::flatbuffers::{EndianScalar, Follow}; + +#[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] +pub const ENUM_MIN_KIND: i8 = 0; +#[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] +pub const ENUM_MAX_KIND: i8 = 2; +#[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] +#[allow(non_camel_case_types)] +pub const ENUM_VALUES_KIND: [Kind; 3] = [ + Kind::File, + Kind::Dir, + Kind::Symlink, +]; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +#[repr(transparent)] +pub struct Kind(pub i8); +#[allow(non_upper_case_globals)] +impl Kind { + pub const File: Self = Self(0); + pub const Dir: Self = Self(1); + pub const Symlink: Self = Self(2); + + pub const ENUM_MIN: i8 = 0; + pub const ENUM_MAX: i8 = 2; + pub const ENUM_VALUES: &'static [Self] = &[ + Self::File, + Self::Dir, + Self::Symlink, + ]; + /// Returns the variant's name or "" if unknown. + pub fn variant_name(self) -> Option<&'static str> { + match self { + Self::File => Some("File"), + Self::Dir => Some("Dir"), + Self::Symlink => Some("Symlink"), + _ => None, + } + } +} +impl core::fmt::Debug for Kind { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if let Some(name) = self.variant_name() { + f.write_str(name) + } else { + f.write_fmt(format_args!("", self.0)) + } + } +} +impl<'a> flatbuffers::Follow<'a> for Kind { + type Inner = Self; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + let b = flatbuffers::read_scalar_at::(buf, loc); + Self(b) + } +} + +impl flatbuffers::Push for Kind { + type Output = Kind; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + flatbuffers::emplace_scalar::(dst, self.0); + } +} + +impl flatbuffers::EndianScalar for Kind { + type Scalar = i8; + #[inline] + fn to_little_endian(self) -> i8 { + self.0.to_le() + } + #[inline] + #[allow(clippy::wrong_self_convention)] + fn from_little_endian(v: i8) -> Self { + let b = i8::from_le(v); + Self(b) + } +} + +impl<'a> flatbuffers::Verifiable for Kind { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + i8::run_verifier(v, pos) + } +} + +impl flatbuffers::SimpleToVerifyInSlice for Kind {} +// struct UnixMode, aligned to 2 +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq)] +pub struct UnixMode(pub [u8; 2]); +impl Default for UnixMode { + fn default() -> Self { + Self([0; 2]) + } +} +impl core::fmt::Debug for UnixMode { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("UnixMode") + .field("mode", &self.mode()) + .finish() + } +} + +impl flatbuffers::SimpleToVerifyInSlice for UnixMode {} +impl<'a> flatbuffers::Follow<'a> for UnixMode { + type Inner = &'a UnixMode; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + <&'a UnixMode>::follow(buf, loc) + } +} +impl<'a> flatbuffers::Follow<'a> for &'a UnixMode { + type Inner = &'a UnixMode; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + flatbuffers::follow_cast_ref::(buf, loc) + } +} +impl<'b> 
flatbuffers::Push for UnixMode { + type Output = UnixMode; + #[inline] + unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { + let src = ::core::slice::from_raw_parts(self as *const UnixMode as *const u8, Self::size()); + dst.copy_from_slice(src); + } +} + +impl<'a> flatbuffers::Verifiable for UnixMode { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.in_buffer::(pos) + } +} + +impl<'a> UnixMode { + #[allow(clippy::too_many_arguments)] + pub fn new( + mode: u16, + ) -> Self { + let mut s = Self([0; 2]); + s.set_mode(mode); + s + } + + pub fn mode(&self) -> u16 { + let mut mem = core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[0..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_mode(&mut self, x: u16) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[0..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + +} + +pub enum AddrOffset {} +#[derive(Copy, Clone, PartialEq)] + +pub struct Addr<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Addr<'a> { + type Inner = Addr<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } +} + +impl<'a> Addr<'a> { + pub const VT_HASH: flatbuffers::VOffsetT = 4; + pub const VT_START: flatbuffers::VOffsetT = 6; + pub const VT_LEN: flatbuffers::VOffsetT = 8; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Addr { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + args: &'args AddrArgs<'args> + ) -> flatbuffers::WIPOffset> { + let mut builder = AddrBuilder::new(_fbb); + builder.add_len(args.len); + builder.add_start(args.start); + if let Some(x) = args.hash { builder.add_hash(x); } + builder.finish() + } + + + #[inline] + pub fn hash(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>>(Addr::VT_HASH, None)} + } + #[inline] + pub fn start(&self) -> u64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Addr::VT_START, Some(0)).unwrap()} + } + #[inline] + pub fn len(&self) -> u64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Addr::VT_LEN, Some(0)).unwrap()} + } +} + +impl flatbuffers::Verifiable for Addr<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>>("hash", Self::VT_HASH, false)? + .visit_field::("start", Self::VT_START, false)? + .visit_field::("len", Self::VT_LEN, false)? 
+ .finish(); + Ok(()) + } +} +pub struct AddrArgs<'a> { + pub hash: Option>>, + pub start: u64, + pub len: u64, +} +impl<'a> Default for AddrArgs<'a> { + #[inline] + fn default() -> Self { + AddrArgs { + hash: None, + start: 0, + len: 0, + } + } +} + +pub struct AddrBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> AddrBuilder<'a, 'b> { + #[inline] + pub fn add_hash(&mut self, hash: flatbuffers::WIPOffset>) { + self.fbb_.push_slot_always::>(Addr::VT_HASH, hash); + } + #[inline] + pub fn add_start(&mut self, start: u64) { + self.fbb_.push_slot::(Addr::VT_START, start, 0); + } + #[inline] + pub fn add_len(&mut self, len: u64) { + self.fbb_.push_slot::(Addr::VT_LEN, len, 0); + } + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> AddrBuilder<'a, 'b> { + let start = _fbb.start_table(); + AddrBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for Addr<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Addr"); + ds.field("hash", &self.hash()); + ds.field("start", &self.start()); + ds.field("len", &self.len()); + ds.finish() + } +} +pub enum EntryOffset {} +#[derive(Copy, Clone, PartialEq)] + +pub struct Entry<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Entry<'a> { + type Inner = Entry<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } +} + +impl<'a> Entry<'a> { + pub const VT_APATH: flatbuffers::VOffsetT = 4; + pub const VT_KIND: flatbuffers::VOffsetT = 6; + pub const VT_TARGET: flatbuffers::VOffsetT = 8; + pub const VT_MTIME: flatbuffers::VOffsetT = 10; + pub const VT_MTIME_NANOS: flatbuffers::VOffsetT = 12; + pub const VT_UNIX_MODE: flatbuffers::VOffsetT = 14; + pub const VT_ADDRS: flatbuffers::VOffsetT = 16; + pub const VT_USER: flatbuffers::VOffsetT = 18; + pub const VT_GROUP: flatbuffers::VOffsetT = 20; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Entry { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + args: &'args EntryArgs<'args> + ) -> flatbuffers::WIPOffset> { + let mut builder = EntryBuilder::new(_fbb); + builder.add_mtime(args.mtime); + if let Some(x) = args.group { builder.add_group(x); } + if let Some(x) = args.user { builder.add_user(x); } + if let Some(x) = args.addrs { builder.add_addrs(x); } + if let Some(x) = args.unix_mode { builder.add_unix_mode(x); } + builder.add_mtime_nanos(args.mtime_nanos); + if let Some(x) = args.target { builder.add_target(x); } + if let Some(x) = args.apath { builder.add_apath(x); } + builder.add_kind(args.kind); + builder.finish() + } + + + #[inline] + pub fn apath(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>(Entry::VT_APATH, None)} + } + #[inline] + pub fn kind(&self) -> Kind { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Entry::VT_KIND, Some(Kind::File)).unwrap()} + } + #[inline] + pub fn target(&self) -> Option<&'a str> { + // 
Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>(Entry::VT_TARGET, None)} + } + #[inline] + pub fn mtime(&self) -> i64 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Entry::VT_MTIME, Some(0)).unwrap()} + } + #[inline] + pub fn mtime_nanos(&self) -> u32 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Entry::VT_MTIME_NANOS, Some(0)).unwrap()} + } + #[inline] + pub fn unix_mode(&self) -> Option<&'a UnixMode> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Entry::VT_UNIX_MODE, None)} + } + #[inline] + pub fn addrs(&self) -> Option>>> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>>>(Entry::VT_ADDRS, None)} + } + #[inline] + pub fn user(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>(Entry::VT_USER, None)} + } + #[inline] + pub fn group(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>(Entry::VT_GROUP, None)} + } +} + +impl flatbuffers::Verifiable for Entry<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>("apath", Self::VT_APATH, false)? + .visit_field::("kind", Self::VT_KIND, false)? + .visit_field::>("target", Self::VT_TARGET, false)? + .visit_field::("mtime", Self::VT_MTIME, false)? + .visit_field::("mtime_nanos", Self::VT_MTIME_NANOS, false)? + .visit_field::("unix_mode", Self::VT_UNIX_MODE, false)? + .visit_field::>>>("addrs", Self::VT_ADDRS, false)? + .visit_field::>("user", Self::VT_USER, false)? + .visit_field::>("group", Self::VT_GROUP, false)? 
+ .finish(); + Ok(()) + } +} +pub struct EntryArgs<'a> { + pub apath: Option>, + pub kind: Kind, + pub target: Option>, + pub mtime: i64, + pub mtime_nanos: u32, + pub unix_mode: Option<&'a UnixMode>, + pub addrs: Option>>>>, + pub user: Option>, + pub group: Option>, +} +impl<'a> Default for EntryArgs<'a> { + #[inline] + fn default() -> Self { + EntryArgs { + apath: None, + kind: Kind::File, + target: None, + mtime: 0, + mtime_nanos: 0, + unix_mode: None, + addrs: None, + user: None, + group: None, + } + } +} + +pub struct EntryBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b> EntryBuilder<'a, 'b> { + #[inline] + pub fn add_apath(&mut self, apath: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>(Entry::VT_APATH, apath); + } + #[inline] + pub fn add_kind(&mut self, kind: Kind) { + self.fbb_.push_slot::(Entry::VT_KIND, kind, Kind::File); + } + #[inline] + pub fn add_target(&mut self, target: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>(Entry::VT_TARGET, target); + } + #[inline] + pub fn add_mtime(&mut self, mtime: i64) { + self.fbb_.push_slot::(Entry::VT_MTIME, mtime, 0); + } + #[inline] + pub fn add_mtime_nanos(&mut self, mtime_nanos: u32) { + self.fbb_.push_slot::(Entry::VT_MTIME_NANOS, mtime_nanos, 0); + } + #[inline] + pub fn add_unix_mode(&mut self, unix_mode: &UnixMode) { + self.fbb_.push_slot_always::<&UnixMode>(Entry::VT_UNIX_MODE, unix_mode); + } + #[inline] + pub fn add_addrs(&mut self, addrs: flatbuffers::WIPOffset>>>) { + self.fbb_.push_slot_always::>(Entry::VT_ADDRS, addrs); + } + #[inline] + pub fn add_user(&mut self, user: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>(Entry::VT_USER, user); + } + #[inline] + pub fn add_group(&mut self, group: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>(Entry::VT_GROUP, group); + } + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> EntryBuilder<'a, 'b> { + let start = _fbb.start_table(); + EntryBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for Entry<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Entry"); + ds.field("apath", &self.apath()); + ds.field("kind", &self.kind()); + ds.field("target", &self.target()); + ds.field("mtime", &self.mtime()); + ds.field("mtime_nanos", &self.mtime_nanos()); + ds.field("unix_mode", &self.unix_mode()); + ds.field("addrs", &self.addrs()); + ds.field("user", &self.user()); + ds.field("group", &self.group()); + ds.finish() + } +} +pub enum IndexOffset {} +#[derive(Copy, Clone, PartialEq)] + +pub struct Index<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Index<'a> { + type Inner = Index<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } +} + +impl<'a> Index<'a> { + pub const VT_ENTRIES: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Index { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + args: &'args IndexArgs<'args> + ) -> flatbuffers::WIPOffset> { + let mut builder = 
IndexBuilder::new(_fbb);
+    if let Some(x) = args.entries { builder.add_entries(x); }
+    builder.finish()
+  }
+
+
+  #[inline]
+  pub fn entries(&self) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Entry<'a>>>> {
+    // Safety:
+    // Created from valid Table for this object
+    // which contains a valid value in this slot
+    unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Entry>>>>(Index::VT_ENTRIES, None)}
+  }
+}
+
+impl flatbuffers::Verifiable for Index<'_> {
+  #[inline]
+  fn run_verifier(
+    v: &mut flatbuffers::Verifier, pos: usize
+  ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
+    use self::flatbuffers::Verifiable;
+    v.visit_table(pos)?
+     .visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<Entry>>>>("entries", Self::VT_ENTRIES, false)?
+     .finish();
+    Ok(())
+  }
+}
+pub struct IndexArgs<'a> {
+    pub entries: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Entry<'a>>>>>,
+}
+impl<'a> Default for IndexArgs<'a> {
+  #[inline]
+  fn default() -> Self {
+    IndexArgs {
+      entries: None,
+    }
+  }
+}
+
+pub struct IndexBuilder<'a: 'b, 'b> {
+  fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>,
+  start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
+}
+impl<'a: 'b, 'b> IndexBuilder<'a, 'b> {
+  #[inline]
+  pub fn add_entries(&mut self, entries: flatbuffers::WIPOffset<flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<Entry<'b>>>>) {
+    self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(Index::VT_ENTRIES, entries);
+  }
+  #[inline]
+  pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> IndexBuilder<'a, 'b> {
+    let start = _fbb.start_table();
+    IndexBuilder {
+      fbb_: _fbb,
+      start_: start,
+    }
+  }
+  #[inline]
+  pub fn finish(self) -> flatbuffers::WIPOffset<Index<'a>> {
+    let o = self.fbb_.end_table(self.start_);
+    flatbuffers::WIPOffset::new(o.value())
+  }
+}
+
+impl core::fmt::Debug for Index<'_> {
+  fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+    let mut ds = f.debug_struct("Index");
+      ds.field("entries", &self.entries());
+      ds.finish()
+  }
+}
+#[inline]
+/// Verifies that a buffer of bytes contains a `Index`
+/// and returns it.
+/// Note that verification is still experimental and may not
+/// catch every error, or be maximally performant. For the
+/// previous, unchecked, behavior use
+/// `root_as_index_unchecked`.
+pub fn root_as_index(buf: &[u8]) -> Result<Index, flatbuffers::InvalidFlatbuffer> {
+  flatbuffers::root::<Index>(buf)
+}
+#[inline]
+/// Verifies that a buffer of bytes contains a size prefixed
+/// `Index` and returns it.
+/// Note that verification is still experimental and may not
+/// catch every error, or be maximally performant. For the
+/// previous, unchecked, behavior use
+/// `size_prefixed_root_as_index_unchecked`.
+pub fn size_prefixed_root_as_index(buf: &[u8]) -> Result<Index, flatbuffers::InvalidFlatbuffer> {
+  flatbuffers::size_prefixed_root::<Index>(buf)
+}
+#[inline]
+/// Verifies, with the given options, that a buffer of bytes
+/// contains a `Index` and returns it.
+/// Note that verification is still experimental and may not
+/// catch every error, or be maximally performant. For the
+/// previous, unchecked, behavior use
+/// `root_as_index_unchecked`.
+pub fn root_as_index_with_opts<'b, 'o>(
+  opts: &'o flatbuffers::VerifierOptions,
+  buf: &'b [u8],
+) -> Result<Index<'b>, flatbuffers::InvalidFlatbuffer> {
+  flatbuffers::root_with_opts::<Index<'b>>(opts, buf)
+}
+#[inline]
+/// Verifies, with the given verifier options, that a buffer of
+/// bytes contains a size prefixed `Index` and returns
+/// it. Note that verification is still experimental and may not
+/// catch every error, or be maximally performant. For the
+/// previous, unchecked, behavior use
+/// `root_as_index_unchecked`.
+pub fn size_prefixed_root_as_index_with_opts<'b, 'o>(
+  opts: &'o flatbuffers::VerifierOptions,
+  buf: &'b [u8],
+) -> Result<Index<'b>, flatbuffers::InvalidFlatbuffer> {
+  flatbuffers::size_prefixed_root_with_opts::<Index<'b>>(opts, buf)
+}
+#[inline]
+/// Assumes, without verification, that a buffer of bytes contains a Index and returns it.
+/// # Safety
+/// Callers must trust the given bytes do indeed contain a valid `Index`.
+pub unsafe fn root_as_index_unchecked(buf: &[u8]) -> Index {
+  flatbuffers::root_unchecked::<Index>(buf)
+}
+#[inline]
+/// Assumes, without verification, that a buffer of bytes contains a size prefixed Index and returns it.
+/// # Safety
+/// Callers must trust the given bytes do indeed contain a valid size prefixed `Index`.
+pub unsafe fn size_prefixed_root_as_index_unchecked(buf: &[u8]) -> Index {
+  flatbuffers::size_prefixed_root_unchecked::<Index>(buf)
+}
+#[inline]
+pub fn finish_index_buffer<'a, 'b>(
+    fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>,
+    root: flatbuffers::WIPOffset<Index<'a>>) {
+  fbb.finish(root, None);
+}
+
+#[inline]
+pub fn finish_size_prefixed_index_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset<Index<'a>>) {
+  fbb.finish_size_prefixed(root, None);
+}
+}  // pub mod Index
+}  // pub mod Conserve
+
diff --git a/src/gc_lock.rs b/src/gc_lock.rs
index 2b2f4501..a1a6f245 100644
--- a/src/gc_lock.rs
+++ b/src/gc_lock.rs
@@ -55,13 +55,13 @@ impl GarbageCollectionLock {
     pub fn new(archive: &Archive) -> Result<GarbageCollectionLock> {
         let archive = archive.clone();
         let band_id = archive.last_band_id()?;
-        if let Some(band_id) = band_id.clone() {
-            if !archive.band_is_closed(&band_id)? {
+        if let Some(band_id) = band_id {
+            if !archive.band_is_closed(band_id)? {
                 return Err(Error::DeleteWithIncompleteBackup { band_id });
             }
         }
         if archive.transport().is_file(GC_LOCK).unwrap_or(true) {
-            return Err(Error::GarbageCollectionLockHeld {});
+            return Err(Error::GarbageCollectionLockHeld);
         }
         archive.transport().write_file(GC_LOCK, b"{}\n")?;
         Ok(GarbageCollectionLock { archive, band_id })
@@ -90,7 +90,7 @@ impl GarbageCollectionLock {
         if self.band_id == current_last_band_id {
             Ok(())
         } else {
-            Err(Error::DeleteWithConcurrentActivity)
+            Err(Error::GarbageCollectionLockHeldDuringBackup)
         }
     }
 }
@@ -126,7 +126,7 @@ mod test {
     fn completed_backup_ok() {
         let archive = ScratchArchive::new();
         let source = TreeFixture::new();
-        backup(&archive, &source.live_tree(), &BackupOptions::default()).unwrap();
+        backup(&archive, source.path(), &BackupOptions::default()).unwrap();
         let delete_guard = GarbageCollectionLock::new(&archive).unwrap();
         delete_guard.check().unwrap();
     }
@@ -136,7 +136,7 @@ mod test {
         let archive = ScratchArchive::new();
         let source = TreeFixture::new();
         let _delete_guard = GarbageCollectionLock::new(&archive).unwrap();
-        let backup_result = backup(&archive, &source.live_tree(), &BackupOptions::default());
+        let backup_result = backup(&archive, source.path(), &BackupOptions::default());
         assert_eq!(
             backup_result.expect_err("backup fails").to_string(),
             "Archive is locked for garbage collection"
@@ -161,10 +161,10 @@ mod test {
         let _lock1 = GarbageCollectionLock::new(&archive).unwrap();
         // Should not be able to create a second lock while one gc is running.
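+        // Asserting on the Display text, rather than matching the enum
+        // variant, also checks the message that users will actually see.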
         let lock2_result = GarbageCollectionLock::new(&archive);
-        match lock2_result {
-            Err(Error::GarbageCollectionLockHeld) => (),
-            other => panic!("unexpected result {other:?}"),
-        };
+        assert_eq!(
+            lock2_result.unwrap_err().to_string(),
+            "Archive is locked for garbage collection"
+        );
     }
 
     #[test]
diff --git a/src/index.rs b/src/index.rs
index 864a6082..f6d5023c 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2022 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -14,20 +14,24 @@
 //! Index lists the files in a band in the archive.
 
 use std::cmp::Ordering;
-use std::io;
 use std::iter::Peekable;
 use std::path::Path;
 use std::sync::Arc;
 use std::vec;
 
+use metrics::{counter, increment_counter};
+use time::OffsetDateTime;
+use tracing::error;
+
 use crate::compress::snappy::{Compressor, Decompressor};
+use crate::entry::{EntryValue, KindMeta};
 use crate::kind::Kind;
 use crate::owner::Owner;
 use crate::stats::{IndexReadStats, IndexWriterStats};
 use crate::transport::local::LocalTransport;
 use crate::transport::Transport;
 use crate::unix_mode::UnixMode;
-use crate::unix_time::UnixTime;
+use crate::unix_time::FromUnixAndNanos;
 use crate::*;
 
 pub const MAX_ENTRIES_PER_HUNK: usize = 1000;
@@ -84,7 +88,35 @@ pub struct IndexEntry {
 }
 // GRCOV_EXCLUDE_STOP
 
-impl Entry for IndexEntry {
+impl From<IndexEntry> for EntryValue {
+    fn from(index_entry: IndexEntry) -> EntryValue {
+        let kind_meta = match index_entry.kind {
+            Kind::File => KindMeta::File {
+                size: index_entry.addrs.iter().map(|a| a.len).sum(),
+            },
+            Kind::Symlink => KindMeta::Symlink {
+                // TODO: Should not be fatal
+                target: index_entry
+                    .target
+                    .expect("symlink entry should have a target"),
+            },
+            Kind::Dir => KindMeta::Dir,
+            Kind::Unknown => KindMeta::Unknown,
+        };
+        EntryValue {
+            apath: index_entry.apath,
+            kind_meta,
+            mtime: OffsetDateTime::from_unix_seconds_and_nanos(
+                index_entry.mtime,
+                index_entry.mtime_nanos,
+            ),
+            unix_mode: index_entry.unix_mode,
+            owner: index_entry.owner,
+        }
+    }
+}
+
+impl EntryTrait for IndexEntry {
     /// Return apath relative to the top of the tree.
     fn apath(&self) -> &Apath {
         &self.apath
@@ -96,11 +128,8 @@
     }
 
     #[inline]
-    fn mtime(&self) -> UnixTime {
-        UnixTime {
-            secs: self.mtime,
-            nanosecs: self.mtime_nanos,
-        }
+    fn mtime(&self) -> OffsetDateTime {
+        OffsetDateTime::from_unix_seconds_and_nanos(self.mtime, self.mtime_nanos)
     }
 
     /// Size of the file, if it is a file. None for directories and symlinks.
@@ -110,22 +139,22 @@
 
     /// Target of the symlink, if this is a symlink.
     #[inline]
-    fn symlink_target(&self) -> &Option<String> {
-        &self.target
+    fn symlink_target(&self) -> Option<&str> {
+        self.target.as_deref()
     }
 
     fn unix_mode(&self) -> UnixMode {
         self.unix_mode
     }
 
-    fn owner(&self) -> Owner {
-        self.owner.clone()
+    fn owner(&self) -> &Owner {
+        &self.owner
     }
 }
 
 impl IndexEntry {
     /// Copy the metadata, but not the body content, from another entry.
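+    ///
+    /// The returned entry's block addresses (`addrs`) start out empty;
+    /// they are filled in later, when the file's content is stored.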
-    pub(crate) fn metadata_from<E: Entry>(source: &E) -> IndexEntry {
+    pub(crate) fn metadata_from(source: &EntryValue) -> IndexEntry {
         let mtime = source.mtime();
         assert_eq!(
             source.symlink_target().is_some(),
@@ -135,11 +164,11 @@ impl IndexEntry {
             apath: source.apath().clone(),
             kind: source.kind(),
             addrs: Vec::new(),
-            target: source.symlink_target().clone(),
-            mtime: mtime.secs,
-            mtime_nanos: mtime.nanosecs,
+            target: source.symlink_target().map(|t| t.to_owned()),
+            mtime: mtime.unix_timestamp(),
+            mtime_nanos: mtime.nanosecond(),
             unix_mode: source.unix_mode(),
-            owner: source.owner(),
+            owner: source.owner().to_owned(),
         }
     }
 }
@@ -221,22 +250,12 @@ impl IndexWriter {
             self.check_order.check(&self.entries.last().unwrap().apath);
         }
         let relpath = hunk_relpath(self.sequence);
-        let write_error = |source| Error::WriteIndex {
-            path: relpath.clone(),
-            source,
-        };
-        let json =
-            serde_json::to_vec(&self.entries).map_err(|source| Error::SerializeIndex { source })?;
+        let json = serde_json::to_vec(&self.entries)?;
         if (self.sequence % HUNKS_PER_SUBDIR) == 0 {
-            self.transport
-                .create_dir(&subdir_relpath(self.sequence))
-                .map_err(write_error)?;
+            self.transport.create_dir(&subdir_relpath(self.sequence))?;
         }
         let compressed_bytes = self.compressor.compress(&json)?;
-        self.transport
-            .write_file(&relpath, compressed_bytes)
-            .map_err(write_error)?;
-
+        self.transport.write_file(&relpath, compressed_bytes)?;
         self.stats.index_hunks += 1;
         self.stats.compressed_index_bytes += compressed_bytes.len() as u64;
         self.stats.uncompressed_index_bytes += json.len() as u64;
@@ -282,11 +301,7 @@ impl IndexRead {
         // one hunk being missing.
         for i in 0.. {
             let path = hunk_relpath(i);
-            if !self
-                .transport
-                .is_file(&path)
-                .map_err(|source| Error::ReadIndex { source, path })?
-            {
+            if !self.transport.is_file(&path)? {
                 // If hunk 1 is missing, 1 hunk exists.
                 return Ok(i);
             }
@@ -340,9 +355,7 @@ impl Iterator for IndexHunkIter {
                 Ok(Some(entries)) => entries,
                 Err(err) => {
                     self.stats.errors += 1;
-                    ui::problem(&format!(
-                        "Error reading index hunk {hunk_number:?}: {err:?} "
-                    ));
+                    error!("Error reading index hunk {hunk_number:?}: {err}");
                     continue;
                 }
             };
@@ -382,33 +395,38 @@ impl IndexHunkIter {
     }
 
     fn read_next_hunk(&mut self) -> Result<Option<Vec<IndexEntry>>> {
-        let path = &hunk_relpath(self.next_hunk_number);
+        let path = hunk_relpath(self.next_hunk_number);
         // Whether we succeed or fail, don't try to read this hunk again.
         self.next_hunk_number += 1;
-        let compressed_bytes = match self.transport.read_file(path) {
+        let compressed_bytes = match self.transport.read_file(&path) {
             Ok(b) => b,
-            Err(err) if err.kind() == io::ErrorKind::NotFound => {
+            Err(err) if err.is_not_found() => {
                 // TODO: Cope with one hunk being missing, while there are still
                 // later-numbered hunks. This would require reading the whole
                 // list of hunks first.
                 return Ok(None);
             }
-            Err(source) => {
-                return Err(Error::ReadIndex {
-                    path: path.clone(),
-                    source,
-                });
-            }
+            Err(source) => return Err(Error::Transport { source }),
         };
+        increment_counter!("conserve.index.read.hunks");
         self.stats.index_hunks += 1;
+        counter!(
+            "conserve.index.read.compressed_bytes",
+            compressed_bytes.len() as u64
+        );
         self.stats.compressed_index_bytes += compressed_bytes.len() as u64;
         let index_bytes = self.decompressor.decompress(&compressed_bytes)?;
+        counter!(
+            "conserve.index.read.decompressed_bytes",
+            index_bytes.len() as u64
+        );
         self.stats.uncompressed_index_bytes += index_bytes.len() as u64;
         let entries: Vec<IndexEntry> =
-            serde_json::from_slice(index_bytes).map_err(|source| Error::DeserializeIndex {
+            serde_json::from_slice(index_bytes).map_err(|source| Error::DeserializeJson {
                 path: path.clone(),
                 source,
             })?;
+        counter!("conserve.index.read.entries", entries.len() as u64);
         if entries.is_empty() {
             // It's legal, it's just weird - and it can be produced by some old Conserve versions.
         }
diff --git a/src/jsonio.rs b/src/jsonio.rs
index dd19c3e1..e3ee32f3 100644
--- a/src/jsonio.rs
+++ b/src/jsonio.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2018, 2020 Martin Pool.
+// Copyright 2015, 2016, 2018, 2020, 2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -14,12 +14,34 @@
 //! Read and write JSON files.
 
 use std::io;
+use std::path::PathBuf;
 
 use serde::de::DeserializeOwned;
 
-use crate::errors::Error;
-use crate::transport::Transport;
-use crate::Result;
+use crate::transport::{self, Transport};
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("IO error")]
+    Io {
+        #[from]
+        source: io::Error,
+    },
+
+    #[error("JSON serialization error")]
+    Json {
+        source: serde_json::Error,
+        path: PathBuf,
+    },
+
+    #[error("Transport error")]
+    Transport {
+        #[from]
+        source: transport::Error,
+    },
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
 
 /// Write uncompressed json to a file on a Transport.
 pub(crate) fn write_json<T, TR>(transport: &TR, relpath: &str, obj: &T) -> Result<()>
@@ -27,40 +49,37 @@ where
     T: serde::Serialize,
     TR: AsRef<dyn Transport>,
 {
-    let mut s: String = serde_json::to_string(&obj).map_err(|source| Error::SerializeJson {
-        path: relpath.to_string(),
+    let mut s: String = serde_json::to_string(&obj).map_err(|source| Error::Json {
         source,
+        path: relpath.into(),
     })?;
     s.push('\n');
     transport
         .as_ref()
         .write_file(relpath, s.as_bytes())
-        .map_err(|source| Error::WriteMetadata {
-            path: relpath.to_owned(),
-            source,
-        })
+        .map_err(Error::from)
 }
 
-/// Read and deserialize uncompressed json from a Transport.
-pub(crate) fn read_json<T, TR>(transport: &TR, path: &str) -> Result<T>
+/// Read and deserialize uncompressed json from a file on a Transport.
+///
+/// Returns None if the file does not exist.
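+///
+/// A minimal usage sketch (the `Header` type here is illustrative; any
+/// `DeserializeOwned` type works):
+///
+/// ```ignore
+/// let header: Option<Header> = read_json(&transport, "BANDHEAD")?;
+/// ```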
+pub(crate) fn read_json<T, TR>(transport: &TR, path: &str) -> Result<Option<T>>
 where
     T: DeserializeOwned,
     TR: AsRef<dyn Transport>,
 {
-    let bytes = transport
-        .as_ref()
-        .read_file(path)
-        .map_err(|err| match err.kind() {
-            io::ErrorKind::NotFound => Error::MetadataNotFound {
-                path: path.to_owned(),
-                source: err,
-            },
-            _ => err.into(),
-        })?;
-    serde_json::from_slice(&bytes).map_err(|source| Error::DeserializeJson {
-        source,
-        path: path.into(),
-    })
+    let bytes = match transport.as_ref().read_file(path) {
+        Ok(b) => b,
+        Err(err) if err.is_not_found() => return Ok(None),
+        Err(err) => return Err(err.into()),
+    };
+    serde_json::from_slice(&bytes)
+        .map(|t| Some(t))
+        .map_err(|source| Error::Json {
+            source,
+            // TODO: Full path from the transport?
+            path: path.into(),
+        })
 }
 
 #[cfg(test)]
@@ -104,7 +123,9 @@ mod tests {
             .unwrap();
         let transport = LocalTransport::new(temp.path());
 
-        let content: TestContents = read_json(&transport, "test.json").unwrap();
+        let content: TestContents = read_json(&transport, "test.json")
+            .expect("no error")
+            .expect("file exists");
 
         assert_eq!(
             content,
diff --git a/src/lib.rs b/src/lib.rs
index 64a043f4..49727f0f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,4 @@
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -12,7 +12,6 @@
 
 //! Conserve backup system.
 
-// Conserve implementation modules.
 pub mod apath;
 pub mod archive;
 pub mod backup;
@@ -20,11 +19,13 @@ mod band;
 pub mod bandid;
 mod blockdir;
 pub mod blockhash;
+pub mod change;
 pub mod compress;
 mod diff;
-mod entry;
+pub mod entry;
 pub mod errors;
 pub mod excludes;
+pub mod fbs;
 mod gc_lock;
 pub mod index;
 mod io;
@@ -32,8 +33,10 @@ mod jsonio;
 pub mod kind;
 pub mod live_tree;
 mod merge;
-pub(crate) mod misc;
+pub mod metric_recorder;
+pub mod misc;
 pub mod owner;
+pub mod progress;
 pub mod restore;
 pub mod show;
 pub mod stats;
@@ -41,38 +44,41 @@ mod stitch;
 mod stored_file;
 mod stored_tree;
 pub mod test_fixtures;
+pub mod trace_counter;
 pub mod transport;
 mod tree;
 pub mod ui;
 pub mod unix_mode;
 pub mod unix_time;
-mod validate;
+pub mod validate;
 
 pub use crate::apath::Apath;
 pub use crate::archive::Archive;
 pub use crate::archive::DeleteOptions;
-pub use crate::backup::{backup, BackupOptions};
-pub use crate::band::Band;
-pub use crate::band::BandSelectionPolicy;
+pub use crate::backup::{backup, BackupOptions, BackupStats};
+pub use crate::band::{Band, BandSelectionPolicy};
 pub use crate::bandid::BandId;
 pub use crate::blockdir::BlockDir;
 pub use crate::blockhash::BlockHash;
-pub use crate::diff::{diff, DiffEntry, DiffKind, DiffOptions};
-pub use crate::entry::Entry;
+pub use crate::change::{ChangeCallback, EntryChange};
+pub use crate::diff::{diff, DiffOptions};
+pub use crate::entry::{EntryTrait, EntryValue};
 pub use crate::errors::Error;
 pub use crate::excludes::Exclude;
 pub use crate::gc_lock::GarbageCollectionLock;
 pub use crate::index::{IndexEntry, IndexRead, IndexWriter};
 pub use crate::kind::Kind;
-pub use crate::live_tree::{LiveEntry, LiveTree};
-pub use crate::merge::{MergeTrees, MergedEntryKind};
+pub use crate::live_tree::LiveTree;
+pub use crate::merge::MergeTrees;
 pub use crate::misc::bytes_to_human_mb;
-pub use crate::restore::{restore, RestoreOptions, RestoreTree};
-pub use crate::show::{show_diff, show_versions, ShowVersionsOptions};
-pub use crate::stats::{BackupStats, DeleteStats, RestoreStats, ValidateStats};
+pub use crate::owner::Owner;
+pub use crate::restore::{restore, RestoreOptions};
+pub use crate::show::{show_versions, ShowVersionsOptions};
+pub use crate::stats::{DeleteStats, RestoreStats};
 pub use crate::stored_tree::StoredTree;
 pub use crate::transport::{open_transport, Transport};
 pub use crate::tree::{ReadBlocks, ReadTree, TreeSize};
+pub use crate::unix_mode::UnixMode;
 pub use crate::validate::ValidateOptions;
 
 pub type Result<T> = std::result::Result<T, Error>;
@@ -110,3 +116,6 @@ static BAND_TAIL_FILENAME: &str = "BANDTAIL";
 
 /// Length of the binary content hash.
 pub(crate) const BLAKE_HASH_SIZE_BYTES: usize = 64;
+
+/// A callback when an entry is visited.
+pub type EntryCallback<'cb> = Box<dyn Fn(&EntryValue) + 'cb>;
diff --git a/src/live_tree.rs b/src/live_tree.rs
index 41b54531..67593624 100644
--- a/src/live_tree.rs
+++ b/src/live_tree.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2022 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -15,13 +15,16 @@
 
 use std::collections::vec_deque::VecDeque;
 use std::fs;
+use std::fs::File;
 use std::io::ErrorKind;
 use std::path::{Path, PathBuf};
 
+use tracing::{error, warn};
+
+use crate::entry::{EntryValue, KindMeta};
 use crate::owner::Owner;
 use crate::stats::LiveTreeIterStats;
 use crate::unix_mode::UnixMode;
-use crate::unix_time::UnixTime;
 use crate::*;
 
 /// A real tree on the filesystem, for use as a backup source or restore destination.
@@ -47,35 +50,23 @@ impl LiveTree {
     pub fn path(&self) -> &Path {
         &self.path
     }
-}
 
-/// An in-memory Entry describing a file/dir/symlink in a live tree.
-#[derive(Debug, Clone, Eq, PartialEq)]
-pub struct LiveEntry {
-    apath: Apath,
-    kind: Kind,
-    mtime: UnixTime,
-    size: Option<u64>,
-    symlink_target: Option<String>,
-    unix_mode: UnixMode,
-    owner: Owner,
+    /// Open a file inside the tree to read.
+    pub fn open_file(&self, entry: &EntryValue) -> Result<File> {
+        assert_eq!(entry.kind(), Kind::File);
+        let path = self.relative_path(&entry.apath);
+        fs::File::open(&path).map_err(|source| Error::ReadSourceFile { path, source })
+    }
 }
 
 impl tree::ReadTree for LiveTree {
-    type Entry = LiveEntry;
-    type R = std::fs::File;
+    type Entry = EntryValue;
     type IT = Iter;
 
     fn iter_entries(&self, subtree: Apath, exclude: Exclude) -> Result<Self::IT> {
         Iter::new(&self.path, subtree, exclude)
     }
 
-    fn file_contents(&self, entry: &LiveEntry) -> Result<Self::R> {
-        assert_eq!(entry.kind(), Kind::File);
-        let path = self.relative_path(&entry.apath);
-        fs::File::open(&path).map_err(|source| Error::ReadSourceFile { path, source })
-    }
-
     fn estimate_count(&self) -> Result<u64> {
         // TODO: This stats the file and builds an entry about them, just to
         // throw it away. We could perhaps change the iter to optionally do
@@ -86,64 +77,53 @@ impl tree::ReadTree for LiveTree {
     }
 }
 
-impl Entry for LiveEntry {
-    fn apath(&self) -> &Apath {
-        &self.apath
-    }
-
-    fn kind(&self) -> Kind {
-        self.kind
-    }
-
-    fn mtime(&self) -> UnixTime {
-        self.mtime
-    }
-
-    fn size(&self) -> Option<u64> {
-        self.size
-    }
-
-    fn symlink_target(&self) -> &Option<String> {
-        &self.symlink_target
-    }
-
-    fn unix_mode(&self) -> UnixMode {
-        self.unix_mode
-    }
-
-    fn owner(&self) -> Owner {
-        self.owner.clone()
-    }
-}
-
-impl LiveEntry {
-    fn from_fs_metadata(
-        apath: Apath,
-        metadata: &fs::Metadata,
-        symlink_target: Option<String>,
-    ) -> LiveEntry {
-        // TODO: Could we read the symlink target here, rather than in the caller?
-        let mtime = metadata
-            .modified()
-            .expect("Failed to get file mtime")
-            .into();
-        let size = if metadata.is_file() {
-            Some(metadata.len())
-        } else {
-            None
-        };
-        let owner = Owner::from(metadata);
-        let unix_mode = UnixMode::from(metadata.permissions());
-        LiveEntry {
-            apath,
-            kind: metadata.file_type().into(),
-            mtime,
-            symlink_target,
-            size,
-            unix_mode,
-            owner,
+fn entry_from_fs_metadata(
+    apath: Apath,
+    source_path: &Path,
+    metadata: &fs::Metadata,
+) -> Result<EntryValue> {
+    let mtime = metadata
+        .modified()
+        .expect("Failed to get file mtime")
+        .into();
+    let kind_meta = if metadata.is_file() {
+        KindMeta::File {
+            size: metadata.len(),
         }
-    }
+    } else if metadata.is_dir() {
+        KindMeta::Dir
+    } else if metadata.is_symlink() {
+        let t = match source_path.read_link() {
+            Ok(t) => t,
+            Err(e) => {
+                error!("Failed to read target of symlink {source_path:?}: {e}");
+                return Err(e.into());
+            }
+        };
+        let target = match t.into_os_string().into_string() {
+            Ok(t) => t,
+            Err(e) => {
+                error!("Failed to decode target of symlink {source_path:?}: {e:?}");
+                return Err(Error::UnsupportedTargetEncoding {
+                    path: source_path.to_owned(),
+                });
+            }
+        };
+        KindMeta::Symlink { target }
+    } else {
+        return Err(Error::UnsupportedSourceKind {
+            path: source_path.to_owned(),
+        });
+    };
+    let owner = Owner::from(metadata);
+    let unix_mode = UnixMode::from(metadata.permissions());
+    Ok(EntryValue {
+        apath,
+        mtime,
+        kind_meta,
+        unix_mode,
+        owner,
+    })
 }
 
 /// Recursive iterator of the contents of a live tree.
@@ -164,7 +144,7 @@ pub struct Iter {
 
     /// All entries that have been seen but not yet returned by the iterator, in the order they
     /// should be returned.
-    entry_deque: VecDeque<LiveEntry>,
+    entry_deque: VecDeque<EntryValue>,
 
     /// Check that emitted paths are in the right order.
     check_order: apath::DebugCheckOrder,
@@ -179,13 +159,14 @@ impl Iter {
     /// Construct a new iter that will visit everything below this root path,
     /// subject to some exclusions
     fn new(root_path: &Path, subtree: Apath, exclude: Exclude) -> Result<Iter> {
-        let start_metadata = fs::symlink_metadata(subtree.below(root_path))?;
+        let start_path = subtree.below(root_path);
+        let start_metadata = fs::symlink_metadata(&start_path)?;
         // Preload iter to return the root and then recurse into it.
-        let entry_deque: VecDeque<LiveEntry> = [LiveEntry::from_fs_metadata(
+        let entry_deque: VecDeque<EntryValue> = [entry_from_fs_metadata(
             subtree.clone(),
+            &start_path,
             &start_metadata,
-            None,
-        )]
+        )?]
         .into();
         // TODO: Consider the case where the root is not actually a directory?
         // Should that be supported?
@@ -207,12 +188,12 @@ impl Iter {
     fn visit_next_directory(&mut self, parent_apath: &Apath) {
         self.stats.directories_visited += 1;
         // Tuples of (name, entry) so that we can sort children by name.
- let mut children = Vec::<(String, LiveEntry)>::new(); + let mut children = Vec::<(String, EntryValue)>::new(); let dir_path = parent_apath.below(&self.root_path); let dir_iter = match fs::read_dir(&dir_path) { Ok(i) => i, - Err(e) => { - ui::problem(&format!("Error reading directory {:?}: {}", &dir_path, e)); + Err(err) => { + error!("Error reading directory {dir_path:?}: {err}"); return; } }; @@ -220,11 +201,8 @@ impl Iter { for dir_entry in dir_iter { let dir_entry = match dir_entry { Ok(dir_entry) => dir_entry, - Err(e) => { - ui::problem(&format!( - "Error reading next entry from directory {:?}: {}", - &dir_path, e - )); + Err(err) => { + error!("Error reading next entry from directory {dir_path:?}: {err}"); continue; } }; @@ -232,9 +210,7 @@ impl Iter { let child_name = match child_osstr.to_str() { Some(c) => c, None => { - ui::problem(&format!( - "Couldn't decode filename {child_osstr:?} in {dir_path:?}", - )); + error!("Couldn't decode filename {child_osstr:?} in {dir_path:?}",); continue; } }; @@ -248,9 +224,7 @@ impl Iter { let ft = match dir_entry.file_type() { Ok(ft) => ft, Err(e) => { - ui::problem(&format!( - "Error getting type of {child_apath:?} during iteration: {e}" - )); + error!("Error getting type of {child_apath:?} during iteration: {e}"); continue; } }; @@ -261,9 +235,7 @@ impl Iter { Ok(true) => continue, Ok(false) => (), Err(e) => { - ui::problem(&format!( - "Error checking CACHEDIR.TAG in {dir_entry:?}: {e}" - )); + error!("Error checking CACHEDIR.TAG in {dir_entry:?}: {e}"); } } } @@ -275,14 +247,10 @@ impl Iter { ErrorKind::NotFound => { // Fairly harmless, and maybe not even worth logging. Just a race // between listing the directory and looking at the contents. - ui::problem(&format!( - "File disappeared during iteration: {child_apath:?}: {e}" - )); + warn!("File disappeared during iteration: {child_apath:?}: {e}"); } _ => { - ui::problem(&format!( - "Failed to read source metadata from {child_apath:?}: {e}" - )); + error!("Failed to read source metadata from {child_apath:?}: {e}"); self.stats.metadata_error += 1; } }; @@ -290,37 +258,23 @@ impl Iter { } }; - // TODO: Move this into LiveEntry::from_fs_metadata, once there's a - // global way for it to complain about errors. - let target: Option = if ft.is_symlink() { - let t = match dir_path.join(dir_entry.file_name()).read_link() { - Ok(t) => t, - Err(e) => { - ui::problem(&format!( - "Failed to read target of symlink {child_apath:?}: {e}" - )); - continue; - } - }; - match t.into_os_string().into_string() { - Ok(t) => Some(t), - Err(e) => { - ui::problem(&format!( - "Failed to decode target of symlink {child_apath:?}: {e:?}" - )); - continue; - } - } - } else { - None - }; if ft.is_dir() { subdir_apaths.push(child_apath.clone()); } - children.push(( - child_name.to_string(), - LiveEntry::from_fs_metadata(child_apath, &metadata, target), - )); + let child_path = dir_path.join(dir_entry.file_name()); + let entry = match entry_from_fs_metadata(child_apath, &child_path, &metadata) { + Ok(entry) => entry, + Err(Error::UnsupportedSourceKind { .. }) => { + // It's not too surprising that there would be fifos or sockets or files + // we don't support; don't log them. 
+                    continue;
+                }
+                Err(err) => {
+                    error!("Failed to build entry for {child_path:?}: {err:?}");
+                    continue;
+                }
+            };
+            children.push((child_name.to_string(), entry));
         }
         // To get the right overall tree ordering, any new subdirectories
         // discovered here should be visited together in apath order, but before
@@ -347,9 +301,9 @@ impl Iter {
 // subdirectories are then visited, also in sorted order, before returning to
 // any higher-level directories.
 impl Iterator for Iter {
-    type Item = LiveEntry;
+    type Item = EntryValue;
 
-    fn next(&mut self) -> Option<LiveEntry> {
+    fn next(&mut self) -> Option<EntryValue> {
         loop {
             if let Some(entry) = self.entry_deque.pop_front() {
                 // Have already found some entries, so just return the first.
diff --git a/src/merge.rs b/src/merge.rs
index fb641f61..2cbbfa9e 100644
--- a/src/merge.rs
+++ b/src/merge.rs
@@ -1,4 +1,4 @@
-// Copyright 2018, 2019, 2020, 2021 Martin Pool.
+// Copyright 2018-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -19,51 +19,60 @@
 
 use std::cmp::Ordering;
 
 use crate::*;
 
+/// When merging entries from two trees a particular apath might
+/// be present in either or both trees.
+///
+/// Unlike the [Change] struct, this contains the full entry rather than
+/// just metadata, and in particular will contain the block addresses for
+/// [IndexEntry].
 #[derive(Debug, PartialEq, Eq)]
-pub enum MergedEntryKind<AE, BE>
+pub enum MatchedEntries<AE, BE>
 where
-    AE: Entry,
-    BE: Entry,
+    AE: EntryTrait,
+    BE: EntryTrait,
 {
-    LeftOnly(AE),
-    RightOnly(BE),
+    Left(AE),
+    Right(BE),
     Both(AE, BE),
 }
 
-use self::MergedEntryKind::*;
-
-#[derive(Debug, PartialEq, Eq)]
-pub struct MergedEntry<AE, BE>
+impl<AE, BE> MatchedEntries<AE, BE>
 where
-    AE: Entry,
-    BE: Entry,
+    AE: EntryTrait,
+    BE: EntryTrait,
 {
-    pub apath: Apath,
-    pub kind: MergedEntryKind<AE, BE>,
+    pub(crate) fn to_entry_change(&self) -> EntryChange {
+        match self {
+            MatchedEntries::Both(ae, be) => EntryChange::diff_metadata(ae, be),
+            MatchedEntries::Left(ae) => EntryChange::deleted(ae),
+            MatchedEntries::Right(be) => EntryChange::added(be),
+        }
+    }
 }
 
-/// Zip together entries from two trees, into an iterator of MergedEntryKind.
+/// Zip together entries from two trees, into an iterator of [MatchedEntries].
 ///
 /// Note that at present this only says whether files are absent from either
 /// side, not whether there is a content difference.
 pub struct MergeTrees<AE, BE, AIT, BIT>
 where
-    AE: Entry,
-    BE: Entry,
+    AE: EntryTrait,
+    BE: EntryTrait,
     AIT: Iterator<Item = AE>,
     BIT: Iterator<Item = BE>,
 {
     ait: AIT,
     bit: BIT,
-    // Read in advance entries from A and B.
+    /// Peeked next entry from [ait].
     na: Option<AE>,
+    /// Peeked next entry from [bit].
     nb: Option<BE>,
 }
 
 impl<AE, BE, AIT, BIT> MergeTrees<AE, BE, AIT, BIT>
 where
-    AE: Entry,
-    BE: Entry,
+    AE: EntryTrait,
+    BE: EntryTrait,
     AIT: Iterator<Item = AE>,
     BIT: Iterator<Item = BE>,
 {
@@ -79,70 +88,44 @@ where
 
 impl<AE, BE, AIT, BIT> Iterator for MergeTrees<AE, BE, AIT, BIT>
 where
-    AE: Entry,
-    BE: Entry,
+    AE: EntryTrait,
+    BE: EntryTrait,
     AIT: Iterator<Item = AE>,
     BIT: Iterator<Item = BE>,
 {
-    type Item = MergedEntry<AE, BE>;
+    type Item = MatchedEntries<AE, BE>;
 
     fn next(&mut self) -> Option<Self::Item> {
-        // TODO: Stats about the merge.
-        let ait = &mut self.ait;
-        let bit = &mut self.bit;
-        // Preload next-A and next-B, if they're not already
-        // loaded.
-        //
-        // TODO: Perhaps use `Peekable` instead of keeping a readahead here?
+        // Preload next-A and next-B, if they're not already loaded.
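+        // For example, merging A = [/a, /b] with B = [/b, /c] yields
+        // Left(/a), Both(/b, /b), Right(/c): equal apaths pair up, and
+        // whichever side is behind is emitted on its own first.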
         if self.na.is_none() {
-            self.na = ait.next();
+            self.na = self.ait.next();
         }
         if self.nb.is_none() {
-            self.nb = bit.next();
+            self.nb = self.bit.next();
         }
-        if self.na.is_none() {
-            if self.nb.is_none() {
-                None
-            } else {
-                let tb = self.nb.take().unwrap();
-                Some(MergedEntry {
-                    apath: tb.apath().clone(),
-                    kind: RightOnly(tb),
-                })
-            }
-        } else if self.nb.is_none() {
-            let ta = self.na.take().unwrap();
-            Some(MergedEntry {
-                apath: ta.apath().clone(),
-                kind: LeftOnly(ta),
-            })
-        } else {
-            let pa = self.na.as_ref().unwrap().apath().clone();
-            let pb = self.nb.as_ref().unwrap().apath().clone();
-            match pa.cmp(&pb) {
-                Ordering::Equal => Some(MergedEntry {
-                    apath: pa,
-                    kind: Both(self.na.take().unwrap(), self.nb.take().unwrap()),
-                }),
-                Ordering::Less => Some(MergedEntry {
-                    apath: pa,
-                    kind: LeftOnly(self.na.take().unwrap()),
-                }),
-                Ordering::Greater => Some(MergedEntry {
-                    apath: pb,
-                    kind: RightOnly(self.nb.take().unwrap()),
-                }),
-            }
+        match (&self.na, &self.nb) {
+            (None, None) => None,
+            (Some(_a), None) => Some(MatchedEntries::Left(self.na.take().unwrap())),
+            (None, Some(_b)) => Some(MatchedEntries::Right(self.nb.take().unwrap())),
+            (Some(a), Some(b)) => match a.apath().cmp(b.apath()) {
+                Ordering::Equal => Some(MatchedEntries::Both(
+                    self.na.take().unwrap(),
+                    self.nb.take().unwrap(),
+                )),
+                Ordering::Less => Some(MatchedEntries::Left(self.na.take().unwrap())),
+                Ordering::Greater => Some(MatchedEntries::Right(self.nb.take().unwrap())),
+            },
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::MergedEntryKind::*;
     use crate::test_fixtures::*;
     use crate::*;
 
+    use super::MatchedEntries;
+
     #[test]
     fn merge_entry_trees() {
         let ta = TreeFixture::new();
         let tb = TreeFixture::new();
         let di = MergeTrees::new(
             ta.live_tree().iter_entries(Apath::root(), Exclude::nothing()).unwrap(),
             tb.live_tree().iter_entries(Apath::root(), Exclude::nothing()).unwrap(),
         )
         .collect::<Vec<_>>();
         assert_eq!(di.len(), 1);
-        assert_eq!(di[0].apath, "/");
-        match &di[0].kind {
-            Both(ae, be) => {
+        match &di[0] {
+            MatchedEntries::Both(ae, be) => {
                 assert_eq!(ae.kind(), Kind::Dir);
                 assert_eq!(be.kind(), Kind::Dir);
                 assert_eq!(ae.apath(), "/");
diff --git a/src/metric_recorder.rs b/src/metric_recorder.rs
new file mode 100644
index 00000000..e5141583
--- /dev/null
+++ b/src/metric_recorder.rs
@@ -0,0 +1,149 @@
+// Copyright 2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+//! A metrics recorder that just keeps atomic values in memory,
+//! so they can be logged or inspected at the end of the process, or potentially
+//! earlier.
+
+use std::collections::BTreeMap;
+use std::fs::OpenOptions;
+use std::path::Path;
+use std::sync::atomic::AtomicU64;
+use std::sync::Mutex;
+use std::sync::{atomic::Ordering, Arc};
+
+use ::metrics::{
+    Counter, Gauge, Histogram, HistogramFn, Key, KeyName, Recorder, SharedString, Unit,
+};
+use itertools::Itertools;
+use lazy_static::lazy_static;
+use metrics_util::registry::{Registry, Storage};
+use metrics_util::Summary;
+use serde_json::json;
+use tracing::debug;
+
+use crate::{Error, Result};
+
+lazy_static! {
+    static ref REGISTRY: Registry<Key, SummaryStorage> = Registry::new(SummaryStorage::new());
+}
+
+pub struct InMemory {}
+pub static IN_MEMORY: InMemory = InMemory {};
+
+impl Recorder for InMemory {
+    fn describe_counter(&self, _key: KeyName, _unit: Option<Unit>, _description: SharedString) {
+        todo!()
+    }
+
+    fn describe_gauge(&self, _key: KeyName, _unit: Option<Unit>, _description: SharedString) {
+        todo!()
+    }
+
+    fn describe_histogram(&self, __key: KeyName, __unit: Option<Unit>, _description: SharedString) {
+        todo!()
+    }
+
+    fn register_counter(&self, key: &Key) -> Counter {
+        REGISTRY.get_or_create_counter(key, |c| Counter::from_arc(Arc::clone(c)))
+    }
+
+    fn register_gauge(&self, _key: &Key) -> Gauge {
+        todo!()
+    }
+
+    fn register_histogram(&self, key: &Key) -> Histogram {
+        REGISTRY.get_or_create_histogram(key, |g| Histogram::from_arc(Arc::clone(g)))
+    }
+}
+
+pub fn counter_values() -> BTreeMap<String, u64> {
+    REGISTRY
+        .get_counter_handles()
+        .into_iter()
+        .map(|(key, counter)| (key.name().to_owned(), counter.load(Ordering::Relaxed)))
+        .collect()
+}
+
+pub fn emit_to_trace() {
+    for (counter_name, count) in counter_values() {
+        debug!(counter_name, count);
+    }
+    for (histogram_name, histogram) in REGISTRY
+        .get_histogram_handles()
+        .into_iter()
+        .sorted_by_key(|(k, _v)| k.clone())
+    {
+        let summary = histogram.0.lock().unwrap();
+        debug!(
+            histogram = histogram_name.name(),
+            p10 = summary.quantile(0.1),
+            p50 = summary.quantile(0.5),
+            p90 = summary.quantile(0.9),
+            p99 = summary.quantile(0.99),
+            p100 = summary.quantile(1.0),
+        );
+    }
+}
+
+/// Like AtomicStorage but using a Summary.
+struct SummaryStorage {}
+
+impl SummaryStorage {
+    const fn new() -> Self {
+        SummaryStorage {}
+    }
+}
+
+impl<K> Storage<K> for SummaryStorage {
+    type Counter = Arc<AtomicU64>;
+    type Gauge = Arc<AtomicU64>;
+    type Histogram = Arc<SummaryHistogram>;
+
+    fn counter(&self, _key: &K) -> Self::Counter {
+        Arc::new(AtomicU64::new(0))
+    }
+
+    fn gauge(&self, _: &K) -> Self::Gauge {
+        Arc::new(AtomicU64::new(0))
+    }
+
+    fn histogram(&self, _: &K) -> Self::Histogram {
+        Arc::new(SummaryHistogram::new())
+    }
+}
+
+struct SummaryHistogram(Mutex<Summary>);
+
+impl HistogramFn for SummaryHistogram {
+    fn record(&self, value: f64) {
+        self.0.lock().unwrap().add(value)
+    }
+}
+
+impl SummaryHistogram {
+    fn new() -> Self {
+        SummaryHistogram(Mutex::new(Summary::with_defaults()))
+    }
+}
+
+pub fn write_json_metrics(path: &Path) -> Result<()> {
+    let f = OpenOptions::new()
+        .create(true)
+        .write(true)
+        .truncate(true)
+        .open(path)?;
+    let j = json!( {
+        "counters": counter_values(),
+    });
+    serde_json::to_writer_pretty(f, &j).map_err(|source| Error::SerializeJson { source })
+}
diff --git a/src/misc.rs b/src/misc.rs
index c36e3ed4..03a323d5 100644
--- a/src/misc.rs
+++ b/src/misc.rs
@@ -1,5 +1,5 @@
 // Conserve backup system.
-// Copyright 2015, 2016, 2017, 2018 Martin Pool.
+// Copyright 2015-2023 Martin Pool.
 
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -13,6 +13,10 @@
 
 //! Generally useful functions.
 
+use std::time::Duration;
+
+use crate::stats::Sizes;
+
 /// Remove and return an item from a vec, if it's present.
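+/// For example, removing `&2` from `vec![1, 2, 3]` should leave `[1, 3]`
+/// (a sketch of the intended semantics of the helper below).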
 pub(crate) fn remove_item<T, U: PartialEq<T>>(v: &mut Vec<T>, item: &U) {
     // Remove this when it's stabilized in std:
@@ -44,3 +48,80 @@ pub(crate) fn zero_u32(a: &u32) -> bool {
 pub(crate) fn zero_u64(a: &u64) -> bool {
     *a == 0
 }
+
+#[allow(unused)]
+pub(crate) fn compression_percent(s: &Sizes) -> i64 {
+    if s.uncompressed > 0 {
+        100i64 - (100 * s.compressed / s.uncompressed) as i64
+    } else {
+        0
+    }
+}
+
+pub fn duration_to_hms(d: Duration) -> String {
+    let elapsed_secs = d.as_secs();
+    if elapsed_secs >= 3600 {
+        format!(
+            "{:2}:{:02}:{:02}",
+            elapsed_secs / 3600,
+            (elapsed_secs / 60) % 60,
+            elapsed_secs % 60
+        )
+    } else {
+        format!(" {:2}:{:02}", (elapsed_secs / 60) % 60, elapsed_secs % 60)
+    }
+}
+
+#[allow(unused)]
+pub(crate) fn mbps_rate(bytes: u64, elapsed: Duration) -> f64 {
+    let secs = elapsed.as_secs() as f64 + f64::from(elapsed.subsec_millis()) / 1000.0;
+    if secs > 0.0 {
+        bytes as f64 / secs / 1e6
+    } else {
+        0f64
+    }
+}
+
+/// Describe the compression ratio: higher is better.
+#[allow(unused)]
+pub(crate) fn compression_ratio(s: &Sizes) -> f64 {
+    if s.compressed > 0 {
+        s.uncompressed as f64 / s.compressed as f64
+    } else {
+        0f64
+    }
+}
+
+/// Adds `Result::inspect_err`, which is not yet stabilized.
+pub(crate) trait ResultExt {
+    type T;
+    type E;
+    fn our_inspect_err<F: FnOnce(&Self::E)>(self, f: F) -> Self;
+}
+
+impl<T, E> ResultExt for std::result::Result<T, E> {
+    type T = T;
+    type E = E;
+
+    #[inline]
+    fn our_inspect_err<F: FnOnce(&Self::E)>(self, f: F) -> Self {
+        if let Err(ref e) = self {
+            f(e);
+        }
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    pub fn test_compression_ratio() {
+        let ratio = compression_ratio(&Sizes {
+            compressed: 2000,
+            uncompressed: 4000,
+        });
+        assert_eq!(format!("{ratio:3.1}x"), "2.0x");
+    }
+}
diff --git a/src/owner/unix.rs b/src/owner/unix.rs
index b1241d27..5f768e22 100644
--- a/src/owner/unix.rs
+++ b/src/owner/unix.rs
@@ -62,7 +62,7 @@ pub(crate) fn set_owner(owner: &Owner, path: &Path) -> Result<()> {
     // TODO: use `std::os::unix::fs::chown(path, uid, gid)?;` once stable
     match unistd::chown(path, uid_opt, gid_opt) {
         Ok(()) => Ok(()),
-        Err(errno) if errno == Errno::EPERM => {
+        Err(Errno::EPERM) => {
             // If the restore is not run as root (or with special capabilities)
             // then we probably can't set ownership, and there's no point
             // complaining
diff --git a/src/progress.rs b/src/progress.rs
new file mode 100644
index 00000000..db0ae2c3
--- /dev/null
+++ b/src/progress.rs
@@ -0,0 +1,154 @@
+// Conserve backup system.
+// Copyright 2015-2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+//! Generic progress bar indications.
+
+// static PROGRESS_IMPL;
+
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::RwLock;
+use std::time::Instant;
+
+static IMPL: RwLock<ProgressImpl> = RwLock::new(ProgressImpl::Null);
+
+static NEXT_TASK_ID: AtomicUsize = AtomicUsize::new(0);
+
+pub(crate) mod term;
+
+/// How to show progress bars?
+#[derive(Debug, Clone, Copy)]
+pub enum ProgressImpl {
+    Null,
+    Terminal,
+}
+
+impl ProgressImpl {
+    /// Make this the selected way to show progress bars.
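+    ///
+    /// For example, a CLI would typically call
+    /// `ProgressImpl::Terminal.activate()` once during startup.
+    /// (Illustrative; the actual call site lives in the conserve binary.)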
+    pub fn activate(self) {
+        *IMPL.write().expect("locked progress impl") = self
+    }
+
+    fn remove_bar(&mut self, task: &Bar) {
+        match self {
+            ProgressImpl::Null => (),
+            ProgressImpl::Terminal => term::remove_bar(task.bar_id),
+        }
+    }
+
+    fn add_bar(&mut self) -> Bar {
+        let bar_id = assign_new_bar_id();
+        match self {
+            ProgressImpl::Null => (),
+            ProgressImpl::Terminal => term::add_bar(bar_id),
+        }
+        Bar { bar_id }
+    }
+
+    fn post(&self, task: &Bar, progress: Progress) {
+        match self {
+            ProgressImpl::Null => (),
+            ProgressImpl::Terminal => term::update_bar(task.bar_id, progress),
+        }
+    }
+}
+
+fn assign_new_bar_id() -> usize {
+    NEXT_TASK_ID.fetch_add(1, Ordering::Relaxed)
+}
+
+/// State of progress on one bar.
+#[derive(Clone)]
+pub enum Progress {
+    None,
+    Backup {
+        filename: String,
+        scanned_file_bytes: u64,
+        scanned_dirs: usize,
+        scanned_files: usize,
+        entries_new: usize,
+        entries_changed: usize,
+        entries_unchanged: usize,
+    },
+    DeleteBands {
+        bands_done: usize,
+        total_bands: usize,
+    },
+    DeleteBlocks {
+        blocks_done: usize,
+        total_blocks: usize,
+        start: Instant,
+    },
+    ListBlocks {
+        count: usize,
+    },
+    MeasureUnreferenced {
+        blocks_done: usize,
+        blocks_total: usize,
+    },
+    MeasureTree {
+        files: usize,
+        total_bytes: u64,
+    },
+    ReferencedBlocks {
+        bands_started: usize,
+        total_bands: usize,
+        references_found: usize,
+        start: Instant,
+    },
+    Restore {
+        filename: String,
+        bytes_done: u64,
+    },
+    ValidateBands {
+        total_bands: usize,
+        bands_done: usize,
+        start: Instant,
+    },
+    ValidateBlocks {
+        blocks_done: usize,
+        total_blocks: usize,
+        bytes_done: u64,
+        start: Instant,
+    },
+}
+
+/// A transient progress task. The UI may draw these as some kind of
+/// progress bar.
+#[derive(Debug)]
+pub struct Bar {
+    /// An opaque unique ID for each concurrent task.
+    bar_id: usize,
+}
+
+impl Bar {
+    #[must_use]
+    pub fn new() -> Self {
+        IMPL.write().expect("lock progress impl").add_bar()
+    }
+
+    pub fn post(&self, progress: Progress) {
+        IMPL.read().unwrap().post(self, progress)
+    }
+}
+
+impl Default for Bar {
+    fn default() -> Self {
+        Bar::new()
+    }
+}
+
+impl Drop for Bar {
+    fn drop(&mut self) {
+        IMPL.write().expect("lock progress impl").remove_bar(self)
+    }
+}
diff --git a/src/progress/term.rs b/src/progress/term.rs
new file mode 100644
index 00000000..52253b1f
--- /dev/null
+++ b/src/progress/term.rs
@@ -0,0 +1,209 @@
+// Conserve backup system.
+// Copyright 2015-2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+use std::io;
+
+use itertools::Itertools;
+use lazy_static::lazy_static;
+use nutmeg::estimate_remaining;
+use thousands::Separable;
+
+use super::*;
+
+lazy_static! {
+    /// A global Nutmeg view.
+    ///
+    /// This is global to reflect that there is globally one stdout/stderr:
+    /// this object manages it.
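+    ///
+    /// All concurrent [Bar]s are rendered through this single view,
+    /// stacked together by [MultiModel] below.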
+    static ref NUTMEG_VIEW: nutmeg::View<MultiModel> =
+        nutmeg::View::new(
+            MultiModel::new(),
+            nutmeg::Options::new()
+                .destination(nutmeg::Destination::Stderr)
+        );
+}
+
+pub(super) fn add_bar(bar_id: usize) {
+    NUTMEG_VIEW.update(|model| model.add_bar(bar_id));
+}
+
+/// Show progress on the global terminal progress bar,
+/// or clear the bar if it's [Progress::None].
+pub(super) fn update_bar(bar_id: usize, progress: Progress) {
+    NUTMEG_VIEW.update(|model| model.update_bar(bar_id, progress));
+}
+
+pub(super) fn remove_bar(bar_id: usize) {
+    let removed_last = NUTMEG_VIEW.update(|model| model.remove_bar(bar_id));
+    if removed_last {
+        NUTMEG_VIEW.clear();
+    }
+}
+
+/// A stack of multiple Progress objects, each identified by an integer id.
+///
+/// Each entry corresponds to one progress::Bar in the abstract interface.
+struct MultiModel(Vec<(usize, Progress)>);
+
+impl MultiModel {
+    const fn new() -> Self {
+        MultiModel(Vec::new())
+    }
+
+    fn add_bar(&mut self, bar_id: usize) {
+        assert!(
+            !self.0.iter().any(|x| x.0 == bar_id),
+            "task_id should not be already present"
+        );
+        self.0.push((bar_id, Progress::None));
+    }
+
+    fn update_bar(&mut self, bar_id: usize, progress: Progress) {
+        let pos = self
+            .0
+            .iter()
+            .position(|x| x.0 == bar_id)
+            .expect("task_id should be present");
+        self.0[pos].1 = progress;
+    }
+
+    fn remove_bar(&mut self, bar_id: usize) -> bool {
+        self.0.retain(|(id, _)| *id != bar_id);
+        self.0.is_empty()
+    }
+}
+
+impl nutmeg::Model for MultiModel {
+    fn render(&mut self, width: usize) -> String {
+        self.0.iter_mut().map(|(_id, p)| p.render(width)).join("\n")
+    }
+}
+
+impl nutmeg::Model for Progress {
+    fn render(&mut self, _width: usize) -> String {
+        match self {
+            Progress::None => String::new(),
+            Progress::Backup {
+                filename,
+                scanned_file_bytes,
+                scanned_dirs,
+                scanned_files,
+                entries_new,
+                entries_changed,
+                entries_unchanged,
+            } => format!(
+                "\
+                Scanned {dirs} directories, {files} files, {mb} MB\n\
+                {new} new entries, {changed} changed, {unchanged} unchanged\n\
+                {filename}",
+                dirs = scanned_dirs.separate_with_commas(),
+                files = scanned_files.separate_with_commas(),
+                mb = (*scanned_file_bytes / 1_000_000).separate_with_commas(),
+                new = entries_new.separate_with_commas(),
+                changed = entries_changed.separate_with_commas(),
+                unchanged = entries_unchanged.separate_with_commas(),
+            ),
+            Progress::DeleteBands {
+                bands_done,
+                total_bands,
+            } => format!(
+                "Delete bands: {}/{}...",
+                bands_done.separate_with_commas(),
+                total_bands.separate_with_commas(),
+            ),
+            Progress::DeleteBlocks {
+                blocks_done,
+                total_blocks,
+                start,
+            } => format!(
+                "Delete blocks: {}/{}, {eta} remaining...",
+                blocks_done.separate_with_commas(),
+                total_blocks.separate_with_commas(),
+                eta = estimate_remaining(start, *blocks_done, *total_blocks),
+            ),
+            Progress::ListBlocks { count } => format!(
+                "List blocks: {count}...",
+                count = count.separate_with_commas(),
+            ),
+            Progress::MeasureTree { files, total_bytes } => format!(
+                "Measuring... {} files, {} MB",
{} files, {} MB", + files.separate_with_commas(), + (*total_bytes / 1_000_000).separate_with_commas() + ), + Progress::MeasureUnreferenced { + blocks_done, + blocks_total, + } => format!( + "Measure unreferenced blocks: {}/{}...", + blocks_done.separate_with_commas(), + blocks_total.separate_with_commas(), + ), + Progress::ReferencedBlocks { + references_found, + bands_started, + total_bands, + start, + } => format!( + "Find referenced blocks: {found} in {bands_started}/{total_bands} bands, {eta} remaining...", + found = references_found.separate_with_commas(), + eta = estimate_remaining(start, *bands_started, *total_bands), + ), + Progress::Restore { + filename, + bytes_done, + } => format!( + "Restoring: {mb} MB\n{filename}", + mb = *bytes_done / 1_000_000, + ), + Progress::ValidateBlocks { + blocks_done, + total_blocks, + bytes_done, + start, + } => { + format!( + "Check block {}/{}: {} done, {} MB checked, {} remaining", + blocks_done.separate_with_commas(), + total_blocks.separate_with_commas(), + nutmeg::percent_done(*blocks_done, *total_blocks), + (*bytes_done / 1_000_000).separate_with_commas(), + nutmeg::estimate_remaining(start, *blocks_done, *total_blocks) + ) + } + Progress::ValidateBands { + total_bands, + bands_done, + start, + } => format!( + "Check index {}/{}, {} done, {} remaining", + bands_done, + total_bands, + nutmeg::percent_done(*bands_done, *total_bands), + nutmeg::estimate_remaining(start, *bands_done, *total_bands) + ), + } + } +} + +pub(crate) struct WriteToNutmeg(); + +impl io::Write for WriteToNutmeg { + fn write(&mut self, buf: &[u8]) -> io::Result { + NUTMEG_VIEW.message_bytes(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} diff --git a/src/restore.rs b/src/restore.rs index a441dfc1..b680297b 100644 --- a/src/restore.rs +++ b/src/restore.rs @@ -21,77 +21,59 @@ use std::{fs, time::Instant}; use filetime::set_file_handle_times; #[cfg(unix)] use filetime::set_symlink_file_times; +use metrics::{counter, increment_counter}; +use time::OffsetDateTime; +#[allow(unused_imports)] +use tracing::{error, warn}; use crate::band::BandSelectionPolicy; -use crate::entry::Entry; use crate::io::{directory_is_empty, ensure_dir_exists}; +use crate::progress::{Bar, Progress}; use crate::stats::RestoreStats; use crate::unix_mode::UnixMode; -use crate::unix_time::UnixTime; +use crate::unix_time::ToFileTime; use crate::*; /// Description of how to restore a tree. -#[derive(Debug)] -pub struct RestoreOptions { - pub print_filenames: bool, +// #[derive(Debug)] +pub struct RestoreOptions<'cb> { pub exclude: Exclude, /// Restore only this subdirectory. pub only_subtree: Option, pub overwrite: bool, // The band to select, or by default the last complete one. pub band_selection: BandSelectionPolicy, - /// If printing filenames, include metadata such as file permissions - pub long_listing: bool, + + // Call this callback as each entry is successfully restored. 
+    pub change_callback: Option<ChangeCallback<'cb>>,
 }
 
-impl Default for RestoreOptions {
+impl Default for RestoreOptions<'_> {
     fn default() -> Self {
         RestoreOptions {
-            print_filenames: false,
             overwrite: false,
             band_selection: BandSelectionPolicy::LatestClosed,
             exclude: Exclude::nothing(),
             only_subtree: None,
-            long_listing: false,
+            change_callback: None,
         }
     }
 }
 
-struct ProgressModel {
-    filename: String,
-    bytes_done: u64,
-}
-
-impl nutmeg::Model for ProgressModel {
-    fn render(&mut self, _width: usize) -> String {
-        format!(
-            "Restoring: {} MB\n{}",
-            self.bytes_done / 1_000_000,
-            self.filename
-        )
-    }
-}
-
 /// Restore a selected version, or by default the latest, to a destination directory.
 pub fn restore(
     archive: &Archive,
-    destination_path: &Path,
+    destination: &Path,
     options: &RestoreOptions,
 ) -> Result<RestoreStats> {
     let st = archive.open_stored_tree(options.band_selection.clone())?;
-    let mut rt = if options.overwrite {
-        RestoreTree::create_overwrite(destination_path)
-    } else {
-        RestoreTree::create(destination_path)
-    }?;
+    ensure_dir_exists(destination)?;
+    if !options.overwrite && !directory_is_empty(destination)? {
+        return Err(Error::DestinationNotEmpty);
+    }
     let mut stats = RestoreStats::default();
-    let progress_bar = nutmeg::View::new(
-        ProgressModel {
-            filename: String::new(),
-            bytes_done: 0,
-        },
-        ui::nutmeg_options(),
-    );
+    let mut bytes_done = 0;
+    let bar = Bar::new();
     let start = Instant::now();
     // // This causes us to walk the source tree twice, which is probably an acceptable option
     // // since it's nice to see realistic overall progress. We could keep all the entries
@@ -108,202 +90,175 @@ pub fn restore(
         options.only_subtree.clone().unwrap_or_else(Apath::root),
         options.exclude.clone(),
     )?;
+    let mut deferrals = Vec::new();
     for entry in entry_iter {
-        if options.print_filenames {
-            if options.long_listing {
-                progress_bar.message(format!(
-                    "{} {} {}\n",
-                    entry.unix_mode(),
-                    entry.owner(),
-                    entry.apath()
-                ));
-            } else {
-                progress_bar.message(format!("{}\n", entry.apath()));
-            }
-        }
-        progress_bar.update(|model| model.filename = entry.apath().to_string());
-        if let Err(e) = match entry.kind() {
+        bar.post(Progress::Restore {
+            filename: entry.apath().to_string(),
+            bytes_done,
+        });
+        let path = destination.join(&entry.apath[1..]);
+        match entry.kind() {
             Kind::Dir => {
                 stats.directories += 1;
-                rt.copy_dir(&entry)
+                increment_counter!("conserve.restore.dirs");
+                if let Err(err) = fs::create_dir_all(&path) {
+                    if err.kind() != io::ErrorKind::AlreadyExists {
+                        error!(?path, ?err, "Failed to create directory");
+                        stats.errors += 1;
+                        continue;
+                    }
+                }
+                deferrals.push(DirDeferral {
+                    path,
+                    unix_mode: entry.unix_mode(),
+                    mtime: entry.mtime(),
+                })
             }
             Kind::File => {
                 stats.files += 1;
-                let result = rt.copy_file(&entry, &st).map(|s| stats += s);
-                if let Some(bytes) = entry.size() {
-                    progress_bar.update(|model| model.bytes_done += bytes);
+                increment_counter!("conserve.restore.files");
+                match restore_file(path.clone(), &entry, &st) {
+                    Err(err) => {
+                        error!(?err, ?path, "Failed to restore file");
+                        stats.errors += 1;
+                        continue;
+                    }
+                    Ok(s) => {
+                        if let Some(bytes) = entry.size() {
+                            bytes_done += bytes;
+                        }
+                        stats += s;
+                    }
                 }
-                result
             }
             Kind::Symlink => {
                 stats.symlinks += 1;
-                rt.copy_symlink(&entry)
+                increment_counter!("conserve.restore.symlinks");
+                if let Err(err) = restore_symlink(&path, &entry) {
+                    error!(?path, ?err, "Failed to restore symlink");
+                    stats.errors += 1;
+                    continue;
+                }
             }
             Kind::Unknown => {
                 stats.unknown_kind += 1;
-                // TODO: Perhaps eventually we could backup and restore pipes,
-                // sockets, etc. Or at least count them. For now, silently skip.
-                // https://github.com/sourcefrog/conserve/issues/82
-                continue;
+                warn!(apath = ?entry.apath(), "Unknown file kind");
             }
-        } {
-            ui::show_error(&e);
-            stats.errors += 1;
-            continue;
+        };
+        if let Some(cb) = options.change_callback.as_ref() {
+            // Since we only restore to empty directories they're all added.
+            cb(&EntryChange::added(&entry))?;
         }
     }
-    stats += rt.finish()?;
+    stats += apply_deferrals(&deferrals)?;
     stats.elapsed = start.elapsed();
     // TODO: Merge in stats from the tree iter and maybe the source tree?
     Ok(stats)
 }
 
-/// A write-only tree on the filesystem, as a restore destination.
-#[derive(Debug)]
-pub struct RestoreTree {
+/// Recorded changes to apply to directories after all their contents
+/// have been applied.
+///
+/// For example we might want to make the directory read-only, but we
+/// shouldn't do that until we added all the children.
+struct DirDeferral {
     path: PathBuf,
-
-    dir_unix_modes: Vec<(PathBuf, UnixMode)>,
-    dir_mtimes: Vec<(PathBuf, UnixTime)>,
+    unix_mode: UnixMode,
+    mtime: OffsetDateTime,
 }
 
-impl RestoreTree {
-    fn new(path: PathBuf) -> RestoreTree {
-        RestoreTree {
-            path,
-            dir_mtimes: Vec::new(),
-            dir_unix_modes: Vec::new(),
-        }
-    }
-
-    /// Create a RestoreTree.
-    ///
-    /// The destination must either not yet exist, or be an empty directory.
-    pub fn create<P: Into<PathBuf>>(path: P) -> Result<RestoreTree> {
-        let path = path.into();
-        match ensure_dir_exists(&path).and_then(|()| directory_is_empty(&path)) {
-            Err(source) => Err(Error::Restore { path, source }),
-            Ok(true) => Ok(RestoreTree::new(path)),
-            Ok(false) => Err(Error::DestinationNotEmpty { path }),
-        }
-    }
-
-    /// Create a RestoreTree, even if the destination directory is not empty.
-    pub fn create_overwrite(path: &Path) -> Result<RestoreTree> {
-        Ok(RestoreTree::new(path.to_path_buf()))
-    }
-
-    fn rooted_path(&self, apath: &Apath) -> PathBuf {
-        // Remove initial slash so that the apath is relative to the destination.
-        self.path.join(&apath[1..])
-    }
-
-    fn finish(self) -> Result<RestoreStats> {
-        #[cfg(unix)]
-        for (path, unix_mode) in self.dir_unix_modes {
-            if let Err(err) = unix_mode.set_permissions(path) {
-                ui::problem(&format!("Failed to set directory permissions: {err:?}"));
-            }
+fn apply_deferrals(deferrals: &[DirDeferral]) -> Result<RestoreStats> {
+    let mut stats = RestoreStats::default();
+    for DirDeferral {
+        path,
+        unix_mode,
+        mtime,
+    } in deferrals
+    {
+        if let Err(err) = unix_mode.set_permissions(path) {
+            error!(?path, ?err, "Failed to set directory permissions");
+            stats.errors += 1;
         }
-        for (path, time) in self.dir_mtimes {
-            if let Err(err) = filetime::set_file_mtime(path, time.into()) {
-                ui::problem(&format!("Failed to set directory mtime: {err:?}"));
-            }
+        if let Err(err) = filetime::set_file_mtime(path, (*mtime).to_file_time()) {
+            error!(?path, ?err, "Failed to set directory mtime");
+            stats.errors += 1;
         }
-        Ok(RestoreStats::default())
     }
+    Ok(stats)
+}
 
-    fn copy_dir<E: Entry>(&mut self, entry: &E) -> Result<()> {
-        let path = self.rooted_path(entry.apath());
-        if let Err(source) = fs::create_dir_all(&path) {
-            if source.kind() != io::ErrorKind::AlreadyExists {
-                return Err(Error::Restore { path, source });
-            }
-        }
-        self.dir_mtimes.push((path.clone(), entry.mtime()));
-        self.dir_unix_modes.push((path, entry.unix_mode()));
-        Ok(())
-    }
 
 /// Copy in the contents of a file from another tree.
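+///
+/// Copies the content across with `io::copy`, then applies the mtime,
+/// permissions, and ownership recorded in the index entry.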
+fn restore_file( + path: PathBuf, + source_entry: &IndexEntry, + from_tree: &StoredTree, +) -> Result { + let restore_err = |source| Error::Restore { + path: path.clone(), + source, + }; + let mut stats = RestoreStats::default(); + let mut restore_file = File::create(&path).map_err(restore_err)?; + // TODO: Read one block at a time, maybe don't go through io::copy. + let stored_file = from_tree.open_stored_file(source_entry); + let len = + std::io::copy(&mut stored_file.into_read(), &mut restore_file).map_err(restore_err)?; + stats.uncompressed_file_bytes = len; + counter!("conserve.restore.file_bytes", len); + restore_file.flush().map_err(restore_err)?; - /// Copy in the contents of a file from another tree. - fn copy_file( - &mut self, - source_entry: &R::Entry, - from_tree: &R, - ) -> Result { - let path = self.rooted_path(source_entry.apath()); - let restore_err = |source| Error::Restore { + let mtime = Some(source_entry.mtime().to_file_time()); + set_file_handle_times(&restore_file, mtime, mtime).map_err(|source| { + Error::RestoreModificationTime { path: path.clone(), source, - }; - let mut stats = RestoreStats::default(); - let mut restore_file = File::create(&path).map_err(restore_err)?; - // TODO: Read one block at a time: don't pull all the contents into memory. - let content = &mut from_tree.file_contents(source_entry)?; - stats.uncompressed_file_bytes = - std::io::copy(content, &mut restore_file).map_err(restore_err)?; - restore_file.flush().map_err(restore_err)?; - - let mtime = Some(source_entry.mtime().into()); - set_file_handle_times(&restore_file, mtime, mtime).map_err(|source| { - Error::RestoreModificationTime { - path: path.clone(), - source, - } - })?; - - #[cfg(unix)] - { - // Restore permissions only if there are mode bits stored in the archive - source_entry - .unix_mode() - .set_permissions(&path) - .map_err(|e| { - ui::show_error(&e); - stats.errors += 1; - }) - .ok(); } + })?; - // Restore ownership if possible. - // TODO: Stats and warnings if a user or group is specified in the index but - // does not exist on the local system. - if let Err(err) = source_entry.owner().set_owner(&path) { - ui::show_error(&err); - stats.errors += 1; - } + // Restore permissions only if there are mode bits stored in the archive + if let Err(err) = source_entry.unix_mode().set_permissions(&path) { + error!(?path, ?err, "Error restoring unix permissions"); + stats.errors += 1; + } - // TODO: Accumulate more stats. - Ok(stats) + // Restore ownership if possible. + // TODO: Stats and warnings if a user or group is specified in the index but + // does not exist on the local system. + if let Err(err) = &source_entry.owner().set_owner(&path) { + error!(?path, ?err, "Error restoring ownership"); + stats.errors += 1; } + // TODO: Accumulate more stats. + Ok(stats) +} - #[cfg(unix)] - fn copy_symlink(&mut self, entry: &E) -> Result<()> { - use std::os::unix::fs as unix_fs; - if let Some(ref target) = entry.symlink_target() { - let path = self.rooted_path(entry.apath()); - if let Err(source) = unix_fs::symlink(target, &path) { - return Err(Error::Restore { path, source }); - } - let mtime = entry.mtime().into(); - if let Err(source) = set_symlink_file_times(&path, mtime, mtime) { - return Err(Error::RestoreModificationTime { path, source }); - } - } else { - // TODO: Treat as an error. 
- ui::problem(&format!("No target in symlink entry {}", entry.apath())); +#[cfg(unix)] +fn restore_symlink(path: &Path, entry: &IndexEntry) -> Result<()> { + use std::os::unix::fs as unix_fs; + if let Some(ref target) = entry.symlink_target() { + if let Err(source) = unix_fs::symlink(target, path) { + return Err(Error::Restore { + path: path.to_owned(), + source, + }); + } + let mtime = entry.mtime().to_file_time(); + if let Err(source) = set_symlink_file_times(path, mtime, mtime) { + return Err(Error::RestoreModificationTime { + path: path.to_owned(), + source, + }); } - Ok(()) + } else { + error!(apath = ?entry.apath(), "No target in symlink entry"); } + Ok(()) +} - #[cfg(not(unix))] - fn copy_symlink(&mut self, entry: &E) -> Result<()> { - // TODO: Add a test with a canned index containing a symlink, and expect - // it cannot be restored on Windows and can be on Unix. - ui::problem(&format!( - "Can't restore symlinks on non-Unix: {}", - entry.apath() - )); - Ok(()) - } +#[cfg(not(unix))] +fn restore_symlink(_restore_path: &Path, entry: &IndexEntry) -> Result<()> { + // TODO: Add a test with a canned index containing a symlink, and expect + // it cannot be restored on Windows and can be on Unix. + warn!("Can't restore symlinks on non-Unix: {}", entry.apath()); + Ok(()) } diff --git a/src/show.rs b/src/show.rs index 3198a785..6ac8c1a2 100644 --- a/src/show.rs +++ b/src/show.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2018, 2020, 2021, 2022 Martin Pool. +// Copyright 2018-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -22,7 +22,9 @@ use std::io::{BufWriter, Write}; use time::format_description::well_known::Rfc3339; use time::UtcOffset; +use tracing::error; +use crate::misc::duration_to_hms; use crate::*; /// Options controlling the behavior of `show_versions`. @@ -59,17 +61,17 @@ pub fn show_versions( } let mut l: Vec = Vec::new(); l.push(format!("{band_id:<20}")); - let band = match Band::open(archive, &band_id) { + let band = match Band::open(archive, band_id) { Ok(band) => band, - Err(e) => { - ui::problem(&format!("Failed to open band {band_id:?}: {e:?}")); + Err(err) => { + error!("Failed to open band {band_id:?}: {err}"); continue; } }; let info = match band.get_info() { Ok(info) => info, - Err(e) => { - ui::problem(&format!("Failed to read band tail {band_id:?}: {e:?}")); + Err(err) => { + error!("Failed to read band tail {band_id:?}: {err}"); continue; } }; @@ -90,7 +92,7 @@ pub fn show_versions( if let Some(end_time) = info.end_time { let duration = end_time - info.start_time; if let Ok(duration) = duration.try_into() { - crate::ui::duration_to_hms(duration).into() + duration_to_hms(duration).into() } else { Cow::Borrowed("negative") } @@ -106,7 +108,7 @@ pub fn show_versions( if options.tree_size { let tree_mb_str = crate::misc::bytes_to_human_mb( archive - .open_stored_tree(BandSelectionPolicy::Specified(band_id.clone()))? + .open_stored_tree(BandSelectionPolicy::Specified(band_id))? .size(Exclude::nothing())? 
.file_bytes, ); @@ -123,10 +125,10 @@ pub fn show_index_json(band: &Band, w: &mut dyn Write) -> Result<()> { let bw = BufWriter::new(w); let index_entries: Vec = band.index().iter_entries().collect(); serde_json::ser::to_writer_pretty(bw, &index_entries) - .map_err(|source| Error::SerializeIndex { source }) + .map_err(|source| Error::SerializeJson { source }) } -pub fn show_entry_names>( +pub fn show_entry_names>( it: I, w: &mut dyn Write, long_listing: bool, @@ -147,14 +149,3 @@ pub fn show_entry_names>( } Ok(()) } - -pub fn show_diff>(diff: D, w: &mut dyn Write) -> Result<()> { - // TODO: Consider whether the actual files have changed. - // TODO: Summarize diff. - // TODO: Optionally include unchanged files. - let mut bw = BufWriter::new(w); - for de in diff { - writeln!(bw, "{de}")?; - } - Ok(()) -} diff --git a/src/stats.rs b/src/stats.rs index db44b6da..8ac5fdca 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -14,10 +14,10 @@ use std::fmt; use std::time::Duration; -use derive_more::{Add, AddAssign, Sum}; +use derive_more::{Add, AddAssign}; use thousands::Separable; -use crate::ui::duration_to_hms; +use crate::misc::duration_to_hms; pub fn mb_string(s: u64) -> String { (s / 1_000_000).separate_with_commas() @@ -32,11 +32,15 @@ fn ratio(uncompressed: u64, compressed: u64) -> f64 { } } -fn write_size>(w: &mut fmt::Formatter<'_>, label: &str, value: I) { +pub(crate) fn write_size>(w: &mut fmt::Formatter<'_>, label: &str, value: I) { writeln!(w, "{:>12} MB {}", mb_string(value.into()), label).unwrap(); } -fn write_compressed_size(w: &mut fmt::Formatter<'_>, compressed: u64, uncompressed: u64) { +pub(crate) fn write_compressed_size( + w: &mut fmt::Formatter<'_>, + compressed: u64, + uncompressed: u64, +) { write_size(w, "uncompressed", uncompressed); write_size( w, @@ -45,7 +49,7 @@ fn write_compressed_size(w: &mut fmt::Formatter<'_>, compressed: u64, uncompress ); } -fn write_count>(w: &mut fmt::Formatter<'_>, label: &str, value: I) { +pub(crate) fn write_count>(w: &mut fmt::Formatter<'_>, label: &str, value: I) { writeln!( w, "{:>12} {}", @@ -55,7 +59,11 @@ fn write_count>(w: &mut fmt::Formatter<'_>, label: &str, value: I .unwrap(); } -fn write_duration(w: &mut fmt::Formatter<'_>, label: &str, duration: Duration) -> fmt::Result { +pub(crate) fn write_duration( + w: &mut fmt::Formatter<'_>, + label: &str, + duration: Duration, +) -> fmt::Result { writeln!(w, "{:>12} {}", duration_to_hms(duration), label) } @@ -67,69 +75,6 @@ pub struct Sizes { pub uncompressed: u64, } -#[derive(Debug, Default, Clone, PartialEq, Eq, Add, AddAssign, Sum)] -pub struct ValidateStats { - /// Count of files in the wrong place. - pub structure_problems: usize, - pub io_errors: usize, - - /// Failed to open a band. - pub band_open_errors: usize, - pub band_metadata_problems: usize, - pub missing_band_heads: usize, - - /// Failed to open a stored tree. - pub tree_open_errors: usize, - pub tree_validate_errors: usize, - - /// Count of files not expected to be in the archive. - pub unexpected_files: usize, - - /// Number of blocks read. - pub block_read_count: u64, - /// Number of blocks that failed to read back. 
- pub block_error_count: usize, - pub block_missing_count: usize, - pub block_too_short: usize, - - pub elapsed: Duration, -} - -impl fmt::Display for ValidateStats { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.has_problems() { - writeln!(w, "VALIDATION FOUND PROBLEMS")?; - } else { - writeln!(w, "No problems found in archive")?; - } - - write_count(w, "structure problems", self.structure_problems); - write_count(w, "IO errors", self.io_errors); - write_count(w, "band open errors", self.band_open_errors); - write_count(w, "band metadata errors", self.band_metadata_problems); - write_count(w, "missing band heads", self.missing_band_heads); - write_count(w, "tree open errors", self.tree_open_errors); - write_count(w, "tree validate errors", self.tree_validate_errors); - write_count(w, "unexpected files", self.unexpected_files); - writeln!(w).unwrap(); - write_count(w, "block errors", self.block_error_count); - write_count(w, "blocks missing", self.block_too_short); - write_count(w, "blocks too short", self.block_missing_count); - writeln!(w).unwrap(); - - write_count(w, "blocks read", self.block_read_count as usize); - write_duration(w, "elapsed", self.elapsed)?; - - Ok(()) - } -} - -impl ValidateStats { - pub fn has_problems(&self) -> bool { - self.block_error_count > 0 || self.io_errors > 0 || self.block_missing_count > 0 - } -} - #[derive(Default, Debug, Clone, Eq, PartialEq)] pub struct IndexReadStats { pub index_hunks: usize, @@ -184,81 +129,6 @@ impl fmt::Display for RestoreStats { } } -#[derive(Add, AddAssign, Debug, Default, Eq, PartialEq, Clone)] -pub struct BackupStats { - // TODO: Have separate more-specific stats for backup and restore, and then - // each can have a single Display method. - // TODO: Include source file bytes, including unmodified files. - pub files: usize, - pub symlinks: usize, - pub directories: usize, - pub unknown_kind: usize, - - pub unmodified_files: usize, - pub modified_files: usize, - pub new_files: usize, - - /// Bytes that matched an existing block. - pub deduplicated_bytes: u64, - /// Bytes that were stored as new blocks, before compression. - pub uncompressed_bytes: u64, - pub compressed_bytes: u64, - - pub deduplicated_blocks: usize, - pub written_blocks: usize, - /// Blocks containing combined small files. 
- pub combined_blocks: usize, - - pub empty_files: usize, - pub small_combined_files: usize, - pub single_block_files: usize, - pub multi_block_files: usize, - - pub errors: usize, - - pub index_builder_stats: IndexWriterStats, - pub elapsed: Duration, -} - -impl fmt::Display for BackupStats { - fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { - write_count(w, "files:", self.files); - write_count(w, " unmodified files", self.unmodified_files); - write_count(w, " modified files", self.modified_files); - write_count(w, " new files", self.new_files); - write_count(w, "symlinks", self.symlinks); - write_count(w, "directories", self.directories); - write_count(w, "unsupported file kind", self.unknown_kind); - writeln!(w).unwrap(); - - write_count(w, "files stored:", self.new_files + self.modified_files); - write_count(w, " empty files", self.empty_files); - write_count(w, " small combined files", self.small_combined_files); - write_count(w, " single block files", self.single_block_files); - write_count(w, " multi-block files", self.multi_block_files); - writeln!(w).unwrap(); - - write_count(w, "data blocks deduplicated:", self.deduplicated_blocks); - write_size(w, " saved", self.deduplicated_bytes); - writeln!(w).unwrap(); - - write_count(w, "new data blocks written:", self.written_blocks); - write_count(w, " blocks of combined files", self.combined_blocks); - write_compressed_size(w, self.compressed_bytes, self.uncompressed_bytes); - writeln!(w).unwrap(); - - let idx = &self.index_builder_stats; - write_count(w, "new index hunks", idx.index_hunks); - write_compressed_size(w, idx.compressed_index_bytes, idx.uncompressed_index_bytes); - writeln!(w).unwrap(); - - write_count(w, "errors", self.errors); - write_duration(w, "elapsed", self.elapsed)?; - - Ok(()) - } -} - #[derive(Add, AddAssign, Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct DeleteStats { pub deleted_band_count: usize, diff --git a/src/stitch.rs b/src/stitch.rs index 20aa02ad..6a87a698 100644 --- a/src/stitch.rs +++ b/src/stitch.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -28,6 +28,8 @@ //! seen. //! * Bands might be deleted, so their numbers are not contiguous. +use tracing::warn; + use crate::index::IndexEntryIter; use crate::*; @@ -79,6 +81,7 @@ impl Iterator for IterStitchedIndexHunks { fn next(&mut self) -> Option { loop { + // Until we find the next hunk or run out of bands. // If we're already reading an index, and it has more content, return that. if let Some(index_hunks) = &mut self.index_hunks { // An index iterator must be assigned to a band. @@ -94,7 +97,7 @@ impl Iterator for IterStitchedIndexHunks { self.index_hunks = None; let band_id = self.band_id.take().expect("last band id should be present"); - if self.archive.band_is_closed(&band_id).unwrap_or(false) { + if self.archive.band_is_closed(band_id).unwrap_or(false) { // We reached the end of a complete index in this band, // so there's no need to look at any earlier bands, and we're done iterating. return None; @@ -102,16 +105,20 @@ impl Iterator for IterStitchedIndexHunks { // self.band_id might be None afterwards, if there is no previous band. // If so, we're done. 
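// [Editor's note — sketch, not part of the diff.] Per the module docs above,
// the contract of this loop is: yield every hunk of the requested band, and
// if that band's index is incomplete (it was never closed), continue from the
// last apath seen into the previous band that still exists, skipping bands
// that are deleted or unreadable. The backward walk can be modelled with
// plain integers standing in for BandId (hypothetical helper, not crate API):
//
//     fn previous_existing(mut n: u64, exists: impl Fn(u64) -> bool) -> Option<u64> {
//         while n > 0 {
//             n -= 1;                 // step to the next lower band number
//             if exists(n) {
//                 return Some(n);     // first earlier band still present
//             }
//         }
//         None                        // no earlier band at all
//     }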
- self.band_id = previous_existing_band(&self.archive, &band_id); + self.band_id = previous_existing_band(&self.archive, band_id); } - if let Some(band_id) = &self.band_id { + if let Some(band_id) = self.band_id { + let band = match Band::open(&self.archive, band_id) { + Ok(band) => band, + Err(err) => { + warn!(?err, ?band_id, "Failed to open band, skipping it"); + self.band_id = previous_existing_band(&self.archive, band_id); + continue; + } + }; // Start reading this new index and skip forward until after last_apath - let mut iter_hunks = Band::open(&self.archive, band_id) - .expect("Failed to open band") - .index() - .iter_hunks(); - + let mut iter_hunks = band.index().iter_hunks(); if let Some(last) = &self.last_apath { iter_hunks = iter_hunks.advance_to_after(last) } @@ -124,14 +131,13 @@ impl Iterator for IterStitchedIndexHunks { } } -fn previous_existing_band(archive: &Archive, band_id: &BandId) -> Option { - let mut band_id = band_id.clone(); +fn previous_existing_band(archive: &Archive, mut band_id: BandId) -> Option { loop { // TODO: It might be faster to list the present bands and calculate // from that, rather than walking backwards one at a time... if let Some(prev_band_id) = band_id.previous() { band_id = prev_band_id; - if archive.band_exists(&band_id).unwrap_or(false) { + if archive.band_exists(band_id).unwrap_or(false) { return Some(band_id); } } else { @@ -158,8 +164,8 @@ mod test { } } - fn simple_ls(archive: &Archive, band_id: &BandId) -> String { - let strs: Vec = IterStitchedIndexHunks::new(archive, Some(band_id.clone())) + fn simple_ls(archive: &Archive, band_id: BandId) -> String { + let strs: Vec = IterStitchedIndexHunks::new(archive, Some(band_id)) .flatten() .map(|entry| format!("{}:{}", &entry.apath, entry.target.unwrap())) .collect(); @@ -186,7 +192,7 @@ mod test { // and 3 is carried over from b1. 
let band = Band::create(&af)?; - assert_eq!(*band.id(), BandId::zero()); + assert_eq!(band.id(), BandId::zero()); let mut ib = band.index_builder(); ib.push_entry(symlink("/0", "b0")); ib.push_entry(symlink("/1", "b0")); @@ -240,19 +246,19 @@ mod test { std::fs::remove_dir_all(af.path().join("b0003"))?; let archive = Archive::open_path(af.path())?; - assert_eq!(simple_ls(&archive, &BandId::new(&[0])), "/0:b0 /1:b0 /2:b0"); + assert_eq!(simple_ls(&archive, BandId::new(&[0])), "/0:b0 /1:b0 /2:b0"); assert_eq!( - simple_ls(&archive, &BandId::new(&[1])), + simple_ls(&archive, BandId::new(&[1])), "/0:b1 /1:b1 /2:b1 /3:b1" ); - assert_eq!(simple_ls(&archive, &BandId::new(&[2])), "/0:b2 /2:b2 /3:b1"); + assert_eq!(simple_ls(&archive, BandId::new(&[2])), "/0:b2 /2:b2 /3:b1"); - assert_eq!(simple_ls(&archive, &BandId::new(&[4])), "/0:b2 /2:b2 /3:b1"); + assert_eq!(simple_ls(&archive, BandId::new(&[4])), "/0:b2 /2:b2 /3:b1"); assert_eq!( - simple_ls(&archive, &BandId::new(&[5])), + simple_ls(&archive, BandId::new(&[5])), "/0:b5 /00:b5 /2:b2 /3:b1" ); @@ -267,16 +273,15 @@ mod test { let tf = TreeFixture::new(); tf.create_file("file_a"); - let lt = tf.live_tree(); let af = ScratchArchive::new(); - backup(&af, <, &BackupOptions::default()).expect("backup should work"); + backup(&af, tf.path(), &BackupOptions::default()).expect("backup should work"); af.transport().remove_file("b0000/BANDTAIL").unwrap(); let band_ids = af.list_band_ids().expect("should list bands"); let band_id = band_ids.first().expect("expected at least one band"); - let mut iter = IterStitchedIndexHunks::new(&af, Some(band_id.clone())); + let mut iter = IterStitchedIndexHunks::new(&af, Some(*band_id)); // Get the first and only index entry. // `index_hunks` and `band_id` should be `Some`. assert!(iter.next().is_some()); diff --git a/src/stored_file.rs b/src/stored_file.rs index dc2714e9..506e33c4 100644 --- a/src/stored_file.rs +++ b/src/stored_file.rs @@ -10,14 +10,13 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. -///! Access a file stored in the archive. +//! Access a file stored in the archive. use crate::stats::Sizes; use crate::*; /// Returns the contents of a file stored in the archive, as an iter of byte blocks. /// -/// These can be constructed through `StoredTree::open_stored_file()` or more -/// generically through `ReadTree::file_contents`. +/// These can be constructed through `StoredTree::open_stored_file()`. pub struct StoredFile { block_dir: BlockDir, @@ -32,7 +31,7 @@ impl StoredFile { } /// Open a cursor on this file that implements `std::io::Read`. - pub(crate) fn into_read(self) -> ReadStoredFile { + pub fn into_read(self) -> ReadStoredFile { ReadStoredFile { remaining_addrs: self.addrs.into_iter(), buf: Vec::::new(), diff --git a/src/stored_tree.rs b/src/stored_tree.rs index 62d416d0..79fddefc 100644 --- a/src/stored_tree.rs +++ b/src/stored_tree.rs @@ -20,10 +20,11 @@ use crate::blockdir::BlockDir; use crate::stitch::IterStitchedIndexHunks; -use crate::stored_file::{ReadStoredFile, StoredFile}; +use crate::stored_file::StoredFile; use crate::*; /// Read index and file contents for a version stored in the archive. 
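// [Editor's sketch — not part of the diff.] With ReadTree::file_contents
// removed, callers stream file data through the now-public open_stored_file
// and into_read, as restore_file does above. Hypothetical usage:
//
//     let st = archive.open_stored_tree(BandSelectionPolicy::Latest)?;
//     for entry in st.iter_entries(Apath::root(), Exclude::nothing())? {
//         if entry.kind() == Kind::File {
//             let mut read = st.open_stored_file(&entry).into_read();
//             std::io::copy(&mut read, &mut std::io::sink())?;
//         }
//     }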
+#[derive(Debug)] pub struct StoredTree { band: Band, archive: Archive, @@ -31,7 +32,7 @@ pub struct StoredTree { } impl StoredTree { - pub(crate) fn open(archive: &Archive, band_id: &BandId) -> Result { + pub(crate) fn open(archive: &Archive, band_id: BandId) -> Result { Ok(StoredTree { band: Band::open(archive, band_id)?, block_dir: archive.block_dir().clone(), @@ -48,28 +49,25 @@ impl StoredTree { } /// Open a file stored within this tree. - fn open_stored_file(&self, entry: &IndexEntry) -> StoredFile { + pub fn open_stored_file(&self, entry: &IndexEntry) -> StoredFile { + assert_eq!(entry.kind(), Kind::File); StoredFile::open(self.block_dir.clone(), entry.addrs.clone()) } } impl ReadTree for StoredTree { - type R = ReadStoredFile; type Entry = IndexEntry; type IT = index::IndexEntryIter; /// Return an iter of index entries in this stored tree. + // TODO: Should return an iter of Result so that we can inspect them... fn iter_entries(&self, subtree: Apath, exclude: Exclude) -> Result { Ok( - IterStitchedIndexHunks::new(&self.archive, Some(self.band.id().clone())) + IterStitchedIndexHunks::new(&self.archive, Some(self.band.id())) .iter_entries(subtree, exclude), ) } - fn file_contents(&self, entry: &Self::Entry) -> Result { - Ok(self.open_stored_file(entry).into_read()) - } - fn estimate_count(&self) -> Result { self.band.index().estimate_entry_count() } @@ -90,7 +88,7 @@ mod test { let last_band_id = af.last_band_id().unwrap().unwrap(); let st = af.open_stored_tree(BandSelectionPolicy::Latest).unwrap(); - assert_eq!(*st.band().id(), last_band_id); + assert_eq!(st.band().id(), last_band_id); let names: Vec = st .iter_entries(Apath::root(), Exclude::nothing()) @@ -115,11 +113,12 @@ mod test { #[test] pub fn cant_open_no_versions() { let af = ScratchArchive::new(); - match af.open_stored_tree(BandSelectionPolicy::Latest) { - Err(Error::ArchiveEmpty) => (), - Err(other) => panic!("unexpected result {other:?}"), - Ok(_) => panic!("unexpected success"), - } + assert_eq!( + af.open_stored_tree(BandSelectionPolicy::Latest) + .unwrap_err() + .to_string(), + "Archive is empty" + ); } #[test] diff --git a/src/test_fixtures.rs b/src/test_fixtures.rs index 4f792462..af2dec15 100644 --- a/src/test_fixtures.rs +++ b/src/test_fixtures.rs @@ -65,10 +65,10 @@ impl ScratchArchive { } let options = &BackupOptions::default(); - backup(&self.archive, &srcdir.live_tree(), options).unwrap(); + backup(&self.archive, srcdir.path(), options).unwrap(); srcdir.create_file("hello2"); - backup(&self.archive, &srcdir.live_tree(), options).unwrap(); + backup(&self.archive, srcdir.path(), options).unwrap(); } pub fn transport(&self) -> &dyn Transport { @@ -186,3 +186,17 @@ impl Default for TreeFixture { Self::new() } } + +/// Collect apaths from an iterator into a list of string. +/// +/// This is more loosely typed but useful for tests. +pub fn entry_iter_to_apath_strings(entry_iter: EntryIter) -> Vec +where + EntryIter: IntoIterator, + E: EntryTrait, +{ + entry_iter + .into_iter() + .map(|entry| entry.apath().clone().into()) + .collect() +} diff --git a/src/trace_counter.rs b/src/trace_counter.rs new file mode 100644 index 00000000..e881089b --- /dev/null +++ b/src/trace_counter.rs @@ -0,0 +1,41 @@ +// Copyright 2023 Martin Pool. + +//! Count the number of `tracing` errors and warnings. + +use std::sync::atomic::{AtomicUsize, Ordering}; + +use tracing::{Event, Level, Subscriber}; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +/// Count of errors emitted to trace. 
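// [Editor's sketch — not part of the diff.] The layer below is registered in
// enable_tracing (src/ui/termui.rs, later in this diff) via
// Registry::default().with(CounterLayer()); afterwards the counts can be
// polled from anywhere in the process, e.g.:
//
//     let errors_before = global_error_count();
//     // ... run an operation that may log errors ...
//     let new_errors = global_error_count() - errors_before;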
+static ERROR_COUNT: AtomicUsize = AtomicUsize::new(0); + +/// Count of warnings emitted to trace. +static WARN_COUNT: AtomicUsize = AtomicUsize::new(0); + +/// Return the number of errors logged in the program so far. +pub fn global_error_count() -> usize { + ERROR_COUNT.load(Ordering::Relaxed) +} + +/// Return the number of warnings logged in the program so far. +pub fn global_warn_count() -> usize { + WARN_COUNT.load(Ordering::Relaxed) +} + +/// A tracing Layer that counts errors and warnings into static counters. +pub(crate) struct CounterLayer(); + +impl Layer for CounterLayer +where + S: Subscriber, +{ + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + match *event.metadata().level() { + Level::ERROR => ERROR_COUNT.fetch_add(1, Ordering::Relaxed), + Level::WARN => WARN_COUNT.fetch_add(1, Ordering::Relaxed), + _ => 0, + }; + } +} diff --git a/src/transport.rs b/src/transport.rs index 3713171c..9b8bcd39 100644 --- a/src/transport.rs +++ b/src/transport.rs @@ -1,4 +1,4 @@ -// Copyright 2020, 2021, 2022 Martin Pool. +// Copyright 2020, 2021, 2022, 2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -14,10 +14,11 @@ //! //! Transport operations return std::io::Result to reflect their narrower focus. -use std::io; -use std::path::Path; +use std::path::{Path, PathBuf}; +use std::{error, fmt, io, result}; use bytes::Bytes; +use derive_more::Display; use url::Url; use crate::*; @@ -28,7 +29,7 @@ use local::LocalTransport; /// Open a `Transport` to access a local directory. /// /// `s` may be a local path or a URL. -pub fn open_transport(s: &str) -> Result> { +pub fn open_transport(s: &str) -> crate::Result> { if let Ok(url) = Url::parse(s) { match url.scheme() { "file" => Ok(Box::new(LocalTransport::new( @@ -38,7 +39,7 @@ pub fn open_transport(s: &str) -> Result> { // Probably a Windows path with drive letter, like "c:/thing", not actually a URL. Ok(Box::new(LocalTransport::new(Path::new(s)))) } - other => Err(Error::UrlScheme { + other => Err(crate::Error::UrlScheme { scheme: other.to_owned(), }), } @@ -60,55 +61,24 @@ pub fn open_transport(s: &str) -> Result> { /// Files in Conserve archives have bounded size and fit in memory so this does not need to /// support streaming or partial reads and writes. pub trait Transport: Send + Sync + std::fmt::Debug { - /// Read the contents of a directory under this transport, without recursing down. - /// - /// Returned entries are in arbitrary order and may be interleaved with errors. - /// - /// The result should not contain entries for "." and "..". - fn iter_dir_entries( - &self, - path: &str, - ) -> io::Result>>>; - - /// As a convenience, read all filenames from the directory into vecs of - /// dirs and files. + /// List a directory, separating out file and subdirectory names. /// /// Names are in the arbitrary order that they're returned from the transport. /// /// Any error during iteration causes overall failure. - fn list_dir_names(&self, relpath: &str) -> io::Result { - let mut names = ListDirNames::default(); - for dir_entry in self.iter_dir_entries(relpath)? { - let dir_entry = dir_entry?; - match dir_entry.kind { - Kind::Dir => names.dirs.push(dir_entry.name), - Kind::File => names.files.push(dir_entry.name), - _ => (), - } - } - Ok(names) - } + fn list_dir(&self, relpath: &str) -> Result; /// Get one complete file into a caller-provided buffer. 
/// /// Files in the archive are of bounded size, so it's OK to always read them entirely into /// memory, and this is simple to support on all implementations. - fn read_file(&self, path: &str) -> io::Result; - - /// Check if a directory exists. - fn is_dir(&self, path: &str) -> io::Result { - match self.metadata(path) { - Ok(metadata) => Ok(metadata.kind == Kind::Dir), - Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false), - Err(err) => Err(err), - } - } + fn read_file(&self, path: &str) -> Result; /// Check if a regular file exists. - fn is_file(&self, path: &str) -> io::Result { + fn is_file(&self, path: &str) -> Result { match self.metadata(path) { Ok(metadata) => Ok(metadata.kind == Kind::File), - Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(false), + Err(err) if err.kind() == ErrorKind::NotFound => Ok(false), Err(err) => Err(err), } } @@ -118,7 +88,7 @@ pub trait Transport: Send + Sync + std::fmt::Debug { /// If the directory already exists, it's not an error. /// /// This function does not create missing parent directories. - fn create_dir(&self, relpath: &str) -> io::Result<()>; + fn create_dir(&self, relpath: &str) -> Result<()>; /// Write a complete file. /// @@ -127,19 +97,16 @@ pub trait Transport: Send + Sync + std::fmt::Debug { /// then renamed. /// /// If a temporary file is used, the name should start with `crate::TMP_PREFIX`. - fn write_file(&self, relpath: &str, content: &[u8]) -> io::Result<()>; + fn write_file(&self, relpath: &str, content: &[u8]) -> Result<()>; /// Get metadata about a file. - fn metadata(&self, relpath: &str) -> io::Result; + fn metadata(&self, relpath: &str) -> Result; /// Delete a file. - fn remove_file(&self, relpath: &str) -> io::Result<()>; - - /// Delete an empty directory. - fn remove_dir(&self, relpath: &str) -> io::Result<()>; + fn remove_file(&self, relpath: &str) -> Result<()>; /// Delete a directory and all its contents. - fn remove_dir_all(&self, relpath: &str) -> io::Result<()>; + fn remove_dir_all(&self, relpath: &str) -> Result<()>; /// Make a new transport addressing a subdirectory. fn sub_transport(&self, relpath: &str) -> Box; @@ -169,7 +136,87 @@ pub struct Metadata { /// A list of all the files and directories in a directory. #[derive(Debug, Default, Eq, PartialEq)] -pub struct ListDirNames { +pub struct ListDir { pub files: Vec, pub dirs: Vec, } + +/// A transport error, as a generalization of IO errors. +#[derive(Debug)] +pub struct Error { + pub kind: ErrorKind, + /// Might be for example an IO error or S3 error. + pub details: ErrorDetails, +} + +/// General categories of transport errors. 
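// [Editor's sketch — not part of the diff.] Callers can now match on the
// classified kind instead of inspecting raw io::Error values; for example
// (hypothetical caller code):
//
//     match transport.read_file("b0000/BANDHEAD") {
//         Ok(bytes) => { /* parse the band head */ }
//         Err(err) if err.is_not_found() => { /* treat as missing */ }
//         Err(err) => { /* propagate; err.kind() gives the category */ }
//     }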
+#[derive(Debug, Display, PartialEq, Eq, Clone, Copy)] +pub enum ErrorKind { + #[display(fmt = "Not found")] + NotFound, + #[display(fmt = "Already exists")] + AlreadyExists, + #[display(fmt = "Permission denied")] + PermissionDenied, + #[display(fmt = "Other transport error")] + Other, +} + +#[derive(Debug)] +pub enum ErrorDetails { + Io { source: io::Error, path: PathBuf }, // S3(s3::Error), + None, +} + +impl Error { + pub fn kind(&self) -> ErrorKind { + self.kind + } + + pub(self) fn io_error(path: &Path, source: io::Error) -> Error { + let kind = match source.kind() { + io::ErrorKind::NotFound => ErrorKind::NotFound, + io::ErrorKind::AlreadyExists => ErrorKind::AlreadyExists, + io::ErrorKind::PermissionDenied => ErrorKind::PermissionDenied, + _ => ErrorKind::Other, + }; + Error { + details: ErrorDetails::Io { + source, + path: path.to_owned(), + }, + kind, + } + } + + pub fn is_not_found(&self) -> bool { + self.kind == ErrorKind::NotFound + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // source is not in the short format; maybe should be in the alternate format? + match &self.details { + ErrorDetails::Io { path, .. } => { + write!(f, "{}", self.kind)?; + if !path.as_os_str().is_empty() { + write!(f, ": {}", path.display())?; + } + } + ErrorDetails::None => write!(f, "{}", self.kind)?, + } + Ok(()) + } +} + +impl error::Error for Error { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match &self.details { + ErrorDetails::Io { source, .. } => Some(source), + ErrorDetails::None => None, + } + } +} + +type Result = result::Result; diff --git a/src/transport/local.rs b/src/transport/local.rs index 9061c408..66576096 100644 --- a/src/transport/local.rs +++ b/src/transport/local.rs @@ -1,4 +1,4 @@ -// Copyright 2020 Martin Pool. +// Copyright 2020-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -19,8 +19,10 @@ use std::io::prelude::*; use std::path::{Path, PathBuf}; use bytes::Bytes; +use metrics::{counter, increment_counter}; +use tracing::warn; -use crate::transport::{DirEntry, Metadata, Transport}; +use super::{Error, ListDir, Metadata, Result, Transport}; #[derive(Clone, Debug)] pub struct LocalTransport { @@ -42,78 +44,103 @@ impl LocalTransport { } impl Transport for LocalTransport { - fn iter_dir_entries( - &self, - relpath: &str, - ) -> io::Result>>> { + fn list_dir(&self, relpath: &str) -> Result { // Archives should never normally contain non-UTF-8 (or even non-ASCII) filenames, but // let's pass them back as lossy UTF-8 so they can be reported at a higher level, for // example during validation. - let full_path = self.full_path(relpath); - Ok(Box::new(full_path.read_dir()?.map(move |de_result| { - let de = de_result?; - Ok(DirEntry { - name: de.file_name().to_string_lossy().into(), - kind: de.file_type()?.into(), - }) - }))) - } - - fn read_file(&self, relpath: &str) -> io::Result { - let mut file = File::open(self.full_path(relpath))?; - let estimated_len: usize = file.metadata()?.len().try_into().unwrap(); - let mut out_buf = Vec::with_capacity(estimated_len); - let actual_len = file.read_to_end(&mut out_buf)?; - out_buf.truncate(actual_len); - Ok(out_buf.into()) + let path = self.full_path(relpath); + increment_counter!("conserve.local_transport.read_dirs"); + let fail = |err| Error::io_error(&path, err); + let mut names = ListDir::default(); + for dir_entry in path.read_dir().map_err(fail)? 
{ + let dir_entry = dir_entry.map_err(fail)?; + if let Ok(name) = dir_entry.file_name().into_string() { + match dir_entry.file_type().map_err(fail)? { + t if t.is_dir() => names.dirs.push(name), + t if t.is_file() => names.files.push(name), + _ => (), + } + } else { + // These should never normally exist in archive directories, so warn + // and continue. + warn!("Non-UTF-8 filename in archive {:?}", dir_entry.file_name()); + } + } + Ok(names) } - fn is_file(&self, relpath: &str) -> io::Result { - Ok(self.full_path(relpath).is_file()) + fn read_file(&self, relpath: &str) -> Result { + increment_counter!("conserve.local_transport.read_files"); + fn try_block(path: &Path) -> io::Result { + let mut file = File::open(path)?; + let estimated_len: usize = file + .metadata()? + .len() + .try_into() + .expect("File size fits in usize"); + let mut out_buf = Vec::with_capacity(estimated_len); + let actual_len = file.read_to_end(&mut out_buf)?; + counter!( + "conserve.local_transport.read_file_bytes", + actual_len as u64 + ); + out_buf.truncate(actual_len); + Ok(out_buf.into()) + } + let path = &self.full_path(relpath); + try_block(path).map_err(|err| Error::io_error(path, err)) } - fn is_dir(&self, relpath: &str) -> io::Result { - Ok(self.full_path(relpath).is_dir()) + fn is_file(&self, relpath: &str) -> Result { + increment_counter!("conserve.local_transport.metadata_reads"); + let path = self.full_path(relpath); + Ok(path.is_file()) } - fn create_dir(&self, relpath: &str) -> io::Result<()> { - create_dir(self.full_path(relpath)).or_else(|err| { + fn create_dir(&self, relpath: &str) -> super::Result<()> { + let path = self.full_path(relpath); + create_dir(&path).or_else(|err| { if err.kind() == io::ErrorKind::AlreadyExists { Ok(()) } else { - Err(err) + Err(super::Error::io_error(&path, err)) } }) } - fn write_file(&self, relpath: &str, content: &[u8]) -> io::Result<()> { + fn write_file(&self, relpath: &str, content: &[u8]) -> super::Result<()> { + increment_counter!("conserve.local_transport.write_files"); + counter!( + "conserve.local_transport.write_file_bytes", + content.len() as u64 + ); let full_path = self.full_path(relpath); let dir = full_path.parent().unwrap(); + let context = |err| super::Error::io_error(&full_path, err); let mut temp = tempfile::Builder::new() .prefix(crate::TMP_PREFIX) - .tempfile_in(dir)?; + .tempfile_in(dir) + .map_err(context)?; if let Err(err) = temp.write_all(content) { let _ = temp.close(); - return Err(err); + return Err(context(err)); } if let Err(persist_error) = temp.persist(&full_path) { - persist_error.file.close()?; - Err(persist_error.error) + persist_error.file.close().map_err(context)?; + Err(context(persist_error.error)) } else { Ok(()) } } - fn remove_file(&self, relpath: &str) -> io::Result<()> { - std::fs::remove_file(self.full_path(relpath)) - } - - fn remove_dir(&self, relpath: &str) -> io::Result<()> { - std::fs::remove_dir(self.full_path(relpath)) + fn remove_file(&self, relpath: &str) -> super::Result<()> { + let path = self.full_path(relpath); + std::fs::remove_file(&path).map_err(|err| super::Error::io_error(&path, err)) } - fn remove_dir_all(&self, relpath: &str) -> io::Result<()> { - std::fs::remove_dir_all(self.full_path(relpath)) + fn remove_dir_all(&self, relpath: &str) -> super::Result<()> { + let path = self.full_path(relpath); + std::fs::remove_dir_all(&path).map_err(|err| super::Error::io_error(&path, err)) } fn sub_transport(&self, relpath: &str) -> Box { @@ -122,8 +149,10 @@ impl Transport for LocalTransport { }) } - fn 
metadata(&self, relpath: &str) -> io::Result { - let fsmeta = self.root.join(relpath).metadata()?; + fn metadata(&self, relpath: &str) -> Result { + increment_counter!("conserve.local_transport.metadata_reads"); + let path = self.root.join(relpath); + let fsmeta = path.metadata().map_err(|err| Error::io_error(&path, err))?; Ok(Metadata { len: fsmeta.len(), kind: fsmeta.file_type().into(), @@ -194,41 +223,16 @@ mod test { .unwrap(); let transport = LocalTransport::new(temp.path()); - let mut root_list: Vec<_> = transport - .iter_dir_entries(".") - .unwrap() - .map(std::io::Result::unwrap) - .collect(); - assert_eq!(root_list.len(), 2); - root_list.sort(); - - assert_eq!( - root_list[0], - DirEntry { - name: "root file".to_owned(), - kind: Kind::File, - } - ); - - // Len is unpredictable for directories, so check the other fields. - assert_eq!(root_list[1].name, "subdir"); - assert_eq!(root_list[1].kind, Kind::Dir); + let root_list = transport.list_dir(".").unwrap(); + assert_eq!(root_list.files, ["root file"]); + assert_eq!(root_list.dirs, ["subdir"]); assert!(transport.is_file("root file").unwrap()); assert!(!transport.is_file("nuh-uh").unwrap()); - let subdir_list: Vec<_> = transport - .iter_dir_entries("subdir") - .unwrap() - .map(std::io::Result::unwrap) - .collect(); - assert_eq!( - subdir_list, - vec![DirEntry { - name: "subfile".to_owned(), - kind: Kind::File, - }] - ); + let subdir_list = transport.list_dir("subdir").unwrap(); + assert_eq!(subdir_list.files, ["subfile"]); + assert_eq!(subdir_list.dirs, [""; 0]); temp.close().unwrap(); } @@ -273,28 +277,23 @@ mod test { transport.create_dir("aaa/bbb").unwrap(); let sub_transport = transport.sub_transport("aaa"); - let sub_list: Vec = sub_transport - .iter_dir_entries("") - .unwrap() - .map(|r| r.unwrap()) - .collect(); + let sub_list = sub_transport.list_dir("").unwrap(); - assert_eq!(sub_list.len(), 1); - assert_eq!(sub_list[0].name, "bbb"); + assert_eq!(sub_list.dirs, ["bbb"]); + assert_eq!(sub_list.files, [""; 0]); temp.close().unwrap(); } #[test] - fn remove_dir_all() -> std::io::Result<()> { + fn remove_dir_all() { let temp = assert_fs::TempDir::new().unwrap(); let transport = LocalTransport::new(temp.path()); - transport.create_dir("aaa")?; - transport.create_dir("aaa/bbb")?; - transport.create_dir("aaa/bbb/ccc")?; + transport.create_dir("aaa").unwrap(); + transport.create_dir("aaa/bbb").unwrap(); + transport.create_dir("aaa/bbb/ccc").unwrap(); - transport.remove_dir_all("aaa")?; - Ok(()) + transport.remove_dir_all("aaa").unwrap(); } } diff --git a/src/tree.rs b/src/tree.rs index e8953020..b35e3422 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2017, 2018, 2019, 2020, 2022 Martin Pool. +// Copyright 2017-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -15,13 +15,13 @@ use std::ops::Range; +use crate::progress::{Bar, Progress}; use crate::stats::Sizes; use crate::*; /// Abstract Tree that may be either on the real filesystem or stored in an archive. pub trait ReadTree { - type Entry: Entry + 'static; - type R: std::io::Read; + type Entry: EntryTrait + 'static; type IT: Iterator; /// Iterate, in apath order, all the entries in this tree. @@ -31,10 +31,6 @@ pub trait ReadTree { /// iterator. fn iter_entries(&self, subtree: Apath, exclude: Exclude) -> Result; - /// Read file contents as a `std::io::Read`. - // TODO: Remove this and use ReadBlocks or similar. 
- fn file_contents(&self, entry: &Self::Entry) -> Result; - /// Estimate the number of entries in the tree. /// This might do somewhat expensive IO, so isn't the Iter's `size_hint`. fn estimate_count(&self) -> Result; @@ -43,38 +39,20 @@ pub trait ReadTree { /// /// This typically requires walking all entries, which may take a while. fn size(&self, exclude: Exclude) -> Result { - struct Model { - files: usize, - total_bytes: u64, - } - impl nutmeg::Model for Model { - fn render(&mut self, _width: usize) -> String { - format!( - "Measuring... {} files, {} MB", - self.files, - self.total_bytes / 1_000_000 - ) - } - } - let progress = nutmeg::View::new( - Model { - files: 0, - total_bytes: 0, - }, - ui::nutmeg_options(), - ); - let mut tot = 0u64; + let mut files = 0; + let mut total_bytes = 0u64; + let bar = Bar::new(); for e in self.iter_entries(Apath::root(), exclude)? { // While just measuring size, ignore directories/files we can't stat. if let Some(bytes) = e.size() { - tot += bytes; - progress.update(|model| { - model.files += 1; - model.total_bytes += bytes; - }); + total_bytes += bytes; + files += 1; + bar.post(Progress::MeasureTree { files, total_bytes }); } } - Ok(TreeSize { file_bytes: tot }) + Ok(TreeSize { + file_bytes: total_bytes, + }) } } diff --git a/src/ui.rs b/src/ui.rs index a9bb4cbb..8e4dc87a 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2015, 2016, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -11,163 +11,6 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. -//! Console UI. +//! Generic UI layer. -use std::fmt::Write; -use std::sync::Mutex; -use std::time::Duration; - -use lazy_static::lazy_static; - -use crate::stats::Sizes; - -/// A terminal/text UI. -/// -/// This manages interleaving log-type messages (info and error), interleaved -/// with progress bars. -/// -/// Progress bars are only drawn when the application requests them with -/// `enable_progress` and the output destination is a tty that's capable -/// of redrawing. -/// -/// So this class also works when stdout is redirected to a file, in -/// which case it will get only messages and no progress bar junk. -#[derive(Default)] -pub(crate) struct UIState { - /// Should a progress bar be drawn? - progress_enabled: bool, -} - -lazy_static! { - static ref UI_STATE: Mutex = Mutex::new(UIState::default()); -} - -pub fn println(s: &str) { - with_locked_ui(|ui| ui.println(s)) -} - -pub fn problem(s: &str) { - with_locked_ui(|ui| ui.problem(s)); -} - -pub(crate) fn with_locked_ui(mut cb: F) -where - F: FnMut(&mut UIState), -{ - use std::ops::DerefMut; - cb(UI_STATE.lock().unwrap().deref_mut()) -} - -pub(crate) fn format_error_causes(error: &dyn std::error::Error) -> String { - let mut buf = error.to_string(); - let mut cause = error; - while let Some(c) = cause.source() { - write!(&mut buf, "\n caused by: {c}").expect("Failed to format error cause"); - cause = c; - } - buf -} - -/// Report that a non-fatal error occurred. -/// -/// The program will continue. -pub fn show_error(e: &dyn std::error::Error) { - // TODO: Log it. - problem(&format_error_causes(e)); -} - -/// Enable drawing progress bars, only if stdout is a tty. -/// -/// Progress bars are off by default. 
-pub fn enable_progress(enabled: bool) { - let mut ui = UI_STATE.lock().unwrap(); - ui.progress_enabled = enabled; -} - -#[allow(unused)] -pub(crate) fn compression_percent(s: &Sizes) -> i64 { - if s.uncompressed > 0 { - 100i64 - (100 * s.compressed / s.uncompressed) as i64 - } else { - 0 - } -} - -pub(crate) fn duration_to_hms(d: Duration) -> String { - let elapsed_secs = d.as_secs(); - if elapsed_secs >= 3600 { - format!( - "{:2}:{:02}:{:02}", - elapsed_secs / 3600, - (elapsed_secs / 60) % 60, - elapsed_secs % 60 - ) - } else { - format!(" {:2}:{:02}", (elapsed_secs / 60) % 60, elapsed_secs % 60) - } -} - -#[allow(unused)] -pub(crate) fn mbps_rate(bytes: u64, elapsed: Duration) -> f64 { - let secs = elapsed.as_secs() as f64 + f64::from(elapsed.subsec_millis()) / 1000.0; - if secs > 0.0 { - bytes as f64 / secs / 1e6 - } else { - 0f64 - } -} - -/// Describe the compression ratio: higher is better. -#[allow(unused)] -pub(crate) fn compression_ratio(s: &Sizes) -> f64 { - if s.compressed > 0 { - s.uncompressed as f64 / s.compressed as f64 - } else { - 0f64 - } -} - -impl UIState { - pub(crate) fn println(&mut self, s: &str) { - // TODO: Go through Nutmeg instead... - // self.clear_progress(); - println!("{s}"); - } - - fn problem(&mut self, s: &str) { - // TODO: Go through Nutmeg instead... - // self.clear_progress(); - println!("conserve error: {s}"); - // Drawing this way makes messages leak from tests, for unclear reasons. - - // queue!( - // stdout, - // style::SetForegroundColor(style::Color::Red), - // style::SetAttribute(style::Attribute::Bold), - // style::Print("conserve error: "), - // style::SetAttribute(style::Attribute::Reset), - // style::Print(s), - // style::Print("\n"), - // style::ResetColor, - // ) - // .unwrap(); - } -} - -pub(crate) fn nutmeg_options() -> nutmeg::Options { - nutmeg::Options::default().progress_enabled(UI_STATE.lock().unwrap().progress_enabled) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - pub fn test_compression_ratio() { - let ratio = compression_ratio(&Sizes { - compressed: 2000, - uncompressed: 4000, - }); - assert_eq!(format!("{ratio:3.1}x"), "2.0x"); - } -} +pub mod termui; diff --git a/src/ui/termui.rs b/src/ui/termui.rs new file mode 100644 index 00000000..205b6ff1 --- /dev/null +++ b/src/ui/termui.rs @@ -0,0 +1,95 @@ +// Conserve backup system. +// Copyright 2015-2023 Martin Pool. + +//! Terminal/text UI. + +use std::fmt::Debug; +use std::fs::OpenOptions; +use std::path::PathBuf; + +#[allow(unused_imports)] +use tracing::{debug, error, info, trace, warn, Level}; +use tracing_appender::non_blocking::WorkerGuard; +use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::fmt::time::FormatTime; +use tracing_subscriber::layer::Layer; +use tracing_subscriber::prelude::*; +use tracing_subscriber::Registry; + +use crate::progress::term::WriteToNutmeg; + +/// Chosen style of timestamp prefix on trace lines. +#[derive(clap::ValueEnum, Clone, Debug)] +pub enum TraceTimeStyle { + /// No timestamp on trace lines. + None, + /// Universal time, in RFC 3339 style. + Utc, + /// Local time, in RFC 3339, using the offset when the program starts. + Local, + /// Time since the start of the process, in seconds. 
+ Relative, +} + +#[must_use] +pub fn enable_tracing( + time_style: &TraceTimeStyle, + console_level: Level, + json_path: &Option, +) -> Option { + use tracing_subscriber::fmt::time; + fn hookup( + timer: FT, + console_level: Level, + json_path: &Option, + ) -> Option + where + FT: FormatTime + Send + Sync + 'static, + { + let console_layer = tracing_subscriber::fmt::Layer::default() + .with_ansi(clicolors_control::colors_enabled()) + .with_writer(WriteToNutmeg) + .with_timer(timer) + .with_filter(LevelFilter::from_level(console_level)); + let json_layer; + let flush_guard; + if let Some(json_path) = json_path { + let file_writer = OpenOptions::new() + .create(true) + .append(true) + .write(true) + .read(false) + .open(json_path) + .expect("open json log file"); + let (non_blocking, guard) = tracing_appender::non_blocking(file_writer); + flush_guard = Some(guard); + json_layer = Some( + tracing_subscriber::fmt::Layer::default() + .json() + .with_writer(non_blocking), + ); + } else { + flush_guard = None; + json_layer = None; + } + Registry::default() + .with(console_layer) + .with(crate::trace_counter::CounterLayer()) + .with(json_layer) + .init(); + flush_guard + } + + let flush_guard = match time_style { + TraceTimeStyle::None => hookup((), console_level, json_path), + TraceTimeStyle::Utc => hookup(time::UtcTime::rfc_3339(), console_level, json_path), + TraceTimeStyle::Relative => hookup(time::uptime(), console_level, json_path), + TraceTimeStyle::Local => hookup( + time::OffsetTime::local_rfc_3339().unwrap(), + console_level, + json_path, + ), + }; + trace!("Tracing enabled"); + flush_guard +} diff --git a/src/unix_mode.rs b/src/unix_mode.rs index 9b2bc062..b744f297 100644 --- a/src/unix_mode.rs +++ b/src/unix_mode.rs @@ -1,6 +1,6 @@ -// Copyright 2022 Stephanie Aelmore. // Conserve backup system. -// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2022 Stephanie Aelmore. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -31,13 +31,13 @@ //! using the write bit in the user class. //! TODO: Properly implement and test Windows compatibility. //! + +use std::fmt; +use std::fs::Permissions; +use std::io; +use std::path::Path; + use serde::{Deserialize, Serialize}; -use std::{ - fmt, - fs::{self, Permissions}, - io, - path::Path, -}; use unix_mode; #[derive(Debug, Default, Clone, Copy, PartialOrd, Ord, Serialize, Deserialize)] @@ -57,7 +57,7 @@ impl UnixMode { { if let Some(mode) = self.0 { let permissions = Permissions::from_mode(mode); - fs::set_permissions(&path, permissions) + std::fs::set_permissions(&path, permissions) } else { Ok(()) } @@ -88,7 +88,12 @@ impl UnixMode { None => false, } } + + pub fn as_u32(&self) -> Option { + self.0 + } } + impl fmt::Display for UnixMode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Convert to string. Since the file type bits are stripped, there will @@ -103,6 +108,7 @@ impl fmt::Display for UnixMode { } } } + impl From for UnixMode { fn from(mode: u32) -> Self { Self(Some(mode & MODE_BITS)) diff --git a/src/unix_time.rs b/src/unix_time.rs index e9793813..bad52bd8 100644 --- a/src/unix_time.rs +++ b/src/unix_time.rs @@ -12,43 +12,41 @@ // GNU General Public License for more details. //! Times relative to the Unix epoch. +//! +//! In particular, glue between [filetime] and [time]. 
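// [Editor's sketch — not part of the diff.] The round trip provided by the
// pub(crate) traits below; the values are arbitrary:
//
//     let ft = FileTime::from_unix_time(1_600_000_000, 500);
//     let odt = ft.to_offset_date_time();   // FileTime -> OffsetDateTime
//     assert_eq!(odt.to_file_time(), ft);   // OffsetDateTime -> FileTime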
use filetime::FileTime; +use time::OffsetDateTime; -use std::convert::From; -use std::time::{SystemTime, UNIX_EPOCH}; +pub(crate) trait FromUnixAndNanos { + fn from_unix_seconds_and_nanos(unix_seconds: i64, nanoseconds: u32) -> Self; +} + +impl FromUnixAndNanos for OffsetDateTime { + fn from_unix_seconds_and_nanos(unix_seconds: i64, nanoseconds: u32) -> Self { + OffsetDateTime::from_unix_timestamp(unix_seconds) + .unwrap() + .replace_nanosecond(nanoseconds) + .unwrap() + } +} -/// A Unix time, as seconds since 1970 UTC, plus fractional nanoseconds. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub struct UnixTime { - /// Whole seconds after (or if negative, before) 1 Jan 1970 UTC. - pub secs: i64, - /// Fractional nanoseconds. - pub nanosecs: u32, +pub(crate) trait ToOffsetDateTime { + fn to_offset_date_time(&self) -> OffsetDateTime; } -impl From for UnixTime { - fn from(t: SystemTime) -> UnixTime { - if let Ok(after) = t.duration_since(UNIX_EPOCH) { - UnixTime { - secs: after.as_secs() as i64, - nanosecs: after.subsec_nanos(), - } - } else { - let before = UNIX_EPOCH.duration_since(t).unwrap(); - let mut secs = -(before.as_secs() as i64); - let mut nanosecs = before.subsec_nanos(); - if nanosecs > 0 { - secs -= 1; - nanosecs = 1_000_000_000 - nanosecs; - } - UnixTime { secs, nanosecs } - } +impl ToOffsetDateTime for FileTime { + fn to_offset_date_time(&self) -> OffsetDateTime { + OffsetDateTime::from_unix_seconds_and_nanos(self.unix_seconds(), self.nanoseconds()) } } -impl From for FileTime { - fn from(t: UnixTime) -> FileTime { - FileTime::from_unix_time(t.secs, t.nanosecs) +pub(crate) trait ToFileTime { + fn to_file_time(&self) -> FileTime; +} + +impl ToFileTime for OffsetDateTime { + fn to_file_time(&self) -> FileTime { + FileTime::from_unix_time(self.unix_timestamp(), self.nanosecond()) } } diff --git a/src/validate.rs b/src/validate.rs index dd0a670f..c3af0aff 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -12,83 +12,80 @@ use std::cmp::max; use std::collections::HashMap; +use std::fmt::Debug; use std::time::Instant; +#[allow(unused_imports)] +use tracing::{error, info, warn}; + +use crate::misc::ResultExt; +use crate::progress::{Bar, Progress}; use crate::*; +/// Options to [Archive::validate]. #[derive(Debug, Default)] pub struct ValidateOptions { /// Assume blocks that are present have the right content: don't read and hash them. pub skip_block_hashes: bool, } +/// Validate the indexes of all bands. +/// +/// Returns the lengths of all blocks that were referenced, so that the caller can check +/// that all blocks are present and long enough. 
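// [Editor's sketch — not part of the diff.] How the returned map is meant to
// be consumed, per the doc comment above (hypothetical caller code):
//
//     let referenced = validate_bands(&archive, &band_ids)?;
//     for (hash, min_len) in &referenced {
//         // check that the block exists and holds at least min_len bytes,
//         // since min_len is the largest extent any index entry references
//     }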
pub(crate) fn validate_bands( archive: &Archive, band_ids: &[BandId], -) -> (HashMap, ValidateStats) { - let mut stats = ValidateStats::default(); +) -> Result> { let mut block_lens = HashMap::new(); - struct ProgressModel { - bands_done: usize, - bands_total: usize, - start: Instant, - } - impl nutmeg::Model for ProgressModel { - fn render(&mut self, _width: usize) -> String { - format!( - "Check index {}/{}, {} done, {} remaining", - self.bands_done, - self.bands_total, - nutmeg::percent_done(self.bands_done, self.bands_total), - nutmeg::estimate_remaining(&self.start, self.bands_done, self.bands_total) - ) - } - } - let view = nutmeg::View::new( - ProgressModel { - start: Instant::now(), - bands_done: 0, - bands_total: band_ids.len(), - }, - ui::nutmeg_options(), - ); - for band_id in band_ids { - if let Ok(b) = Band::open(archive, band_id) { - if b.validate(&mut stats).is_err() { - stats.band_metadata_problems += 1; + let start = Instant::now(); + let total_bands = band_ids.len(); + let bar = Bar::new(); + 'band: for (bands_done, band_id) in band_ids.iter().enumerate() { + let band = match Band::open(archive, *band_id) { + Ok(band) => band, + Err(err) => { + error!(%err, %band_id, "Error opening band"); + continue 'band; } - } else { - stats.band_open_errors += 1; - continue; + }; + if let Err(err) = band.validate() { + error!(%err, %band_id, "Error validating band"); + continue 'band; + }; + if let Err(err) = archive + .open_stored_tree(BandSelectionPolicy::Specified(*band_id)) + .and_then(|st| validate_stored_tree(&st)) + .map(|st_block_lens| merge_block_lens(&mut block_lens, &st_block_lens)) + { + error!(%err, %band_id, "Error validating stored tree"); + continue 'band; } - if let Ok(st) = archive.open_stored_tree(BandSelectionPolicy::Specified(band_id.clone())) { - if let Ok((st_block_lens, st_stats)) = validate_stored_tree(&st) { - stats += st_stats; - for (bh, bl) in st_block_lens { - block_lens - .entry(bh) - .and_modify(|al| *al = max(*al, bl)) - .or_insert(bl); - } - } else { - stats.tree_validate_errors += 1 - } - } else { - stats.tree_open_errors += 1; - continue; - } - view.update(|model| model.bands_done += 1); + bar.post(Progress::ValidateBands { + total_bands, + bands_done, + start, + }); + } + Ok(block_lens) +} + +fn merge_block_lens(into: &mut HashMap, from: &HashMap) { + for (bh, bl) in from { + into.entry(bh.clone()) + .and_modify(|l| *l = max(*l, *bl)) + .or_insert(*bl); } - (block_lens, stats) } -pub(crate) fn validate_stored_tree( - st: &StoredTree, -) -> Result<(HashMap, ValidateStats)> { +fn validate_stored_tree(st: &StoredTree) -> Result> { let mut block_lens = HashMap::new(); - let stats = ValidateStats::default(); + // TODO: Check other entry properties are correct. + // TODO: Check they're in apath order. + // TODO: Count progress for index blocks within one tree? for entry in st - .iter_entries(Apath::root(), Exclude::nothing())? + .iter_entries(Apath::root(), Exclude::nothing()) + .our_inspect_err(|err| error!(%err, "Error iterating index entries"))? .filter(|entry| entry.kind() == Kind::File) { for addr in entry.addrs { @@ -99,5 +96,5 @@ pub(crate) fn validate_stored_tree( .or_insert(end); } } - Ok((block_lens, stats)) + Ok(block_lens) } diff --git a/tests/api/archive.rs b/tests/api/archive.rs new file mode 100644 index 00000000..e02b0573 --- /dev/null +++ b/tests/api/archive.rs @@ -0,0 +1,119 @@ +// Conserve backup system. +// Copyright 2015-2023 Martin Pool. 
+ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +//! API tests for archives. + +use std::fs; +use std::io::Read; + +use assert_fs::prelude::*; +use assert_fs::TempDir; + +use conserve::archive::Archive; +use conserve::test_fixtures::ScratchArchive; +use conserve::Band; +use conserve::BandId; + +#[test] +fn create_then_open_archive() { + let testdir = TempDir::new().unwrap(); + let arch_path = testdir.path().join("arch"); + let arch = Archive::create_path(&arch_path).unwrap(); + + assert!(arch.list_band_ids().unwrap().is_empty()); + + // We can re-open it. + Archive::open_path(&arch_path).unwrap(); + assert!(arch.list_band_ids().unwrap().is_empty()); + assert!(arch.last_complete_band().unwrap().is_none()); +} + +#[test] +fn fails_on_non_empty_directory() { + let temp = TempDir::new().unwrap(); + + temp.child("i am already here").touch().unwrap(); + + let result = Archive::create_path(temp.path()); + assert_eq!( + result.as_ref().unwrap_err().to_string(), + "Directory for new archive is not empty", + "{result:?}" + ); + temp.close().unwrap(); +} + +/// A new archive contains just one header file. +/// The header is readable json containing only a version number. +#[test] +fn empty_archive() { + let af = ScratchArchive::new(); + + assert!(af.path().is_dir()); + assert!(af.path().join("CONSERVE").is_file()); + assert!(af.path().join("d").is_dir()); + + let header_path = af.path().join("CONSERVE"); + let mut header_file = fs::File::open(header_path).unwrap(); + let mut contents = String::new(); + header_file.read_to_string(&mut contents).unwrap(); + assert_eq!(contents, "{\"conserve_archive_version\":\"0.6\"}\n"); + + assert!( + af.last_band_id().unwrap().is_none(), + "Archive should have no bands yet" + ); + assert!( + af.last_complete_band().unwrap().is_none(), + "Archive should have no bands yet" + ); + assert_eq!( + af.referenced_blocks(&af.list_band_ids().unwrap()) + .unwrap() + .len(), + 0 + ); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 0); +} + +#[test] +fn create_bands() { + let af = ScratchArchive::new(); + assert!(af.path().join("d").is_dir()); + + // Make one band + let _band1 = Band::create(&af).unwrap(); + let band_path = af.path().join("b0000"); + assert!(band_path.is_dir()); + assert!(band_path.join("BANDHEAD").is_file()); + assert!(band_path.join("i").is_dir()); + + assert_eq!(af.list_band_ids().unwrap(), vec![BandId::new(&[0])]); + assert_eq!(af.last_band_id().unwrap(), Some(BandId::new(&[0]))); + + // Try creating a second band. 
+ let _band2 = Band::create(&af).unwrap(); + assert_eq!( + af.list_band_ids().unwrap(), + vec![BandId::new(&[0]), BandId::new(&[1])] + ); + assert_eq!(af.last_band_id().unwrap(), Some(BandId::new(&[1]))); + + assert_eq!( + af.referenced_blocks(&af.list_band_ids().unwrap()) + .unwrap() + .len(), + 0 + ); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 0); +} diff --git a/tests/api/backup.rs b/tests/api/backup.rs index 56f526de..c8aad27f 100644 --- a/tests/api/backup.rs +++ b/tests/api/backup.rs @@ -1,4 +1,4 @@ -// Copyright 2015, 2016, 2017, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -10,7 +10,7 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. -//! Tests focussed on backup behavior. +//! Tests focused on backup behavior. use assert_fs::prelude::*; use assert_fs::TempDir; @@ -20,6 +20,7 @@ use conserve::kind::Kind; use conserve::test_fixtures::ScratchArchive; use conserve::test_fixtures::TreeFixture; use conserve::*; +use tracing_test::traced_test; const HELLO_HASH: &str = "9063990e5c5b2184877f92adace7c801a549b00c39cd7549877f06d5dd0d3a6ca6eee42d5\ @@ -31,7 +32,7 @@ pub fn simple_backup() { let srcdir = TreeFixture::new(); srcdir.create_file("hello"); - let copy_stats = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).expect("backup"); + let copy_stats = backup(&af, srcdir.path(), &BackupOptions::default()).expect("backup"); assert_eq!(copy_stats.index_builder_stats.index_hunks, 1); assert_eq!(copy_stats.files, 1); assert_eq!(copy_stats.deduplicated_blocks, 0); @@ -43,9 +44,9 @@ pub fn simple_backup() { let restore_dir = TempDir::new().unwrap(); let archive = Archive::open_path(af.path()).unwrap(); - assert!(archive.band_exists(&BandId::zero()).unwrap()); - assert!(archive.band_is_closed(&BandId::zero()).unwrap()); - assert!(!archive.band_exists(&BandId::new(&[1])).unwrap()); + assert!(archive.band_exists(BandId::zero()).unwrap()); + assert!(archive.band_is_closed(BandId::zero()).unwrap()); + assert!(!archive.band_exists(BandId::new(&[1])).unwrap()); let copy_stats = restore(&archive, restore_dir.path(), &RestoreOptions::default()).expect("restore"); @@ -53,6 +54,7 @@ pub fn simple_backup() { } #[test] +#[traced_test] pub fn simple_backup_with_excludes() -> Result<()> { let af = ScratchArchive::new(); let srcdir = TreeFixture::new(); @@ -62,12 +64,11 @@ pub fn simple_backup_with_excludes() -> Result<()> { srcdir.create_file("baz"); // TODO: Include a symlink only on Unix. let exclude = Exclude::from_strings(["/**/baz", "/**/bar", "/**/fooo*"]).unwrap(); - let source = srcdir.live_tree(); let options = BackupOptions { exclude, ..BackupOptions::default() }; - let copy_stats = backup(&af, &source, &options).expect("backup"); + let copy_stats = backup(&af, srcdir.path(), &options).expect("backup"); check_backup(&af); @@ -81,7 +82,7 @@ pub fn simple_backup_with_excludes() -> Result<()> { let archive = Archive::open_path(af.path()).unwrap(); - let band = Band::open(&archive, &BandId::zero()).unwrap(); + let band = Band::open(&archive, BandId::zero()).unwrap(); let band_info = band.get_info()?; assert_eq!(band_info.index_hunk_count, Some(1)); assert_eq!(band_info.id, BandId::zero()); @@ -98,8 +99,8 @@ pub fn simple_backup_with_excludes() -> Result<()> { // TODO: Check index stats. // TODO: Check what was restored. 
- let validate_stats = af.validate(&ValidateOptions::default()).unwrap(); - assert!(!validate_stats.has_problems()); + af.validate(&ValidateOptions::default()).unwrap(); + assert!(!logs_contain("ERROR") && !logs_contain("WARN")); Ok(()) } @@ -118,13 +119,11 @@ pub fn backup_more_excludes() { srcdir.create_file("bar"); let exclude = Exclude::from_strings(["/**/foo*", "/**/baz"]).unwrap(); - let source = srcdir.live_tree(); let options = BackupOptions { exclude, - print_filenames: false, ..Default::default() }; - let stats = backup(&af, &source, &options).expect("backup"); + let stats = backup(&af, srcdir.path(), &options).expect("backup"); assert_eq!(1, stats.written_blocks); assert_eq!(1, stats.files); @@ -139,11 +138,11 @@ fn check_backup(af: &ScratchArchive) { assert_eq!(1, band_ids.len()); assert_eq!("b0000", band_ids[0].to_string()); assert_eq!( - *af.last_complete_band().unwrap().unwrap().id(), + af.last_complete_band().unwrap().unwrap().id(), BandId::new(&[0]) ); - let band = Band::open(af, &band_ids[0]).unwrap(); + let band = Band::open(af, band_ids[0]).unwrap(); assert!(band.is_closed().unwrap()); let index_entries = band.index().iter_entries().collect::<Vec<IndexEntry>>(); @@ -169,7 +168,7 @@ ); assert_eq!( af.block_dir() - .block_names() + .iter_block_names() .unwrap() .map(|h| h.to_string()) .collect::<Vec<String>>(), @@ -186,7 +185,7 @@ fn large_file() { let large_content = vec![b'a'; 4 << 20]; tf.create_file_with_contents("large", &large_content); - let backup_stats = backup(&af, &tf.live_tree(), &BackupOptions::default()).expect("backup"); + let backup_stats = backup(&af, tf.path(), &BackupOptions::default()).expect("backup"); assert_eq!(backup_stats.new_files, 1); // First 1MB should be new; remainder should be deduplicated.
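// A sketch of the arithmetic behind the next assertion, assuming the 1 MiB
// block size that the comment above implies: 4 << 20 identical bytes split
// into four identical 1 MiB blocks, of which only the first is written; the
// other three deduplicate against it, leaving exactly 1 << 20 uncompressed
// bytes stored.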
assert_eq!(backup_stats.uncompressed_bytes, 1 << 20); @@ -220,7 +219,7 @@ fn source_unreadable() { tf.make_file_unreadable("b_unreadable"); - let stats = backup(&af, &tf.live_tree(), &BackupOptions::default()).expect("backup"); + let stats = backup(&af, tf.path(), &BackupOptions::default()).expect("backup"); assert_eq!(stats.errors, 1); assert_eq!(stats.new_files, 3); assert_eq!(stats.files, 3); @@ -250,7 +249,7 @@ fn mtime_before_epoch() { assert_eq!(entries[1].apath(), "/old_file"); let af = ScratchArchive::new(); - backup(&af, &tf.live_tree(), &BackupOptions::default()) + backup(&af, tf.path(), &BackupOptions::default()) .expect("backup shouldn't crash on before-epoch mtimes"); } @@ -260,7 +259,7 @@ pub fn symlink() { let af = ScratchArchive::new(); let srcdir = TreeFixture::new(); srcdir.create_symlink("symlink", "/a/broken/destination"); - let copy_stats = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).expect("backup"); + let copy_stats = backup(&af, srcdir.path(), &BackupOptions::default()).expect("backup"); assert_eq!(0, copy_stats.files); assert_eq!(1, copy_stats.symlinks); @@ -270,7 +269,7 @@ pub fn symlink() { assert_eq!(1, band_ids.len()); assert_eq!("b0000", band_ids[0].to_string()); - let band = Band::open(&af, &band_ids[0]).unwrap(); + let band = Band::open(&af, band_ids[0]).unwrap(); assert!(band.is_closed().unwrap()); let index_entries = band.index().iter_entries().collect::>(); @@ -289,7 +288,7 @@ pub fn empty_file_uses_zero_blocks() { let af = ScratchArchive::new(); let srcdir = TreeFixture::new(); srcdir.create_file_with_contents("empty", &[]); - let stats = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).unwrap(); + let stats = backup(&af, srcdir.path(), &BackupOptions::default()).unwrap(); assert_eq!(1, stats.files); assert_eq!(stats.written_blocks, 0); @@ -301,9 +300,9 @@ pub fn empty_file_uses_zero_blocks() { .unwrap() .find(|i| &i.apath == "/empty") .expect("found one entry"); - let mut sf = st.file_contents(&empty_entry).unwrap(); + let stored_file = st.open_stored_file(&empty_entry); let mut s = String::new(); - assert_eq!(sf.read_to_string(&mut s).unwrap(), 0); + assert_eq!(stored_file.into_read().read_to_string(&mut s).unwrap(), 0); assert_eq!(s.len(), 0); // Restore it @@ -321,7 +320,7 @@ pub fn detect_unmodified() { srcdir.create_file("bbb"); let options = BackupOptions::default(); - let stats = backup(&af, &srcdir.live_tree(), &options).unwrap(); + let stats = backup(&af, srcdir.path(), &options).unwrap(); assert_eq!(stats.files, 2); assert_eq!(stats.new_files, 2); @@ -329,7 +328,7 @@ pub fn detect_unmodified() { // Make a second backup from the same tree, and we should see that // both files are unmodified. - let stats = backup(&af, &srcdir.live_tree(), &options).unwrap(); + let stats = backup(&af, srcdir.path(), &options).unwrap(); assert_eq!(stats.files, 2); assert_eq!(stats.new_files, 0); @@ -339,7 +338,7 @@ pub fn detect_unmodified() { // as unmodified. 
srcdir.create_file_with_contents("bbb", b"longer content for bbb"); - let stats = backup(&af, &srcdir.live_tree(), &options).unwrap(); + let stats = backup(&af, srcdir.path(), &options).unwrap(); assert_eq!(stats.files, 2); assert_eq!(stats.new_files, 0); @@ -355,7 +354,7 @@ pub fn detect_minimal_mtime_change() { srcdir.create_file_with_contents("bbb", b"longer content for bbb"); let options = BackupOptions::default(); - let stats = backup(&af, &srcdir.live_tree(), &options).unwrap(); + let stats = backup(&af, srcdir.path(), &options).unwrap(); assert_eq!(stats.files, 2); assert_eq!(stats.new_files, 2); @@ -377,7 +376,7 @@ pub fn detect_minimal_mtime_change() { } } - let stats = backup(&af, &srcdir.live_tree(), &options).unwrap(); + let stats = backup(&af, srcdir.path(), &options).unwrap(); assert_eq!(stats.files, 2); assert_eq!(stats.unmodified_files, 1); } @@ -389,7 +388,7 @@ fn small_files_combined_two_backups() { srcdir.create_file("file1"); srcdir.create_file("file2"); - let stats1 = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).unwrap(); + let stats1 = backup(&af, srcdir.path(), &BackupOptions::default()).unwrap(); // Although the two files have the same content, we do not yet dedupe them // within a combined block, so the block is different to when one identical // file is stored alone. This could be fixed. @@ -401,13 +400,13 @@ fn small_files_combined_two_backups() { // Add one more file, also identical, but it is not combined with the previous blocks. // This is a shortcoming of the current dedupe approach. srcdir.create_file("file3"); - let stats2 = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).unwrap(); + let stats2 = backup(&af, srcdir.path(), &BackupOptions::default()).unwrap(); assert_eq!(stats2.new_files, 1); assert_eq!(stats2.unmodified_files, 2); assert_eq!(stats2.written_blocks, 1); assert_eq!(stats2.combined_blocks, 1); - assert_eq!(af.block_dir().block_names().unwrap().count(), 2); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 2); } #[test] @@ -423,7 +422,7 @@ fn many_small_files_combined_to_one_block() { format!("something about {i}").as_bytes(), ); } - let stats = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).expect("backup"); + let stats = backup(&af, srcdir.path(), &BackupOptions::default()).expect("backup"); assert_eq!( stats.index_builder_stats.index_hunks, 2, "expect exactly 2 hunks" @@ -469,7 +468,7 @@ pub fn mixed_medium_small_files_two_hunks() { srcdir.create_file(&name); } } - let stats = backup(&af, &srcdir.live_tree(), &BackupOptions::default()).expect("backup"); + let stats = backup(&af, srcdir.path(), &BackupOptions::default()).expect("backup"); assert_eq!( stats.index_builder_stats.index_hunks, 2, "expect exactly 2 hunks" @@ -510,7 +509,7 @@ fn detect_unchanged_from_stitched_index() { // Use small hunks for easier manipulation. let stats = backup( &af, - &srcdir.live_tree(), + srcdir.path(), &BackupOptions { max_entries_per_hunk: 1, ..Default::default() @@ -525,7 +524,7 @@ fn detect_unchanged_from_stitched_index() { srcdir.create_file_with_contents("a", b"new a contents"); let stats = backup( &af, - &srcdir.live_tree(), + srcdir.path(), &BackupOptions { max_entries_per_hunk: 1, ..Default::default() @@ -546,7 +545,7 @@ fn detect_unchanged_from_stitched_index() { // index from both b0 and b1. 
let stats = backup( &af, - &srcdir.live_tree(), + srcdir.path(), &BackupOptions { max_entries_per_hunk: 1, ..Default::default() diff --git a/tests/api/bandid.rs b/tests/api/bandid.rs new file mode 100644 index 00000000..be911f0b --- /dev/null +++ b/tests/api/bandid.rs @@ -0,0 +1,95 @@ +// Conserve backup system. +// Copyright 2015-2023 Martin Pool. + +use std::str::FromStr; + +use assert_matches::assert_matches; + +use conserve::{BandId, Error}; + +#[test] +#[should_panic] +fn empty_id_not_allowed() { + BandId::new(&[]); +} + +#[test] +fn equality() { + assert_eq!(BandId::new(&[1]), BandId::new(&[1])) +} + +#[test] +fn zero() { + assert_eq!(BandId::zero().to_string(), "b0000"); +} + +#[test] +fn zero_has_no_previous() { + assert_eq!(BandId::zero().previous(), None); +} + +#[test] +fn previous_of_one_is_zero() { + assert_eq!( + BandId::zero().next_sibling().previous(), + Some(BandId::zero()) + ); +} + +#[test] +fn next_of_zero_is_one() { + assert_eq!(BandId::zero().next_sibling().to_string(), "b0001"); +} + +#[test] +fn next_of_two_is_three() { + assert_eq!(BandId::from(2).next_sibling().to_string(), "b0003"); +} + +#[test] +fn to_string() { + let band_id = BandId::new(&[20]); + assert_eq!(band_id.to_string(), "b0020"); +} + +#[test] +fn large_value_to_string() { + assert_eq!(BandId::new(&[2_000_000]).to_string(), "b2000000") +} + +#[test] +fn from_string_detects_invalid() { + assert!(BandId::from_str("").is_err()); + assert!(BandId::from_str("hello").is_err()); + assert!(BandId::from_str("b").is_err()); + assert!(BandId::from_str("b-").is_err()); + assert!(BandId::from_str("b2-").is_err()); + assert!(BandId::from_str("b-2").is_err()); + assert!(BandId::from_str("b2-1-").is_err()); + assert!(BandId::from_str("b2--1").is_err()); + assert!(BandId::from_str("beta").is_err()); + assert!(BandId::from_str("b-eta").is_err()); + assert!(BandId::from_str("b-1eta").is_err()); + assert!(BandId::from_str("b-1-eta").is_err()); +} + +#[test] +fn from_string_valid() { + assert_eq!(BandId::from_str("b0001").unwrap().to_string(), "b0001"); + assert_eq!(BandId::from_str("b123456").unwrap().to_string(), "b123456"); +} + +#[test] +fn dashes_are_no_longer_valid() { + // Versions prior to 23.2 accepted bandids with dashes, but never + // used them. + let err = BandId::from_str("b0001-0100-0234").unwrap_err(); + assert_matches!(err, Error::InvalidVersion { .. }); +} + +#[test] +fn to_string_respects_padding() { + let s = format!("{:<10}", BandId::from(42)); + assert_eq!(s.len(), 10); + assert_eq!(s, "b0042     "); +} diff --git a/tests/api/damaged.rs b/tests/api/damaged.rs index a42595a2..726d602a 100644 --- a/tests/api/damaged.rs +++ b/tests/api/damaged.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2020, Martin Pool. +// Copyright 2020-2023 Martin Pool.
// This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -15,26 +15,28 @@ use std::path::Path; +use tracing_test::traced_test; + use conserve::*; +#[traced_test] #[test] -fn missing_block() -> Result<()> { +fn missing_block_when_checking_hashes() -> Result<()> { let archive = Archive::open_path(Path::new("testdata/damaged/missing-block"))?; - - let validate_stats = archive.validate(&ValidateOptions::default())?; - assert!(validate_stats.has_problems()); - assert_eq!(validate_stats.block_missing_count, 1); + archive.validate(&ValidateOptions::default())?; + assert!(logs_contain( + "Referenced block missing block_hash=fec91c70284c72d0d4e3684788a90de9338a5b2f47f01fedbe203cafd68708718ae5672d10eca804a8121904047d40d1d6cf11e7a76419357a9469af41f22d01")); Ok(()) } +#[traced_test] #[test] fn missing_block_skip_block_hashes() -> Result<()> { let archive = Archive::open_path(Path::new("testdata/damaged/missing-block"))?; - - let validate_stats = archive.validate(&ValidateOptions { + archive.validate(&ValidateOptions { skip_block_hashes: true, })?; - assert!(validate_stats.has_problems()); - assert_eq!(validate_stats.block_missing_count, 1); + assert!(logs_contain( + "Referenced block missing block_hash=fec91c70284c72d0d4e3684788a90de9338a5b2f47f01fedbe203cafd68708718ae5672d10eca804a8121904047d40d1d6cf11e7a76419357a9469af41f22d01")); Ok(()) } diff --git a/tests/api/diff.rs b/tests/api/diff.rs index 5c0409d8..7ed7679a 100644 --- a/tests/api/diff.rs +++ b/tests/api/diff.rs @@ -1,4 +1,4 @@ -// Copyright 2021 Martin Pool. +// Copyright 2021-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -14,6 +14,7 @@ use conserve::test_fixtures::{ScratchArchive, TreeFixture}; use conserve::*; +use itertools::Itertools; #[test] fn diff_unchanged() { @@ -22,7 +23,7 @@ tf.create_file_with_contents("thing", b"contents of thing"); let lt = tf.live_tree(); - let stats = backup(&a, &lt, &BackupOptions::default()).unwrap(); + let stats = backup(&a, tf.path(), &BackupOptions::default()).unwrap(); assert_eq!(stats.new_files, 1); let st = a.open_stored_tree(BandSelectionPolicy::Latest).unwrap(); @@ -31,28 +32,20 @@ include_unchanged: true, ..DiffOptions::default() }; - let des: Vec<DiffEntry> = diff(&st, &lt, &options).unwrap().collect(); - assert_eq!(des.len(), 2); // Root directory and the file "/thing". - assert_eq!( - des[0], - DiffEntry { - apath: "/".into(), - kind: DiffKind::Unchanged, - } - ); - assert_eq!( - des[1], - DiffEntry { - apath: "/thing".into(), - kind: DiffKind::Unchanged, - } - ); + let changes: Vec<EntryChange> = diff(&st, &lt, &options).unwrap().collect(); + dbg!(&changes); + assert_eq!(changes.len(), 2); // Root directory and the file "/thing".
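// (Added note: the root directory "/" is itself an entry in the tree, which
// is why a tree holding a single file diffs as two entries here.)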
+ assert_eq!(changes[0].apath, "/"); + assert!(changes[0].is_unchanged()); + assert_eq!(changes[1].apath, "/thing"); + assert!(changes[1].is_unchanged()); // Excluding unchanged elements let options = DiffOptions { include_unchanged: false, ..DiffOptions::default() }; - - assert_eq!(diff(&st, &lt, &options).unwrap().count(), 0); + let changes = diff(&st, &lt, &options).unwrap().collect_vec(); + println!("changes with include_unchanged=false:\n{changes:#?}"); + assert_eq!(changes.len(), 0); } diff --git a/tests/api/format_flags.rs b/tests/api/format_flags.rs new file mode 100644 index 00000000..3e245f2b --- /dev/null +++ b/tests/api/format_flags.rs @@ -0,0 +1,54 @@ +// Conserve backup system. +// Copyright 2015-2023 Martin Pool. + +//! Tests for per-band format flags. + +use conserve::test_fixtures::ScratchArchive; +use conserve::*; + +#[test] +// This can be updated if/when Conserve does start writing some flags by default. +fn default_format_flags_are_empty() { + let af = ScratchArchive::new(); + + let orig_band = Band::create(&af).unwrap(); + let flags = orig_band.format_flags(); + assert!(flags.is_empty(), "{flags:?}"); + + let band = Band::open(&af, orig_band.id()).unwrap(); + println!("{band:?}"); + assert!(band.format_flags().is_empty()); + + assert_eq!(band.band_format_version(), Some("0.6.3")); + // TODO: When we do support some flags, check that the minimum version is 23.2. +} + +#[test] +#[should_panic(expected = "unknown flag \"wibble\"")] +fn unknown_format_flag_panics_in_create() { + let af = ScratchArchive::new(); + let _ = Band::create_with_flags(&af, &["wibble".into()]); +} + +#[test] +fn unknown_format_flag_fails_to_open() { + let af = ScratchArchive::new(); + + // Make the bandhead by hand because the library prevents writing invalid flags. + af.transport().create_dir("b0000").unwrap(); + let head = serde_json::json! ({ + "start_time": 1676651990, + "band_format_version": "23.2.0", + "format_flags": ["wibble"] + }); + af.transport() + .sub_transport("b0000") + .write_file("BANDHEAD", &serde_json::to_vec(&head).unwrap()) + .unwrap(); + + let err = Band::open(&af, BandId::zero()).unwrap_err(); + assert_eq!( + err.to_string(), + "Unsupported band format flags [\"wibble\"] in b0000" + ) +} diff --git a/tests/api/gc.rs b/tests/api/gc.rs index 86295285..73ee607a 100644 --- a/tests/api/gc.rs +++ b/tests/api/gc.rs @@ -1,4 +1,4 @@ -// Copyright 2015, 2016, 2017, 2019, 2020, 2021 Martin Pool. +// Copyright 2015-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ fn unreferenced_blocks() { .parse() .unwrap(); - let _copy_stats = backup(&archive, &tf.live_tree(), &BackupOptions::default()).expect("backup"); + let _copy_stats = backup(&archive, tf.path(), &BackupOptions::default()).expect("backup"); // Delete the band and index std::fs::remove_dir_all(archive.path().join("b0000")).unwrap(); @@ -98,11 +98,11 @@ fn backup_prevented_by_gc_lock() -> Result<()> { let lock1 = GarbageCollectionLock::new(&archive)?; // Backup should fail while gc lock is held.
- let backup_result = backup(&archive, &tf.live_tree(), &BackupOptions::default()); - match backup_result { - Err(Error::GarbageCollectionLockHeld) => (), - other => panic!("unexpected result {other:?}"), - }; + let backup_result = backup(&archive, tf.path(), &BackupOptions::default()); + assert_eq!( + backup_result.unwrap_err().to_string(), + "Archive is locked for garbage collection" + ); // Leak the lock, then gc breaking the lock. std::mem::forget(lock1); @@ -115,7 +115,7 @@ )?; // Backup should now succeed. - let backup_result = backup(&archive, &tf.live_tree(), &BackupOptions::default()); + let backup_result = backup(&archive, tf.path(), &BackupOptions::default()); assert!(backup_result.is_ok()); Ok(()) diff --git a/tests/api/live_tree.rs b/tests/api/live_tree.rs index b2a599c2..afb62846 100644 --- a/tests/api/live_tree.rs +++ b/tests/api/live_tree.rs @@ -1,4 +1,4 @@ -// Copyright 2021, 2022 Martin Pool. +// Copyright 2021-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -11,9 +11,9 @@ // GNU General Public License for more details. use pretty_assertions::assert_eq; -use regex::Regex; -use conserve::test_fixtures::TreeFixture; +use conserve::entry::EntryValue; +use conserve::test_fixtures::{entry_iter_to_apath_strings, TreeFixture}; use conserve::*; #[test] @@ -33,11 +33,11 @@ fn list_simple_directory() { tf.create_dir("jelly"); tf.create_dir("jam/.etc"); let lt = LiveTree::open(tf.path()).unwrap(); - let result: Vec<LiveEntry> = lt + let result: Vec<EntryValue> = lt .iter_entries(Apath::root(), Exclude::nothing()) .unwrap() .collect(); - let names = entry_iter_to_apath_strings(result.clone()); + let names = entry_iter_to_apath_strings(&result); // First one is the root assert_eq!( names, @@ -53,15 +53,9 @@ ); let repr = format!("{:?}", &result[6]); - - let re_str = r#"LiveEntry \{ apath: Apath\("/jam/apricot"\), kind: "#.to_owned() - + r#"File, mtime: UnixTime \{ [^)]* \}, size: Some\(8\), symlink_target: None, "# - + r#"unix_mode: UnixMode\((Some\([0-9]+\)\)|None), "# - + r#"owner: Owner \{ user: (Some\("[a-z_][a-z0-9_-]*[$]?"\)|None), "# - + r#"group: (Some\("[a-z_][a-z0-9_-]*[$]?"\)|None) \} \}"#; - - let re = Regex::new(&re_str).unwrap(); - assert!(re.is_match(&repr)); + println!("{repr}"); + assert!(repr.starts_with("EntryValue {")); + assert!(repr.contains("Apath(\"/jam/apricot\")")); // TODO: Somehow get the stats out of the iterator. // assert_eq!(source_iter.stats.directories_visited, 4); @@ -137,17 +131,3 @@ fn exclude_cachedir() { entry_iter_to_apath_strings(lt.iter_entries(Apath::root(), Exclude::nothing()).unwrap()); assert_eq!(names, ["/", "/a"]); } - -/// Collect apaths from an iterator into a list of string. -/// -/// This is more loosely typed but useful for tests. -fn entry_iter_to_apath_strings<EntryIter, E>(entry_iter: EntryIter) -> Vec<String> -where - EntryIter: IntoIterator<Item = E>, - E: Entry, -{ - entry_iter - .into_iter() - .map(|entry| entry.apath().clone().into()) - .collect() -} diff --git a/tests/api/main.rs b/tests/api/main.rs index 92cddbd1..6f85f985 100644 --- a/tests/api/main.rs +++ b/tests/api/main.rs @@ -1,4 +1,4 @@ -// Copyright 2021 Martin Pool. +// Copyright 2021-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -13,11 +13,14 @@ //! Tests for the Conserve library API.
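The `entry_iter_to_apath_strings` helper deleted from live_tree.rs above is now imported from `conserve::test_fixtures`, whose definition is not shown in this patch. A plausible shape, reconstructed from the deleted version (the real one may differ, for example by accepting `&[EntryValue]`, since the call site now passes `&result`):

pub fn entry_iter_to_apath_strings<EntryIter, E>(entry_iter: EntryIter) -> Vec<String>
where
    EntryIter: IntoIterator<Item = E>,
    E: Entry,
{
    entry_iter
        .into_iter()
        .map(|entry| entry.apath().clone().into())
        .collect()
}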
mod apath; +mod archive; mod backup; +mod bandid; mod blockhash; mod damaged; mod delete; mod diff; +mod format_flags; mod gc; mod live_tree; mod old_archives; diff --git a/tests/api/old_archives.rs b/tests/api/old_archives.rs index f10e58e3..f908a0cd 100644 --- a/tests/api/old_archives.rs +++ b/tests/api/old_archives.rs @@ -13,6 +13,7 @@ //! Read archives written by older versions. +use std::cell::RefCell; use std::collections::HashSet; use std::fs::{self, metadata, read_dir}; use std::path::Path; @@ -22,8 +23,9 @@ use assert_fs::TempDir; use predicates::prelude::*; use pretty_assertions::assert_eq; -use conserve::unix_time::UnixTime; use conserve::*; +use time::OffsetDateTime; +use tracing_test::traced_test; const MINIMAL_ARCHIVE_VERSIONS: &[&str] = &["0.6.0", "0.6.10", "0.6.2", "0.6.3", "0.6.9", "0.6.17"]; @@ -71,19 +73,17 @@ fn examine_archive() { } } +#[traced_test] #[test] fn validate_archive() { for ver in MINIMAL_ARCHIVE_VERSIONS { println!("validate {ver}"); let archive = open_old_archive(ver, "minimal"); - let stats = archive + archive .validate(&ValidateOptions::default()) .expect("validate archive"); - assert_eq!(stats.structure_problems, 0); - assert_eq!(stats.io_errors, 0); - assert_eq!(stats.block_error_count, 0); - assert!(!stats.has_problems()); + assert!(!logs_contain("ERROR") && !logs_contain("WARN")); } } @@ -155,20 +155,25 @@ fn restore_old_archive() { // Check that mtimes are restored. The sub-second times are not tested // because their behavior might vary depending on the local filesystem. - let file_mtime = UnixTime::from( + let file_mtime = OffsetDateTime::from( metadata(dest.child("hello").path()) .unwrap() .modified() .unwrap(), ); - assert_eq!(file_mtime.secs, 1592266523, "mtime not restored correctly"); - let dir_mtime = UnixTime::from( + assert_eq!( + file_mtime.unix_timestamp(), + 1592266523, + "mtime not restored correctly" + ); + + let dir_mtime = OffsetDateTime::from( metadata(dest.child("subdir").path()) .unwrap() .modified() .unwrap(), ); - assert_eq!(dir_mtime.secs, 1592266523,); + assert_eq!(dir_mtime.unix_timestamp(), 1592266523); } } @@ -204,16 +209,26 @@ fn restore_modify_backup() { .expect("overwrite file"); let new_archive = Archive::open_path(&new_archive_path).expect("Open new archive"); + let emitted = RefCell::new(Vec::new()); let backup_stats = backup( &new_archive, - &LiveTree::open(working_tree.path()).unwrap(), + working_tree.path(), &BackupOptions { - print_filenames: true, + change_callback: Some(Box::new(|change| { + emitted + .borrow_mut() + .push((change.change.sigil(), change.apath.to_string())); + Ok(()) + })), ..Default::default() }, ) .expect("Backup modified tree"); + // Check the visited files passed to the callbacks. + let emitted = emitted.into_inner(); + dbg!(&emitted); + // Expected results for files: // "/empty" is empty and new // "/subdir/subfile" is modified @@ -226,6 +241,8 @@ fn restore_modify_backup() { working_tree.child(path).path().metadata().unwrap() ); } + assert!(emitted.contains(&('+', "/empty".to_owned()))); + assert!(emitted.contains(&('*', "/subdir/subfile".to_owned()))); assert_eq!(backup_stats.files, 3); assert!( diff --git a/tests/api/restore.rs b/tests/api/restore.rs index c6703de4..34c8769e 100644 --- a/tests/api/restore.rs +++ b/tests/api/restore.rs @@ -1,4 +1,4 @@ -// Copyright 2015, 2016, 2017, 2019, 2020 Martin Pool. +// Copyright 2015-2023 Martin Pool. 
// This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -10,8 +10,9 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. -//! Tests focussed on restore. +//! Tests focused on restore. +use std::cell::RefCell; #[cfg(unix)] use std::fs::{read_link, symlink_metadata}; use std::path::PathBuf; @@ -21,7 +22,6 @@ use tempfile::TempDir; use conserve::test_fixtures::ScratchArchive; use conserve::test_fixtures::TreeFixture; -use conserve::unix_time::UnixTime; use conserve::*; #[test] @@ -29,12 +29,31 @@ fn simple_restore() { let af = ScratchArchive::new(); af.store_two_versions(); let destdir = TreeFixture::new(); - - let options = RestoreOptions::default(); let restore_archive = Archive::open_path(af.path()).unwrap(); + let restored_names = RefCell::new(Vec::new()); + let options = RestoreOptions { + change_callback: Some(Box::new(|entry_change| { + restored_names.borrow_mut().push(entry_change.apath.clone()); + Ok(()) + })), + ..Default::default() + }; let stats = restore(&restore_archive, destdir.path(), &options).expect("restore"); assert_eq!(stats.files, 3); + let mut expected_names = vec![ + "/", + "/hello", + "/hello2", + "/link", + "/subdir", + "/subdir/subfile", + ]; + if !SYMLINKS_SUPPORTED { + expected_names.retain(|n| *n != "/link"); + } + drop(options); + assert_eq!(restored_names.into_inner(), expected_names); let dest = &destdir.path(); assert!(dest.join("hello").is_file()); @@ -71,10 +90,17 @@ pub fn decline_to_overwrite() { af.store_two_versions(); let destdir = TreeFixture::new(); destdir.create_file("existing"); - let restore_err_str = RestoreTree::create(destdir.path().to_owned()) - .unwrap_err() + let options = RestoreOptions { + ..RestoreOptions::default() + }; + assert!(!options.overwrite, "overwrite is false by default"); + let restore_err_str = restore(&af, destdir.path(), &options) + .expect_err("restore should fail if the destination exists") .to_string(); - assert!(restore_err_str.contains("Destination directory not empty")); + assert!( + restore_err_str.contains("Destination directory is not empty"), + "Unexpected error message: {restore_err_str:?}" + ); } #[test] @@ -123,14 +149,10 @@ fn restore_symlink() { let srcdir = TreeFixture::new(); srcdir.create_symlink("symlink", "target"); - let years_ago = UnixTime { - secs: 189216000, - nanosecs: 0, - }; - let mtime: FileTime = years_ago.into(); - set_symlink_file_times(srcdir.path().join("symlink"), mtime, mtime).unwrap(); + let years_ago = FileTime::from_unix_time(189216000, 0); + set_symlink_file_times(srcdir.path().join("symlink"), years_ago, years_ago).unwrap(); - backup(&af, &srcdir.live_tree(), &Default::default()).unwrap(); + backup(&af, srcdir.path(), &Default::default()).unwrap(); let restore_dir = TempDir::new().unwrap(); restore(&af, restore_dir.path(), &Default::default()).unwrap(); @@ -138,7 +160,7 @@ fn restore_symlink() { let restored_symlink_path = restore_dir.path().join("symlink"); let sym_meta = symlink_metadata(&restored_symlink_path).unwrap(); assert!(sym_meta.file_type().is_symlink()); - assert_eq!(UnixTime::from(sym_meta.modified().unwrap()), years_ago); + assert_eq!(FileTime::from(sym_meta.modified().unwrap()), years_ago); assert_eq!( read_link(&restored_symlink_path).unwrap(), PathBuf::from("target") diff --git a/tests/api/transport.rs b/tests/api/transport.rs index f2a05660..99e8951a 100644 --- a/tests/api/transport.rs +++ b/tests/api/transport.rs 
@@ -13,7 +13,7 @@ use assert_fs::prelude::*; use url::Url; -use conserve::transport::{open_transport, ListDirNames}; +use conserve::transport::{open_transport, ListDir}; #[test] fn open_local() { @@ -33,7 +33,7 @@ fn list_dir_names() { let transport = open_transport(url.as_str()).unwrap(); dbg!(&transport); - let ListDirNames { mut files, dirs } = transport.list_dir_names("").unwrap(); + let ListDir { mut files, dirs } = transport.list_dir("").unwrap(); assert_eq!(dirs, ["a dir"]); files.sort(); assert_eq!(files, ["a file", "another file"]); @@ -51,7 +51,7 @@ fn parse_location_urls() { assert_eq!(parsed_scheme("/backup/repo.c6"), "file"); assert_eq!(parsed_scheme("../backup/repo.c6"), "file"); assert_eq!(parsed_scheme("c:/backup/repo"), "file"); - assert_eq!(parsed_scheme(r#"c:\backup\repo\"#), "file"); + assert_eq!(parsed_scheme(r"c:\backup\repo\"), "file"); } #[test] diff --git a/tests/cli/backup.rs b/tests/cli/backup.rs index 8102e21c..6afa4810 100644 --- a/tests/cli/backup.rs +++ b/tests/cli/backup.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2016-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -11,7 +11,12 @@ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. +use std::fs::read_to_string; + use assert_cmd::prelude::*; +use assert_fs::NamedTempFile; +use indoc::indoc; +use serde_json::Deserializer; use conserve::test_fixtures::{ScratchArchive, TreeFixture}; @@ -24,6 +29,55 @@ fn backup_verbose() { src.create_dir("subdir"); src.create_file("subdir/a"); src.create_file("subdir/b"); + let changes_json = NamedTempFile::new("changes.json").unwrap(); + + run_conserve() + .args(["backup", "--no-stats", "-v"]) + .arg(af.path()) + .arg(src.path()) + .arg("--changes-json") + .arg(changes_json.path()) + .assert() + .success() + .stdout(indoc! { " + + /subdir/a + + /subdir/b + "}); + + let changes_json = read_to_string(changes_json.path()).unwrap(); + println!("{changes_json}"); + let changes: Vec<serde_json::Value> = Deserializer::from_str(&changes_json) + .into_iter::<serde_json::Value>() + .map(Result::unwrap) + .collect(); + assert_eq!(changes.len(), 2); + assert_eq!(changes[0]["apath"], "/subdir/a"); + assert_eq!(changes[0]["change"], "Added"); + assert_eq!(changes[0]["added"]["kind"], "File"); + assert_eq!(changes[1]["apath"], "/subdir/b"); + assert_eq!(changes[1]["change"], "Added"); + assert_eq!(changes[1]["added"]["kind"], "File"); +} + +#[test] +fn verbose_backup_does_not_print_unchanged_files() { + let af = ScratchArchive::new(); + let src = TreeFixture::new(); + src.create_file("a"); + src.create_file("b"); + + run_conserve() + .args(["backup", "--no-stats", "-v"]) + .arg(af.path()) + .arg(src.path()) + .assert() + .success() + .stdout(indoc! { " + + /a + + /b + "}); + + src.create_file_with_contents("b", b"new b contents"); run_conserve() .args(["backup", "--no-stats", "-v"]) @@ -31,5 +85,7 @@ .arg(af.path()) .arg(src.path()) .assert() .success() - .stdout("+ /subdir/a\n+ /subdir/b\n"); + .stdout(indoc! { " + * /b + "}); } diff --git a/tests/cli/delete.rs b/tests/cli/delete.rs index f8a5fef8..d10b2a4b 100644 --- a/tests/cli/delete.rs +++ b/tests/cli/delete.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2016, 2017, 2018, 2019, 2020 Martin Pool. +// Copyright 2016-2023 Martin Pool.
// This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -37,7 +37,7 @@ fn delete_both_bands() { .success(); assert_eq!(af.list_band_ids().unwrap().len(), 0); - assert_eq!(af.block_dir().block_names().unwrap().count(), 0); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 0); } #[test] @@ -55,7 +55,7 @@ fn delete_first_version() { assert_eq!(af.list_band_ids().unwrap(), &[BandId::new(&[1])]); // b0 contains two small files packed into the same block, which is not deleted. // b1 (not deleted) adds one additional block, which is still referenced. - assert_eq!(af.block_dir().block_names().unwrap().count(), 2); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 2); let rd = TempDir::new().unwrap(); run_conserve() @@ -91,7 +91,7 @@ fn delete_second_version() { assert_eq!(af.list_band_ids().unwrap(), &[BandId::new(&[0])]); // b0 contains two small files packed into the same block. - assert_eq!(af.block_dir().block_names().unwrap().count(), 1); + assert_eq!(af.block_dir().iter_block_names().unwrap().count(), 1); let rd = TempDir::new().unwrap(); run_conserve() @@ -118,18 +118,13 @@ fn delete_second_version() { fn delete_nonexistent_band() { let af = ScratchArchive::new(); - let pred_fn = predicate::str::is_match( - r"conserve error: Failed to delete band b0000 - caused by: (No such file or directory|The system cannot find the file specified\.) \(os error \d+\) -", - ) - .unwrap(); - run_conserve() .args(["delete"]) .args(["-b", "b0000"]) .arg(af.path()) .assert() - .stdout(pred_fn) + .stderr(predicate::str::contains( + "ERROR conserve: Band not found: b0000", + )) .failure(); } diff --git a/tests/cli/diff.rs b/tests/cli/diff.rs index 0bcc8d7b..9d990ffb 100644 --- a/tests/cli/diff.rs +++ b/tests/cli/diff.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2021 Martin Pool. +// Copyright 2021-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -13,12 +13,12 @@ //! Test `conserve diff`. -use std::fs; - use assert_cmd::prelude::*; +use indoc::indoc; use predicates::prelude::*; use conserve::test_fixtures::{ScratchArchive, TreeFixture}; +use serde_json::Value; use crate::run_conserve; @@ -36,21 +36,6 @@ fn setup() -> (ScratchArchive, TreeFixture) { (af, tf) } -#[cfg(unix)] -fn setup_symlink() -> (ScratchArchive, TreeFixture) { - let af = ScratchArchive::new(); - let tf = TreeFixture::new(); - tf.create_dir("subdir"); - tf.create_symlink("subdir/link", "target"); - run_conserve() - .arg("backup") - .arg(af.path()) - .arg(tf.path()) - .assert() - .success(); - (af, tf) -} - #[test] fn no_changes() { let (af, tf) = setup(); @@ -71,7 +56,7 @@ fn no_changes() { .arg(tf.path()) .assert() .success() - .stdout(".\t/\n.\t/hello.c\n.\t/subdir\n") + .stdout(". /\n. /hello.c\n. /subdir\n") .stderr(predicate::str::is_empty()); } @@ -79,7 +64,8 @@ fn no_changes() { fn add_entries() { let (af, tf) = setup(); tf.create_dir("src"); - tf.create_file_with_contents("src/new.rs", b"pub fn main() {}"); + let new_rs_content = b"pub fn main() {}"; + tf.create_file_with_contents("src/new.rs", new_rs_content); run_conserve() .arg("diff") @@ -87,8 +73,39 @@ fn add_entries() { .arg(tf.path()) .assert() .success() - .stdout("+\t/src\n+\t/src/new.rs\n") + .stdout(indoc! 
{" + + /src + + /src/new.rs + "}) + .stderr(predicate::str::is_empty()); + + // Inspect json diff + let command = run_conserve() + .args(["diff", "-j"]) + .arg(af.path()) + .arg(tf.path()) + .assert() + .success() .stderr(predicate::str::is_empty()); + let diff_json = &command.get_output().stdout; + println!("{}", std::str::from_utf8(diff_json).unwrap()); + let diff = serde_json::Deserializer::from_slice(diff_json) + .into_iter::() + .collect::, _>>() + .unwrap(); + println!("{diff:#?}"); + assert_eq!(diff.len(), 2); + assert_eq!(diff[0]["apath"], "/src"); + assert_eq!(diff[0]["added"]["kind"], "Dir"); + assert_eq!(diff[0]["added"]["size"], Value::Null); + assert!(diff[0]["added"]["mtime"].is_string()); + // User/group currently only added on Unix. + // assert!(diff[0]["added"]["user"].is_string()); + // assert!(diff[0]["added"]["group"].is_string()); + assert_eq!(diff[1]["apath"], "/src/new.rs"); + assert_eq!(diff[1]["added"]["kind"], "File"); + assert_eq!(diff[1]["added"]["size"], new_rs_content.len()); + assert!(diff[1]["added"]["mtime"].is_string()); } #[test] @@ -102,7 +119,7 @@ fn remove_file() { .arg(tf.path()) .assert() .success() - .stdout("-\t/hello.c\n") + .stdout("- /hello.c\n") .stderr(predicate::str::is_empty()); run_conserve() @@ -112,7 +129,7 @@ fn remove_file() { .arg(tf.path()) .assert() .success() - .stdout(".\t/\n-\t/hello.c\n.\t/subdir\n") + .stdout(". /\n- /hello.c\n. /subdir\n") .stderr(predicate::str::is_empty()); } @@ -128,7 +145,9 @@ fn change_kind() { .arg(tf.path()) .assert() .success() - .stdout("*\t/subdir\n") + .stdout(indoc! {" + * /subdir + "}) .stderr(predicate::str::is_empty()); run_conserve() @@ -138,7 +157,11 @@ fn change_kind() { .arg(tf.path()) .assert() .success() - .stdout(".\t/\n.\t/hello.c\n*\t/subdir\n") + .stdout(indoc! {" + . / + . /hello.c + * /subdir + "}) .stderr(predicate::str::is_empty()); } @@ -154,59 +177,9 @@ fn change_file_content() { .arg(tf.path()) .assert() .success() - .stdout("*\t/hello.c\n") - .stderr(predicate::str::is_empty()); - - run_conserve() - .arg("diff") - .arg("--include-unchanged") - .arg(af.path()) - .arg(tf.path()) - .assert() - .success() - .stdout(".\t/\n*\t/hello.c\n.\t/subdir\n") - .stderr(predicate::str::is_empty()); -} - -#[cfg(unix)] -#[test] -pub fn symlink_unchanged() { - let (af, tf) = setup_symlink(); - - run_conserve() - .arg("diff") - .arg(af.path()) - .arg(tf.path()) - .assert() - .success() - .stdout("") - .stderr(predicate::str::is_empty()); - - run_conserve() - .arg("diff") - .arg("--include-unchanged") - .arg(af.path()) - .arg(tf.path()) - .assert() - .success() - .stdout(".\t/\n.\t/subdir\n.\t/subdir/link\n") - .stderr(predicate::str::is_empty()); -} - -#[cfg(unix)] -#[test] -pub fn symlink_changed() { - let (af, tf) = setup_symlink(); - fs::remove_file(tf.path().join("subdir/link")).unwrap(); - tf.create_symlink("subdir/link", "newtarget"); - - run_conserve() - .arg("diff") - .arg(af.path()) - .arg(tf.path()) - .assert() - .success() - .stdout("*\t/subdir/link\n") + .stdout(indoc! {" + * /hello.c + "}) .stderr(predicate::str::is_empty()); run_conserve() @@ -216,6 +189,10 @@ pub fn symlink_changed() { .arg(tf.path()) .assert() .success() - .stdout(".\t/\n.\t/subdir\n*\t/subdir/link\n") + .stdout(indoc! {" + . / + * /hello.c + . 
/subdir + "}) .stderr(predicate::str::is_empty()); } diff --git a/tests/cli/exclude.rs b/tests/cli/exclude.rs index dee35673..d30612eb 100644 --- a/tests/cli/exclude.rs +++ b/tests/cli/exclude.rs @@ -189,8 +189,8 @@ fn restore_exclude_excludes_subtrees() { .assert() .success() .stdout(indoc! { " - / - /hello + + / + + /hello "}) .stderr(""); dest.child("subdir").assert(predicate::path::missing()); diff --git a/tests/cli/ls.rs b/tests/cli/ls.rs new file mode 100644 index 00000000..e85c929e --- /dev/null +++ b/tests/cli/ls.rs @@ -0,0 +1,37 @@ +// Conserve backup system. +// Copyright 2023 Martin Pool. + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +//! Test `conserve ls`. + +use assert_cmd::prelude::*; +use indoc::indoc; +use pretty_assertions::assert_eq; + +use super::run_conserve; + +#[test] +fn ls_json() { + let cmd = run_conserve() + .args(["ls", "--json", "./testdata/archive/minimal/v0.6.17"]) + .assert() + .success(); + assert_eq!( + String::from_utf8_lossy(&cmd.get_output().stdout), + indoc! { r#" + {"apath":"/","kind":"Dir","mtime":"2020-06-16 00:15:23.0 +00:00:00","unix_mode":509,"user":"mbp","group":"mbp"} + {"apath":"/hello","kind":"File","size":12,"mtime":"2020-06-16 00:15:23.0 +00:00:00","unix_mode":436,"user":"mbp","group":"mbp"} + {"apath":"/subdir","kind":"Dir","mtime":"2020-06-16 00:15:23.0 +00:00:00","unix_mode":509,"user":"mbp","group":"mbp"} + {"apath":"/subdir/subfile","kind":"File","size":12,"mtime":"2020-06-16 00:15:23.0 +00:00:00","unix_mode":436,"user":"mbp","group":"mbp"} + "# } + ); +} diff --git a/tests/cli/main.rs b/tests/cli/main.rs index 4c84f1eb..483af1c4 100644 --- a/tests/cli/main.rs +++ b/tests/cli/main.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2016, 2017, 2018, 2019, 2020, 2021, 2022 Martin Pool. +// Copyright 2016-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -13,13 +13,17 @@ //! Run conserve CLI as a subprocess and test it. +use std::fs::read_to_string; use std::path::PathBuf; use std::process::Command; use assert_cmd::prelude::*; use assert_fs::prelude::*; +use assert_fs::NamedTempFile; use assert_fs::TempDir; +use indoc::indoc; use predicates::prelude::*; +use serde_json::Deserializer; use url::Url; use conserve::test_fixtures::{ScratchArchive, TreeFixture}; @@ -28,11 +32,14 @@ mod backup; mod delete; mod diff; mod exclude; +mod ls; +mod trace; mod validate; mod versions; #[cfg(unix)] mod unix { + mod diff; mod permissions; } @@ -74,7 +81,9 @@ fn clean_error_on_non_archive() { .arg(".") .assert() .failure() - .stdout(predicate::str::contains("Not a Conserve archive")); + .stderr(predicate::str::contains( + "Not a Conserve archive (no CONSERVE header found)", + )); } #[test] @@ -89,7 +98,7 @@ fn basic_backup() { .assert() .success() .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with("Created new archive")); + .stdout(predicate::str::is_empty()); // New archive contains no versions. 
run_conserve() @@ -131,8 +140,8 @@ .arg(&src) .assert() .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with("Backup complete.\n")); + .stderr(predicate::str::contains("Backup complete.")) + .stdout(predicate::str::is_empty()); // TODO: Now inspect the archive. run_conserve() @@ -213,6 +222,7 @@ // TODO: Factor out comparison to expected tree. let restore_dir = TempDir::new().unwrap(); + let restore_json = NamedTempFile::new("restore.json").unwrap(); // Also try --no-progress here; should make no difference because these tests run // without a pty. run_conserve() .arg("restore") .arg("-v") .arg("--no-progress") + .arg("--no-stats") .arg(&arch_dir) .arg(restore_dir.path()) + .arg("--changes-json") + .arg(restore_json.path()) .assert() .success() .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with( - "/\n\ - /hello\n\ - /subdir\n\ - /subdir/subfile\n\ - Restore complete.\n", - )); + .stdout(indoc! { " + + / + + /hello + + /subdir + + /subdir/subfile + " }); restore_dir .child("subdir") .assert(predicate::path::is_dir()); @@ -245,6 +257,19 @@ .child("subfile") .assert("I like Rust\n"); + let json = read_to_string(restore_json.path()).unwrap(); + dbg!(&json); + let changes: Vec<serde_json::Value> = Deserializer::from_str(&json) + .into_iter::<serde_json::Value>() + .map(Result::unwrap) + .collect(); + dbg!(&changes); + assert_eq!(changes.len(), 4); + assert_eq!(changes[0]["apath"], "/"); + assert_eq!(changes[1]["apath"], "/hello"); + assert_eq!(changes[2]["apath"], "/subdir"); + assert_eq!(changes[3]["apath"], "/subdir/subfile"); + // Try to restore again over the same directory: should decline. run_conserve() .arg("restore") @@ -253,7 +278,9 @@ .arg(restore_dir.path()) .assert() .failure() - .stdout(predicate::str::contains("Destination directory not empty")); + .stderr(predicate::str::contains( + "Destination directory is not empty", + )); // Restore with specified band id / backup version. { @@ -274,8 +301,8 @@ .arg(arch_dir) .assert() .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::contains("Archive is OK.\n")); + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::contains("Archive is OK.\n")); // TODO: Compare vs source tree.
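    // One way the TODO above might be addressed is with the dir_assert crate
    // that the damage tests in this patch already depend on; a hypothetical
    // sketch (assuming `src` exposes the source tree path like the fixtures do):
    //
    //     use dir_assert::assert_paths;
    //     assert_paths!(src.path(), restore_dir.path());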
} @@ -294,21 +321,22 @@ fn empty_archive() { .arg(restore_dir.path()) .assert() .failure() - .stdout(predicate::str::contains("Archive has no bands")); + .stderr(predicate::str::contains("Archive is empty")); run_conserve() .arg("ls") .arg(&adir) .assert() .failure() - .stdout(predicate::str::contains("Archive has no bands")); + .stderr(predicate::str::contains("Archive is empty")); run_conserve() .arg("versions") .arg(&adir) .assert() .success() - .stdout(predicate::str::is_empty()); + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); run_conserve().arg("gc").arg(adir).assert().success(); } @@ -339,16 +367,7 @@ fn incomplete_version() { .arg(af.path()) .assert() .failure() - .stdout(predicate::str::contains("incomplete and may be in use")); -} - -#[test] -fn validate_non_fatal_problems_nonzero_result() { - run_conserve() - .args(["validate", "testdata/damaged/missing-block/"]) - .assert() - .stdout(predicate::str::contains("Archive has some problems.")) - .code(2); + .stderr(predicate::str::contains("incomplete and may be in use")); } #[test] diff --git a/tests/cli/trace.rs b/tests/cli/trace.rs new file mode 100644 index 00000000..e5dc8c12 --- /dev/null +++ b/tests/cli/trace.rs @@ -0,0 +1,21 @@ +// Copyright 2023 Martin Pool + +//! Tests for trace-related options and behaviors of the Conserve CLI. + +use assert_fs::prelude::*; +use predicates::prelude::*; + +use super::*; + +#[test] +fn no_trace_timestamps_by_default() { + let temp_dir = TempDir::new().unwrap(); + run_conserve() + .args(["-D", "init"]) + .arg(temp_dir.child("archive").path()) + .assert() + .success() + .stderr(predicate::str::contains( + "TRACE conserve::ui::termui: Tracing enabled", + )); +} diff --git a/tests/cli/unix/diff.rs b/tests/cli/unix/diff.rs new file mode 100644 index 00000000..29ef4c6a --- /dev/null +++ b/tests/cli/unix/diff.rs @@ -0,0 +1,87 @@ +// Conserve backup system. +// Copyright 2021-2023 Martin Pool. + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +//! Test `conserve diff` on Unix with symlinks. + +use std::fs; + +use assert_cmd::prelude::*; +use predicates::prelude::*; + +use conserve::test_fixtures::{ScratchArchive, TreeFixture}; + +use crate::run_conserve; + +fn setup_symlink() -> (ScratchArchive, TreeFixture) { + let af = ScratchArchive::new(); + let tf = TreeFixture::new(); + tf.create_dir("subdir"); + tf.create_symlink("subdir/link", "target"); + run_conserve() + .arg("backup") + .arg(af.path()) + .arg(tf.path()) + .assert() + .success(); + (af, tf) +} + +#[test] +pub fn symlink_unchanged() { + let (af, tf) = setup_symlink(); + + run_conserve() + .arg("diff") + .arg(af.path()) + .arg(tf.path()) + .assert() + .success() + .stdout("") + .stderr(predicate::str::is_empty()); + + run_conserve() + .arg("diff") + .arg("--include-unchanged") + .arg(af.path()) + .arg(tf.path()) + .assert() + .success() + .stdout(". /\n. /subdir\n. 
/subdir/link\n") + .stderr(predicate::str::is_empty()); +} + +#[test] +pub fn symlink_changed() { + let (af, tf) = setup_symlink(); + fs::remove_file(tf.path().join("subdir/link")).unwrap(); + tf.create_symlink("subdir/link", "newtarget"); + + run_conserve() + .arg("diff") + .arg(af.path()) + .arg(tf.path()) + .assert() + .success() + .stdout("* /subdir/link\n") + .stderr(predicate::str::is_empty()); + + run_conserve() + .arg("diff") + .arg("--include-unchanged") + .arg(af.path()) + .arg(tf.path()) + .assert() + .success() + .stdout(". /\n. /subdir\n* /subdir/link\n") + .stderr(predicate::str::is_empty()); +} diff --git a/tests/cli/unix/permissions.rs b/tests/cli/unix/permissions.rs index 4e38e02b..dad8e9b8 100644 --- a/tests/cli/unix/permissions.rs +++ b/tests/cli/unix/permissions.rs @@ -10,7 +10,7 @@ use std::path::{Path, PathBuf}; use assert_cmd::prelude::*; use assert_fs::prelude::*; use assert_fs::TempDir; -use indoc::indoc; +use indoc::{formatdoc, indoc}; use predicates::prelude::*; use crate::run_conserve; @@ -29,8 +29,7 @@ fn backup_unix_permissions() { .arg(&arch_dir) .assert() .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with("Created new archive")); + .stderr(predicate::str::is_empty()); // copy the appropriate testdata into the testdir let src: PathBuf = "./testdata/tree/minimal".into(); @@ -88,7 +87,7 @@ fn backup_unix_permissions() { .arg(&data_dir) .assert() .success() - .stderr(predicate::str::is_empty()) + .stderr(predicate::str::contains("Backup complete.")) .stdout(predicate::str::starts_with(expected)); // verify file permissions in stored archive @@ -98,12 +97,12 @@ fn backup_unix_permissions() { .assert() .success() .stderr(predicate::str::is_empty()) - .stdout(predicate::str::diff(format!( - "rwxr-xr-x {user:<10} {group:<10} /\n\ - r--r--r-- {user:<10} {group:<10} /hello\n\ - rwxrwxr-x {user:<10} {group:<10} /subdir\n\ - rwxr-xr-x {user:<10} {group:<10} /subdir/subfile\n" - ))); + .stdout(predicate::str::diff(formatdoc! { " + rwxr-xr-x {user:<10} {group:<10} / + r--r--r-- {user:<10} {group:<10} /hello + rwxrwxr-x {user:<10} {group:<10} /subdir + rwxr-xr-x {user:<10} {group:<10} /subdir/subfile + " })); // create a directory to restore to let restore_dir = TempDir::new().unwrap(); @@ -116,13 +115,12 @@ fn backup_unix_permissions() { .assert() .success() .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with(format!( - "rwxr-xr-x {user:<10} {group:<10} /\n\ - r--r--r-- {user:<10} {group:<10} /hello\n\ - rwxrwxr-x {user:<10} {group:<10} /subdir\n\ - rwxr-xr-x {user:<10} {group:<10} /subdir/subfile\n\ - Restore complete.\n" - ))); + .stdout(predicate::str::diff(formatdoc! 
{" + + rwxr-xr-x {user:<10} {group:<10} / + + r--r--r-- {user:<10} {group:<10} /hello + + rwxrwxr-x {user:<10} {group:<10} /subdir + + rwxr-xr-x {user:<10} {group:<10} /subdir/subfile + "})); } #[test] @@ -138,8 +136,7 @@ fn backup_user_and_permissions() { .arg(&arch_dir) .assert() .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with("Created new archive")); + .stderr(predicate::str::is_empty()); let src: PathBuf = "./testdata/tree/minimal".into(); assert!(src.is_dir()); @@ -197,25 +194,26 @@ fn backup_user_and_permissions() { .arg(&src) .assert() .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with("Backup complete.\n")); + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::contains("Backup complete.\n")); let restore_dir = TempDir::new().unwrap(); // restore run_conserve() - .args(["restore", "-v", "-l", "--no-progress"]) + .args(["restore", "-v", "-l", "--no-progress", "--no-stats"]) .arg(&arch_dir) .arg(restore_dir.path()) .assert() .success() .stderr(predicate::str::is_empty()) - .stdout(predicate::str::starts_with(format!( - "{} {} /\n\ - {} {} /hello\n\ - {} {} /subdir\n\ - {} {} /subdir/subfile\n\ - Restore complete.\n", + .stdout(predicate::str::diff(formatdoc!( + " + + {} {} / + + {} {} /hello + + {} {} /subdir + + {} {} /subdir/subfile + ", UnixMode::from(mdata_root.permissions()), Owner::from(&mdata_root), UnixMode::from(mdata_hello.permissions()), @@ -240,7 +238,7 @@ fn backup_user_and_permissions() { } #[test] -/// List an archive with particular encoded permissions, from the first version tha tracked +/// List an archive with particular encoded permissions, from the first version that tracked /// ownership and permissions. /// /// This should succeed even, and especially, if the machine running the tests does diff --git a/tests/cli/validate.rs b/tests/cli/validate.rs index d99ec3a2..a88887a2 100644 --- a/tests/cli/validate.rs +++ b/tests/cli/validate.rs @@ -1,16 +1,50 @@ +// Copyright 2023 Martin Pool + //! Tests for the `conserve validate` CLI. +use std::path::Path; + use assert_cmd::prelude::*; use assert_fs::prelude::*; -use assert_fs::TempDir; +use assert_fs::{NamedTempFile, TempDir}; use predicates::prelude::*; +use serde_json::json; +use serde_json::{Deserializer, Value}; +use tracing::Level; use super::run_conserve; +fn read_log_json(path: &Path) -> Vec { + let json_content = std::fs::read_to_string(path).unwrap(); + println!("{json_content}"); + Deserializer::from_str(&json_content) + .into_iter::() + .map(Result::unwrap) + .collect::>() +} + +/// Filter out only logs with severity equal or more important than `level`. +fn filter_by_level(logs: &[serde_json::Value], level: Level) -> Vec<&serde_json::Value> { + logs.iter() + .filter(move |event| event["level"].as_str().unwrap().parse::().unwrap() <= level) + .collect() +} + +// /// Reduce json logs to just their messages. 
+// fn events_to_messages<'s, I>(logs: I) -> Vec<&'s str>
+// where
+//     I: IntoIterator<Item = &'s serde_json::Value>,
+// {
+//     logs.into_iter()
+//         .map(|event| event["fields"]["message"].as_str().unwrap())
+//         .collect()
+// }
+
 ///
 #[test]
 fn validate_does_not_complain_about_gc_lock() {
     let temp = TempDir::new().unwrap();
+    let log_temp = NamedTempFile::new("log.json").unwrap();
     run_conserve()
         .args(["init"])
         .arg(temp.path())
@@ -19,8 +53,36 @@
     temp.child("GC_LOCK").touch().unwrap();
     run_conserve()
         .args(["validate"])
+        .arg("--log-json")
+        .arg(log_temp.path())
         .arg(temp.path())
         .assert()
         .stdout(predicate::str::contains("Unexpected file").not())
         .success();
+    let events = read_log_json(log_temp.path());
+    dbg!(&events);
+    assert!(filter_by_level(&events, Level::WARN).is_empty());
+}
+
+#[test]
+fn validate_non_fatal_problems_nonzero_result_and_json_log() {
+    let log_temp = NamedTempFile::new("log.json").unwrap();
+    run_conserve()
+        .args(["validate", "testdata/damaged/missing-block/"])
+        .arg("--log-json")
+        .arg(log_temp.path())
+        .assert()
+        .stderr(predicate::str::contains("Archive has some problems."))
+        .code(2);
+    let events = read_log_json(log_temp.path());
+    dbg!(&events);
+    let errors = filter_by_level(&events, Level::ERROR);
+    assert_eq!(errors.len(), 1);
+    assert_eq!(
+        errors[0]["fields"],
+        json!({
+            "block_hash": "fec91c70284c72d0d4e3684788a90de9338a5b2f47f01fedbe203cafd68708718ae5672d10eca804a8121904047d40d1d6cf11e7a76419357a9469af41f22d01",
+            "message": "Referenced block missing",
+        })
+    );
+}
diff --git a/tests/damage/README.md b/tests/damage/README.md
new file mode 100644
index 00000000..fdc9180c
--- /dev/null
+++ b/tests/damage/README.md
@@ -0,0 +1,10 @@
+# damage tests
+
+Conserve aims to keep the archive readable, and future backups writable,
+even if some files are damaged: truncated, corrupt, missing, or unreadable.
+
+This is not yet achieved in every case, but the format and code are designed to
+work towards this goal.
+
+These API tests write an archive, inflict some damage, and then check that other
+information can still be read, new backups written, and the archive validated.
diff --git a/tests/damage/main.rs b/tests/damage/main.rs
new file mode 100644
index 00000000..c38e6224
--- /dev/null
+++ b/tests/damage/main.rs
@@ -0,0 +1,90 @@
+// Conserve backup system.
+// Copyright 2020-2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+use assert_fs::prelude::*;
+use assert_fs::TempDir;
+use dir_assert::assert_paths;
+use pretty_assertions::assert_eq;
+use rstest::rstest;
+use tracing_test::traced_test;
+// use predicates::prelude::*;
+
+use conserve::{
+    backup, restore, Apath, Archive, BackupOptions, BandId, BandSelectionPolicy, EntryTrait,
+    Exclude, RestoreOptions, ValidateOptions,
+};
+
+mod strategy;
+use strategy::Damage;
+
+// TODO: Also test damage to other files: band tail, index hunks, data blocks, etc.
+// TODO: Test that you can delete a damaged backup; then there are no problems.
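+
+// Illustrative sketch only: with the `#[values]` argument below, `#[rstest]`
+// expands the parameterized function into one generated test per value,
+// roughly as if these had been written by hand (the names here are
+// hypothetical; rstest generates its own case names):
+//
+//     #[test]
+//     fn backup_after_damage_delete() { backup_after_damage(Damage::Delete) }
+//     #[test]
+//     fn backup_after_damage_truncate() { backup_after_damage(Damage::Truncate) }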
+
+#[rstest]
+#[traced_test]
+#[test]
+fn backup_after_damage(#[values(Damage::Delete, Damage::Truncate)] damage: Damage) {
+    let archive_dir = TempDir::new().unwrap();
+    let source_dir = TempDir::new().unwrap();
+
+    let archive = Archive::create_path(archive_dir.path()).expect("create archive");
+    source_dir
+        .child("file")
+        .write_str("content in first backup")
+        .unwrap();
+
+    let backup_options = BackupOptions::default();
+    backup(&archive, source_dir.path(), &backup_options).expect("initial backup");
+
+    damage.damage(&archive_dir.child("b0000").child("BANDHEAD"));
+
+    // A second backup should succeed.
+    source_dir
+        .child("file")
+        .write_str("content in second backup")
+        .unwrap();
+    backup(&archive, source_dir.path(), &backup_options)
+        .expect("write second backup even though first bandhead is damaged");
+
+    // Can restore the second backup.
+    let restore_dir = TempDir::new().unwrap();
+    restore(&archive, restore_dir.path(), &RestoreOptions::default())
+        .expect("restore second backup");
+
+    // Since the second backup rewrote the single file in the backup (and the root dir),
+    // we should get all the content back out.
+    assert_paths!(source_dir.path(), restore_dir.path());
+
+    // You can see both versions.
+    let versions = archive.list_band_ids().expect("list versions");
+    assert_eq!(versions, [BandId::zero(), BandId::new(&[1])]);
+
+    // Can list the contents of the second backup.
+    let apaths: Vec<String> = archive
+        .iter_entries(
+            BandSelectionPolicy::Latest,
+            Apath::root(),
+            Exclude::nothing(),
+        )
+        .expect("iter entries")
+        .map(|e| e.apath().to_string())
+        .collect();
+
+    assert_eq!(apaths, ["/", "/file"]);
+
+    // Validation completes, although with warnings.
+    // TODO: This should return problems that we can inspect.
+    archive
+        .validate(&ValidateOptions::default())
+        .expect("validate");
+}
diff --git a/tests/damage/strategy.rs b/tests/damage/strategy.rs
new file mode 100644
index 00000000..d692ce9e
--- /dev/null
+++ b/tests/damage/strategy.rs
@@ -0,0 +1,50 @@
+// Conserve backup system.
+// Copyright 2023 Martin Pool.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+//! Strategies for damaging files.
+
+use std::fs::{remove_file, OpenOptions};
+use std::path::Path;
+
+/// A way of damaging a file in an archive.
+#[derive(Debug, Clone)]
+pub enum Damage {
+    /// Truncate the file to zero bytes.
+    Truncate,
+
+    /// Delete the file.
+    Delete,
+    // TODO: Also test other types of damage, including
+    // permission denied (as a kind of IOError), and binary junk.
+}
+
+impl Damage {
+    /// Apply this damage to a file.
+    ///
+    /// The file must already exist.
+ pub fn damage(&self, path: &Path) { + assert!(path.exists(), "{path:?} does not exist"); + match self { + Damage::Truncate => { + OpenOptions::new() + .write(true) + .truncate(true) + .open(path) + .expect("truncate file"); + } + Damage::Delete => { + remove_file(path).expect("delete file"); + } + } + } +} diff --git a/tests/expensive/changes.rs b/tests/expensive/changes.rs index 6ec9449c..8e8be869 100644 --- a/tests/expensive/changes.rs +++ b/tests/expensive/changes.rs @@ -1,5 +1,5 @@ // Conserve backup system. -// Copyright 2022 Martin Pool. +// Copyright 2022-2023 Martin Pool. // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -93,7 +93,7 @@ fn backup_sequential_changes(changes: &[TreeChange]) { max_entries_per_hunk: 3, ..BackupOptions::default() }; - backup(&archive, &tf.live_tree(), &options).unwrap(); + backup(&archive, tf.path(), &options).unwrap(); let snapshot = TempDir::new().unwrap(); cp_r::CopyOptions::default() .copy_tree(tf.path(), snapshot.path())
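
As a usage sketch of the `Damage` helper added above, assuming it is in scope
(the `demo_damage` function and the "victim" file name are illustrative, not
part of the patch):

    use assert_fs::prelude::*;
    use assert_fs::TempDir;
    use strategy::Damage;

    fn demo_damage() {
        let dir = TempDir::new().unwrap();
        let file = dir.child("victim");
        file.write_str("some bytes").unwrap();

        // Truncate keeps the file present but empties it.
        Damage::Truncate.damage(file.path());
        assert_eq!(std::fs::metadata(file.path()).unwrap().len(), 0);

        // Delete removes the file entirely.
        Damage::Delete.damage(file.path());
        assert!(!file.path().exists());
    }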