From 4d037a000c7e22b7dd6b14a1f8c61c91ca4558a4 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Thu, 24 Oct 2024 18:29:49 +0000 Subject: [PATCH] Add fuzzer for `utf8::validate`. --- .github/workflows/fuzz.yml | 24 +++++++++++++ .gitignore | 2 +- fuzz/.gitignore | 4 +++ fuzz/Cargo.toml | 23 +++++++++++++ fuzz/fuzz_targets/utf8_validate.rs | 55 ++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/fuzz.yml create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/utf8_validate.rs diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..771492d --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,24 @@ +name: Fuzzing + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + fuzzing: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + override: true + - name: Install cargo-fuzz + run: | + cargo install cargo-fuzz + - name: Run Fuzzing + run: | + cargo fuzz run utf8_validate -- -max_total_time=180 diff --git a/.gitignore b/.gitignore index 42cacb3..ca54338 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ .*.swp tags target -/Cargo.lock +Cargo.lock diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..1f85670 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "bstr-fuzz" +version = "0.0.0" +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.bstr] +path = ".." + +[[bin]] +name = "utf8_validate" +path = "fuzz_targets/utf8_validate.rs" +test = false +doc = false +bench = false + +[workspace] diff --git a/fuzz/fuzz_targets/utf8_validate.rs b/fuzz/fuzz_targets/utf8_validate.rs new file mode 100644 index 0000000..86cb214 --- /dev/null +++ b/fuzz/fuzz_targets/utf8_validate.rs @@ -0,0 +1,55 @@ +//! This fuzzer attempts to test the functional correctness of the `bstr::utf8::validate` function. +//! This coverage is desirable, because some `unsafe` blocks in the `bstr` crate depend on the +//! guarantees made by `utf8::validate` - e.g. the soundness of `bstr::ByteSlice::to_str` depends +//! on these guarantees. +//! +//! The `utf8::validate` function is in a non-public module, which means that we can't test it +//! directly. Therefore we test via `bstr::ByteSlice::to_str` instead. +//! +//! We use the following [test oracle](https://en.wikipedia.org/wiki/Test_oracle) to validate +//! results returned by `utf8::validate`: +//! +//! * A standard library implementation (`std::str::from_utf8` is analogous to +//! `bstr::ByteSlice::to_str` and `run_utf8_validation` in `core/str/validations.rs` is analogous +//! to `bstr::utf8::validate`). +//! https://github.com/BurntSushi/bstr/issues/25#issuecomment-543835601 explains +//! why `bstr` doesn't reuse the standard library's implementation. +//! * TODO: Consider also adding a manual, simple (and therefore hopefully "obviously correct") +//! implementation as another test oracle. + +#![no_main] + +use bstr::ByteSlice; +use libfuzzer_sys::fuzz_target; + +fn validate(data: &[u8]) { + let bstr_result = data.to_str(); + let std_result = std::str::from_utf8(data); + + match bstr_result { + Ok(bstr_str) => { + let Ok(std_str) = std_result else { + panic!("`bstr` succeeded but `std` failed"); + }; + assert_eq!(data.as_ptr(), bstr_str.as_ptr()); + assert_eq!(data.as_ptr(), std_str.as_ptr()); + assert_eq!(data.len(), bstr_str.len()); + assert_eq!(data.len(), std_str.len()); + } + Err(bstr_err) => { + let Err(std_err) = std_result else { + panic!("`bstr` failed but `std` succeeded"); + }; + assert_eq!(bstr_err.error_len(), std_err.error_len()); + assert_eq!(bstr_err.valid_up_to(), std_err.valid_up_to()); + } + } +} + +fuzz_target!(|data: &[u8]| { + // Test various alignments, because `utf8::validate` calls into `ascii::first_non_ascii_byte` + // and the latter is sensitive to the alignment. + for alignment_offset in 0..=(std::cmp::min(data.len(), 16)) { + validate(&data[alignment_offset..]); + } +});