Skip to content

Commit

Permalink
Add fuzzer for utf8::validate.
Browse files Browse the repository at this point in the history
  • Loading branch information
anforowicz committed Nov 14, 2024
1 parent 41f8bdb commit 4d037a0
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 1 deletion.
24 changes: 24 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Continuous fuzzing: runs the `utf8_validate` fuzz target for a bounded amount
# of time on every push to `master` and on every pull request targeting it.
name: Fuzzing

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  fuzzing:
    runs-on: ubuntu-latest
    steps:
    # `actions/checkout@v2` is a stale major version; use a maintained release.
    - uses: actions/checkout@v4
    # The fuzzer is built with the nightly toolchain. `rustup` (shipped on the
    # GitHub-hosted runner image) is used directly instead of the archived
    # `actions-rs/toolchain` action.
    - name: Install Rust
      run: |
        rustup toolchain install nightly --profile minimal
        rustup default nightly
    - name: Install cargo-fuzz
      run: |
        cargo install cargo-fuzz
    # `-max_total_time=180` is forwarded to the fuzzer and caps the run at
    # 180 seconds so the job finishes in bounded time.
    - name: Run Fuzzing
      run: |
        cargo fuzz run utf8_validate -- -max_total_time=180
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.*.swp
tags
target
/Cargo.lock
Cargo.lock
4 changes: 4 additions & 0 deletions fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
target
corpus
artifacts
coverage
23 changes: 23 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Manifest for the fuzzing harness of the `bstr` crate. This package exists only
# to host fuzz targets, so it is never published.
[package]
name = "bstr-fuzz"
version = "0.0.0"
publish = false
edition = "2018"

# Marks this package as a cargo-fuzz project.
[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"

# The crate under test is taken from the parent directory, i.e. the local
# checkout rather than a released version.
[dependencies.bstr]
path = ".."

# One `[[bin]]` entry per fuzz target. Tests, docs and benches are disabled for
# the target binary.
[[bin]]
name = "utf8_validate"
path = "fuzz_targets/utf8_validate.rs"
test = false
doc = false
bench = false

# An empty `[workspace]` table makes this package its own workspace root, so it
# is not picked up as a member of a workspace in a parent directory.
[workspace]
55 changes: 55 additions & 0 deletions fuzz/fuzz_targets/utf8_validate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//! This fuzzer attempts to test the functional correctness of the `bstr::utf8::validate` function.
//! This coverage is desirable, because some `unsafe` blocks in the `bstr` crate depend on the
//! guarantees made by `utf8::validate` - e.g. the soundness of `bstr::ByteSlice::to_str` depends
//! on these guarantees.
//!
//! The `utf8::validate` function is in a non-public module, which means that we can't test it
//! directly. Therefore we test via `bstr::ByteSlice::to_str` instead.
//!
//! We use the following [test oracle](https://en.wikipedia.org/wiki/Test_oracle) to validate
//! results returned by `utf8::validate`:
//!
//! * A standard library implementation (`std::str::from_utf8` is analogous to
//! `bstr::ByteSlice::to_str` and `run_utf8_validation` in `core/str/validations.rs` is analogous
//! to `bstr::utf8::validate`).
//! <https://github.com/BurntSushi/bstr/issues/25#issuecomment-543835601> explains
//! why `bstr` doesn't reuse the standard library's implementation.
//! * TODO: Consider also adding a manual, simple (and therefore hopefully "obviously correct")
//! implementation as another test oracle.
#![no_main]

use bstr::ByteSlice;
use libfuzzer_sys::fuzz_target;

/// Cross-checks `bstr`'s UTF-8 validation against the standard library for a
/// single input.
///
/// `data` is validated with both `bstr::ByteSlice::to_str` and
/// `std::str::from_utf8`, and the two outcomes are required to agree:
///
/// * if both succeed, each returned `&str` must start at the same address as
///   `data` and have the same length;
/// * if both fail, the two errors must report the same `error_len()` and the
///   same `valid_up_to()`.
///
/// # Panics
///
/// Panics (which the fuzzer reports as a crash) when one implementation
/// accepts the input while the other rejects it, or when any of the
/// comparisons above does not hold.
fn validate(data: &[u8]) {
    // `bstr` is evaluated first and `std` second, then both outcomes are
    // compared in one place.
    match (data.to_str(), std::str::from_utf8(data)) {
        (Ok(from_bstr), Ok(from_std)) => {
            // Both views must alias the input exactly: same start, same length.
            assert_eq!(data.as_ptr(), from_bstr.as_ptr());
            assert_eq!(data.as_ptr(), from_std.as_ptr());
            assert_eq!(data.len(), from_bstr.len());
            assert_eq!(data.len(), from_std.len());
        }
        (Ok(_), Err(_)) => panic!("`bstr` succeeded but `std` failed"),
        (Err(_), Ok(_)) => panic!("`bstr` failed but `std` succeeded"),
        (Err(bstr_failure), Err(std_failure)) => {
            // The errors must describe the same invalid sequence.
            assert_eq!(bstr_failure.error_len(), std_failure.error_len());
            assert_eq!(bstr_failure.valid_up_to(), std_failure.valid_up_to());
        }
    }
}

fuzz_target!(|data: &[u8]| {
    // `utf8::validate` calls into `ascii::first_non_ascii_byte`, which is
    // sensitive to alignment. To exercise different alignments, the same input
    // is re-validated with 0, 1, ..., up to 16 leading bytes dropped (fewer if
    // the input is shorter). For inputs of at most 16 bytes the last iteration
    // validates the empty slice.
    let max_offset = data.len().min(16);
    (0..=max_offset).for_each(|start| validate(&data[start..]));
});

0 comments on commit 4d037a0

Please sign in to comment.