diff --git a/Cargo.toml b/Cargo.toml index 8f5e3e7..647581d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,9 @@ version = "0.1.0" authors = ["Heinz N. Gies "] edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - +[features] +default = ["cpb"] +cpb = [] [dev-dependencies] proptest = "0.10" @@ -13,6 +14,9 @@ criterion = "0.3" mimalloc = "0.1" core_affinity = "*" +[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dev-dependencies] +criterion-cycles-per-byte = "0.1" + [[bench]] name = "criterion_bench" -harness = false \ No newline at end of file +harness = false diff --git a/benches/criterion_bench.rs b/benches/criterion_bench.rs index 1bd592a..89d76e6 100644 --- a/benches/criterion_bench.rs +++ b/benches/criterion_bench.rs @@ -1,124 +1,63 @@ -extern crate core_affinity; -#[macro_use] -extern crate criterion; - use mimalloc::MiMalloc; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; -use criterion::{BatchSize, Criterion, ParameterizedBenchmark, Throughput}; -use std::fs::File; -use std::io::Read; +use criterion::{criterion_group, criterion_main, measurement::Measurement, Criterion, Throughput}; -macro_rules! bench_file { - ($name:ident) => { - fn $name(c: &mut Criterion) { - let core_ids = core_affinity::get_core_ids().unwrap(); - core_affinity::set_for_current(core_ids[0]); +#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb"))] +use criterion_cycles_per_byte::CyclesPerByte; - let mut vec = Vec::new(); - File::open(concat!("data/", stringify!($name), ".data")) - .unwrap() - .read_to_end(&mut vec) - .unwrap(); +use std::{fs, str}; - let b = ParameterizedBenchmark::new( - "faster_utf8_validator", - |b, data| { - b.iter_batched( - || data, - |bytes| { - assert!(faster_utf8_validator::validate(&bytes)); - }, - BatchSize::SmallInput, - ) - }, - vec![vec], - ); - c.bench( - stringify!($name), - b.throughput(|data| Throughput::Bytes(data.len() as u64)), - ); - } - }; -} +fn bench_file(c: &mut Criterion, name: &str, is_valid: bool) { + let core_ids = core_affinity::get_core_ids().unwrap(); + core_affinity::set_for_current(core_ids[0]); -macro_rules! bench_file_bad { - ($name:ident) => { - fn $name(c: &mut Criterion) { - let core_ids = core_affinity::get_core_ids().unwrap(); - core_affinity::set_for_current(core_ids[0]); + let buf = fs::read(format!("data/{}.data", name)).unwrap(); - let mut vec = Vec::new(); - File::open(concat!("data/", stringify!($name), ".data")) - .unwrap() - .read_to_end(&mut vec) - .unwrap(); + let mut group = c.benchmark_group(name); + group.throughput(Throughput::Bytes(buf.len() as u64)); + group.bench_function("std_utf8_validator", |b| { + b.iter(|| assert!(str::from_utf8(&buf).is_ok() == is_valid)) + }); + group.bench_function("faster_utf8_validator", |b| { + b.iter(|| assert!(faster_utf8_validator::validate(&buf) == is_valid)) + }); - let b = ParameterizedBenchmark::new( - "faster_utf8_validator", - |b, data| { - b.iter_batched( - || data, - |bytes| { - assert!(!faster_utf8_validator::validate(&bytes)); - }, - BatchSize::SmallInput, - ) - }, - vec![vec], - ); - c.bench( - stringify!($name), - b.throughput(|data| Throughput::Bytes(data.len() as u64)), - ); - } - }; + group.finish(); } -bench_file!(apache_builds); -bench_file!(canada); -bench_file!(citm_catalog); -bench_file!(github_events); -bench_file!(gsoc_2018); -bench_file!(instruments); -bench_file!(log); -bench_file!(marine_ik); -bench_file!(mesh); -bench_file!(numbers); -bench_file!(random); -bench_file!(twitterescaped); -bench_file!(twitter); -bench_file!(update_center); -bench_file!(mostly_ascii_sample_ok); -bench_file_bad!(random_bytes); -bench_file!(utf8_characters_0_0x10ffff); -bench_file_bad!(utf8_characters_0_0x10ffff_with_garbage); -bench_file!(utf8_sample_ok); -bench_file!(ascii_sample_ok); +fn bench_all(c: &mut Criterion) { + bench_file(c, "apache_builds", true); + bench_file(c, "canada", true); + bench_file(c, "citm_catalog", true); + bench_file(c, "github_events", true); + bench_file(c, "gsoc_2018", true); + bench_file(c, "instruments", true); + bench_file(c, "log", true); + bench_file(c, "marine_ik", true); + bench_file(c, "mesh", true); + bench_file(c, "numbers", true); + bench_file(c, "random", true); + bench_file(c, "twitterescaped", true); + bench_file(c, "twitter", true); + bench_file(c, "update_center", true); + bench_file(c, "mostly_ascii_sample_ok", true); + bench_file(c, "random_bytes", false); + bench_file(c, "utf8_characters_0_0x10ffff", true); + bench_file(c, "utf8_characters_0_0x10ffff_with_garbage", false); + bench_file(c, "utf8_sample_ok", true); + bench_file(c, "ascii_sample_ok", true); +} + +#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb"))] +criterion_group! { + name = benches; + config = Criterion::default().with_measurement(CyclesPerByte); + targets = bench_all +} -criterion_group!( - benches, - mostly_ascii_sample_ok, - ascii_sample_ok, - random_bytes, - utf8_characters_0_0x10ffff, - utf8_characters_0_0x10ffff_with_garbage, - utf8_sample_ok, - apache_builds, - canada, - citm_catalog, - github_events, - gsoc_2018, - instruments, - log, - marine_ik, - mesh, - numbers, - random, - twitterescaped, - twitter, - update_center -); +#[cfg(not(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb")))] +criterion_group!(benches, bench_all); criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index 85421a2..0a88360 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,7 +181,7 @@ mod tests { s[0] = 0; let is_valid = validate(s); - assert!(is_valid == String::from_utf8(s.to_vec()).is_ok()) + assert!(is_valid == std::str::from_utf8(s).is_ok()) } } }