Skip to content
This repository was archived by the owner on Oct 17, 2022. It is now read-only.

Benchmarking improvements #4

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@ version = "0.1.0"
authors = ["Heinz N. Gies <[email protected]>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["cpb"]
cpb = []

[dev-dependencies]
proptest = "0.10"
criterion = "0.3"
mimalloc = "0.1"
core_affinity = "*"

[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dev-dependencies]
criterion-cycles-per-byte = "0.1"

[[bench]]
name = "criterion_bench"
harness = false
harness = false
157 changes: 48 additions & 109 deletions benches/criterion_bench.rs
Original file line number Diff line number Diff line change
@@ -1,124 +1,63 @@
extern crate core_affinity;
#[macro_use]
extern crate criterion;

use mimalloc::MiMalloc;
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

use criterion::{BatchSize, Criterion, ParameterizedBenchmark, Throughput};
use std::fs::File;
use std::io::Read;
use criterion::{criterion_group, criterion_main, measurement::Measurement, Criterion, Throughput};

macro_rules! bench_file {
($name:ident) => {
fn $name(c: &mut Criterion) {
let core_ids = core_affinity::get_core_ids().unwrap();
core_affinity::set_for_current(core_ids[0]);
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb"))]
use criterion_cycles_per_byte::CyclesPerByte;

let mut vec = Vec::new();
File::open(concat!("data/", stringify!($name), ".data"))
.unwrap()
.read_to_end(&mut vec)
.unwrap();
use std::{fs, str};

let b = ParameterizedBenchmark::new(
"faster_utf8_validator",
|b, data| {
b.iter_batched(
|| data,
|bytes| {
assert!(faster_utf8_validator::validate(&bytes));
},
BatchSize::SmallInput,
)
},
vec![vec],
);
c.bench(
stringify!($name),
b.throughput(|data| Throughput::Bytes(data.len() as u64)),
);
}
};
}
fn bench_file<T: Measurement>(c: &mut Criterion<T>, name: &str, is_valid: bool) {
let core_ids = core_affinity::get_core_ids().unwrap();
core_affinity::set_for_current(core_ids[0]);

macro_rules! bench_file_bad {
($name:ident) => {
fn $name(c: &mut Criterion) {
let core_ids = core_affinity::get_core_ids().unwrap();
core_affinity::set_for_current(core_ids[0]);
let buf = fs::read(format!("data/{}.data", name)).unwrap();

let mut vec = Vec::new();
File::open(concat!("data/", stringify!($name), ".data"))
.unwrap()
.read_to_end(&mut vec)
.unwrap();
let mut group = c.benchmark_group(name);
group.throughput(Throughput::Bytes(buf.len() as u64));
group.bench_function("std_utf8_validator", |b| {
b.iter(|| assert!(str::from_utf8(&buf).is_ok() == is_valid))
});
group.bench_function("faster_utf8_validator", |b| {
b.iter(|| assert!(faster_utf8_validator::validate(&buf) == is_valid))
});

let b = ParameterizedBenchmark::new(
"faster_utf8_validator",
|b, data| {
b.iter_batched(
|| data,
|bytes| {
assert!(!faster_utf8_validator::validate(&bytes));
},
BatchSize::SmallInput,
)
},
vec![vec],
);
c.bench(
stringify!($name),
b.throughput(|data| Throughput::Bytes(data.len() as u64)),
);
}
};
group.finish();
}

bench_file!(apache_builds);
bench_file!(canada);
bench_file!(citm_catalog);
bench_file!(github_events);
bench_file!(gsoc_2018);
bench_file!(instruments);
bench_file!(log);
bench_file!(marine_ik);
bench_file!(mesh);
bench_file!(numbers);
bench_file!(random);
bench_file!(twitterescaped);
bench_file!(twitter);
bench_file!(update_center);
bench_file!(mostly_ascii_sample_ok);
bench_file_bad!(random_bytes);
bench_file!(utf8_characters_0_0x10ffff);
bench_file_bad!(utf8_characters_0_0x10ffff_with_garbage);
bench_file!(utf8_sample_ok);
bench_file!(ascii_sample_ok);
/// Runs [`bench_file`] once for every data set in the benchmark corpus.
///
/// Each table entry pairs a data set name with the validation result that
/// `bench_file` should expect for it (`true` for inputs that must validate,
/// `false` for inputs that must be rejected). Entries are benchmarked in
/// table order.
fn bench_all<T: Measurement>(c: &mut Criterion<T>) {
    // (data set name, expected validation result)
    const CASES: &[(&str, bool)] = &[
        ("apache_builds", true),
        ("canada", true),
        ("citm_catalog", true),
        ("github_events", true),
        ("gsoc_2018", true),
        ("instruments", true),
        ("log", true),
        ("marine_ik", true),
        ("mesh", true),
        ("numbers", true),
        ("random", true),
        ("twitterescaped", true),
        ("twitter", true),
        ("update_center", true),
        ("mostly_ascii_sample_ok", true),
        ("random_bytes", false),
        ("utf8_characters_0_0x10ffff", true),
        ("utf8_characters_0_0x10ffff_with_garbage", false),
        ("utf8_sample_ok", true),
        ("ascii_sample_ok", true),
    ];

    for &(name, is_valid) in CASES {
        bench_file(c, name, is_valid);
    }
}

// Benchmark group compiled only for x86 / x86_64 targets with the `cpb`
// feature enabled: runs `bench_all` on a `Criterion::default()` instance
// reconfigured to use the `CyclesPerByte` measurement.
#[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb"))]
criterion_group! {
name = benches;
config = Criterion::default().with_measurement(CyclesPerByte);
targets = bench_all
}

criterion_group!(
benches,
mostly_ascii_sample_ok,
ascii_sample_ok,
random_bytes,
utf8_characters_0_0x10ffff,
utf8_characters_0_0x10ffff_with_garbage,
utf8_sample_ok,
apache_builds,
canada,
citm_catalog,
github_events,
gsoc_2018,
instruments,
log,
marine_ik,
mesh,
numbers,
random,
twitterescaped,
twitter,
update_center
);
// Benchmark group for every other configuration (a non-x86 target, or the
// `cpb` feature disabled): registers `bench_all` under the `benches` name
// consumed by `criterion_main!`, without specifying a custom measurement.
#[cfg(not(all(any(target_arch = "x86_64", target_arch = "x86"), feature = "cpb")))]
criterion_group!(benches, bench_all);

criterion_main!(benches);
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ mod tests {
s[0] = 0;
let is_valid = validate(s);

assert!(is_valid == String::from_utf8(s.to_vec()).is_ok())
assert!(is_valid == std::str::from_utf8(s).is_ok())
}
}
}
Expand Down