Skip to content

Commit

Permalink
Calculate CRC32C using SIMD acceleration
Browse files Browse the repository at this point in the history
Signed-off-by: Li Zhanhui <[email protected]>
  • Loading branch information
lizhanhui committed Nov 16, 2023
1 parent f62d284 commit 9320451
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 10 deletions.
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ edition = "2021"

[dependencies]
byteorder = "1.4"
crc = "3"
crc32c = "0.6.4"
log = "0.4"
memmap2 = "0.9.0"
rand = "0.8.5"
Expand All @@ -31,8 +31,14 @@ env_logger = "0.10"
serde = { version = "1", features = ["derive"] }

[dev-dependencies]
crc = "3"
hdrhistogram = "7.5.2"
quickcheck = "1.0.3"
regex = "1.8.1"
tempfile = "3.5.0"
chrono = "0.4.31"
criterion = "0.5.1"

[[bench]]
name = "benchmark"
harness = false
50 changes: 50 additions & 0 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use crc::{Crc, CRC_32_ISCSI};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::{rngs::OsRng, RngCore};

/// CRC-32 engine from the `crc` crate, built from the `CRC_32_ISCSI` parameter set.
///
/// Serves as the baseline implementation that the `crc32c` crate is measured against
/// in the benchmarks in this file.
pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);

/// Benchmarks checksum throughput over an 8 KiB buffer of random bytes.
///
/// Two implementations are measured inside the `"8k"` group: the `crc` crate
/// (`CASTAGNOLI` digest) and the `crc32c` crate's one-shot `crc32c` function.
///
/// NOTE(review): the function name says "4k" while the buffer and the group
/// label are 8 KiB / "8k" — confirm which one is intended.
pub fn criterion_benchmark_4k(c: &mut Criterion) {
    const PAYLOAD_LEN: usize = 8192;

    // Random input filled from the OS random number generator.
    let mut payload = [0u8; PAYLOAD_LEN];
    OsRng.fill_bytes(&mut payload);

    let mut bench_group = c.benchmark_group("8k");
    // Report results as bytes processed per iteration.
    bench_group.throughput(criterion::Throughput::Bytes(PAYLOAD_LEN as u64));

    // Baseline: streaming digest from the `crc` crate.
    bench_group.bench_function("crc", |bencher| {
        bencher.iter(|| {
            let mut hasher = CASTAGNOLI.digest();
            hasher.update(&payload);
            black_box(hasher.finalize());
        })
    });

    // Candidate: one-shot checksum from the `crc32c` crate.
    bench_group.bench_function("crc32c", |bencher| {
        bencher.iter(|| {
            black_box(crc32c::crc32c(&payload));
        })
    });
}

/// Benchmarks checksum throughput over a 1 MiB buffer of random bytes.
///
/// Two implementations are measured inside the `"1M"` group: the `crc` crate
/// (`CASTAGNOLI` digest) and the `crc32c` crate's one-shot `crc32c` function.
pub fn criterion_benchmark_1024k(c: &mut Criterion) {
    const BUFFER_LEN: usize = 1048576;

    // Allocate on the heap: a 1 MiB array local can overflow the stack on
    // platforms whose main-thread stack is only about 1 MiB.
    let mut buffer = vec![0u8; BUFFER_LEN];
    OsRng.fill_bytes(&mut buffer);

    let mut group = c.benchmark_group("1M");
    // Report results as bytes processed per iteration.
    group.throughput(criterion::Throughput::Bytes(BUFFER_LEN as u64));

    // Baseline: streaming digest from the `crc` crate.
    group.bench_function("crc", |b| {
        b.iter(|| {
            let mut digest = CASTAGNOLI.digest();
            digest.update(&buffer);
            black_box(digest.finalize());
        })
    });

    // Candidate: one-shot checksum from the `crc32c` crate.
    group.bench_function("crc32c", |b| {
        b.iter(|| {
            black_box(crc32c::crc32c(&buffer));
        })
    });
}

// Collect both benchmark functions into the `benches` group and hand that group
// to Criterion's entry-point macro.
criterion_group!(benches, criterion_benchmark_4k, criterion_benchmark_1024k);
criterion_main!(benches);
47 changes: 38 additions & 9 deletions src/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use log::{debug, error, log_enabled, trace};
use std::cmp::Ordering;
use std::fmt;
use std::fs::{self, OpenOptions};
use std::hash::Hasher;
use std::io::{Error, ErrorKind, Result};
use std::mem;
use std::ops::Deref;
Expand All @@ -13,7 +14,6 @@ use std::time::Duration;

use crate::mmap_view_sync::MmapViewSync;
use byteorder::{ByteOrder, LittleEndian};
use crc::{Crc, CRC_32_ISCSI};
#[cfg(not(unix))]
use fs4::FileExt;

Expand All @@ -27,8 +27,6 @@ const HEADER_LEN: usize = 8;
/// The length of a CRC value.
const CRC_LEN: usize = 4;

pub const CASTAGNOLI: Crc<u32> = Crc::<u32>::new(&CRC_32_ISCSI);

pub struct Entry {
view: MmapViewSync,
}
Expand Down Expand Up @@ -264,9 +262,9 @@ impl Segment {
if offset + HEADER_LEN + padded_len + CRC_LEN > capacity {
break;
}
let mut digest = CASTAGNOLI.digest_with_initial(crc);
digest.update(&segment[offset..offset + HEADER_LEN + padded_len]);
let entry_crc = digest.finalize();
let mut digest = crc32c::Crc32cHasher::new(crc);
digest.write(&segment[offset..offset + HEADER_LEN + padded_len]);
let entry_crc = digest.finish() as u32;
let stored_crc =
LittleEndian::read_u32(&segment[offset + HEADER_LEN + padded_len..]);
if entry_crc != stored_crc {
Expand Down Expand Up @@ -340,7 +338,7 @@ impl Segment {
let offset = self.size();

let mut crc = self.crc;
let mut digest = CASTAGNOLI.digest_with_initial(crc);
let mut digest = crc32c::Crc32cHasher::new(crc);

LittleEndian::write_u64(&mut self.as_mut_slice()[offset..], entry.len() as u64);
copy_memory(
Expand All @@ -355,8 +353,8 @@ impl Segment {
&mut self.as_mut_slice()[offset + HEADER_LEN + entry.len()..],
);
}
digest.update(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]);
crc = digest.finalize();
digest.write(&self.as_slice()[offset..offset + HEADER_LEN + padded_len]);
crc = digest.finish() as u32;

LittleEndian::write_u32(
&mut self.as_mut_slice()[offset + HEADER_LEN + padded_len..],
Expand Down Expand Up @@ -875,4 +873,35 @@ mod test {
Segment::open(&path).unwrap_err().kind()
);
}

use rand::{rngs::OsRng, RngCore};
use std::hash::Hasher;

/// Checks the `crc32c` crate against the `check` value published with the
/// `CRC_32_ISCSI` parameters, through both the one-shot function and the
/// `Hasher`-based streaming API.
#[test]
fn test_crc32c() {
    let expected = crc::CRC_32_ISCSI.check;
    let input = b"123456789";

    // One-shot API.
    assert_eq!(crc32c::crc32c(input), expected);

    // Streaming API: `finish` returns a u64, so narrow it back to the 32-bit checksum.
    let mut streaming = crc32c::Crc32cHasher::default();
    streaming.write(input);
    assert_eq!(streaming.finish() as u32, expected);
}

/// Cross-checks the `crc32c` crate against the `crc` crate on 1024 random
/// 8 KiB buffers; both must produce the same checksum for every buffer.
#[test]
fn test_crc32c_accuracy() {
    let reference = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
    let mut sample = [0u8; 8192];

    for _ in 0..1024 {
        // Fresh random contents for every round.
        OsRng.fill_bytes(&mut sample);

        let mut digest = reference.digest();
        digest.update(&sample);
        let from_crc = digest.finalize();

        let from_crc32c = crc32c::crc32c(&sample);

        assert_eq!(from_crc, from_crc32c);
    }
}
}

0 comments on commit 9320451

Please sign in to comment.