From 3dc7cec2d83e7f4a9421c6210f666756987fe776 Mon Sep 17 00:00:00 2001 From: IWANABETHATGUY Date: Sun, 21 Jul 2024 08:00:06 +0800 Subject: [PATCH] perf: memchr and batch mutate buffer (#11) 1. Use `memchr` to find the end of a comment and `memset` (via `slice::fill`) to batch mutate `buffer`; this could make processing more cache-friendly. --- Cargo.lock | 7 ++++ Cargo.toml | 3 ++ src/lib.rs | 99 ++++++++++++++++++++++++++---------------------------- 3 files changed, 58 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 307b629..8ac70c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,6 +126,7 @@ name = "json-strip-comments" version = "1.0.3" dependencies = [ "criterion2", + "memchr", "serde_json", ] @@ -141,6 +142,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "num-traits" version = "0.2.19" diff --git a/Cargo.toml b/Cargo.toml index 63a120c..34a6679 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,3 +17,6 @@ criterion2 = { version = "0.11.0", default-features = false } [features] codspeed = ["criterion2/codspeed"] + +[dependencies] +memchr = "2.7.4" diff --git a/src/lib.rs b/src/lib.rs index 7a0abc2..f994ffd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,10 +25,7 @@ #![doc = include_str!("../examples/example.rs")] //! 
``` -use std::{ - io::{ErrorKind, Read, Result}, - slice::IterMut, -}; +use std::io::{ErrorKind, Read, Result}; #[derive(Eq, PartialEq, Copy, Clone, Debug)] enum State { @@ -127,14 +124,18 @@ where fn consume_comment_whitespace_until_maybe_bracket( state: &mut State, - it: &mut IterMut, + buf: &mut [u8], + i: &mut usize, settings: CommentSettings, ) -> Result { - while let Some(c) = it.next() { + *i += 1; + while *i < buf.len() { + let c = &mut buf[*i]; *state = match state { Top => { *state = top(c, settings); if c.is_ascii_whitespace() { + *i += 1; continue; } return Ok(*c == b'}' || *c == b']'); @@ -142,10 +143,11 @@ fn consume_comment_whitespace_until_maybe_bracket( InString => in_string(*c), StringEscape => InString, InComment => in_comment(c, settings)?, - InBlockComment => consume_block_comments(it), + InBlockComment => consume_block_comments(buf, i), MaybeCommentEnd => maybe_comment_end(c), - InLineComment => consume_line_comments(it), + InLineComment => consume_line_comments(buf, i), }; + *i += 1; } Ok(false) } @@ -156,15 +158,18 @@ fn strip_buf( settings: CommentSettings, remove_trailing_commas: bool, ) -> Result<()> { - let mut it = buf.iter_mut(); - while let Some(c) = it.next() { + let mut i = 0; + let len = buf.len(); + while i < len { + let c = &mut buf[i]; if matches!(state, Top) { + let cur = i; *state = top(c, settings); if remove_trailing_commas && *c == b',' - && consume_comment_whitespace_until_maybe_bracket(state, &mut it, settings)? + && consume_comment_whitespace_until_maybe_bracket(state, buf, &mut i, settings)? 
{ - *c = b' '; + buf[cur] = b' '; } } else { *state = match state { @@ -172,49 +177,48 @@ fn strip_buf( InString => in_string(*c), StringEscape => InString, InComment => in_comment(c, settings)?, - InBlockComment => in_block_comment(c), + InBlockComment => consume_block_comments(buf, &mut i), MaybeCommentEnd => maybe_comment_end(c), - InLineComment => { - if *c == b'\n' { - Top - } else { - *c = b' '; - consume_line_comments(&mut it) - } - } + InLineComment => consume_line_comments(buf, &mut i), } } + i += 1; } Ok(()) } #[inline] -fn consume_line_comments(it: &mut IterMut) -> State { - let mut ret = InLineComment; - for c in it.by_ref() { - if *c == b'\n' { - ret = Top; - break; - } else { - *c = b' '; +fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State { + let cur = *i; + match memchr::memchr(b'\n', &buf[*i..]) { + Some(offset) => { + *i += offset; + buf[cur..*i].fill(b' '); + Top + } + None => { + *i = buf.len() - 1; + buf[cur..].fill(b' '); + InLineComment } } - ret } #[inline] -fn consume_block_comments(it: &mut IterMut) -> State { - let mut ret = InBlockComment; - for c in it.by_ref() { - if *c == b'*' { - *c = b' '; - ret = MaybeCommentEnd; - break; - } else { - *c = b' '; +fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State { + let cur = *i; + match memchr::memchr(b'*', &buf[*i..]) { + Some(offset) => { + *i += offset; + buf[cur..=*i].fill(b' '); + MaybeCommentEnd + } + None => { + *i = buf.len() - 1; + buf[cur..].fill(b' '); + InBlockComment } } - ret } /// Strips comments from a string in place, replacing it with whitespaces. 
@@ -366,6 +370,7 @@ fn top(c: &mut u8, settings: CommentSettings) -> State { } } +#[inline] fn in_string(c: u8) -> State { match c { b'"' => Top, @@ -378,22 +383,14 @@ fn in_comment(c: &mut u8, settings: CommentSettings) -> Result { let new_state = match c { b'*' if settings.block_comments => InBlockComment, b'/' if settings.slash_line_comments => InLineComment, - _ => invalid_data!(), + _ => { + invalid_data!() + } }; *c = b' '; Ok(new_state) } -fn in_block_comment(c: &mut u8) -> State { - let old = *c; - *c = b' '; - if old == b'*' { - MaybeCommentEnd - } else { - InBlockComment - } -} - fn maybe_comment_end(c: &mut u8) -> State { let old = *c; *c = b' ';