Skip to content

Commit

Permalink
perf: memchr and batch mutate buffer (#11)
Browse files Browse the repository at this point in the history
1. Use `memchr` + `memset` to batch-mutate `buffer`; this could make
processing more cache-friendly.
  • Loading branch information
IWANABETHATGUY authored Jul 21, 2024
1 parent 519202d commit 3dc7cec
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 51 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ criterion2 = { version = "0.11.0", default-features = false }

[features]
codspeed = ["criterion2/codspeed"]

[dependencies]
memchr = "2.7.4"
99 changes: 48 additions & 51 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,7 @@
#![doc = include_str!("../examples/example.rs")]
//! ```
use std::{
io::{ErrorKind, Read, Result},
slice::IterMut,
};
use std::io::{ErrorKind, Read, Result};

#[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum State {
Expand Down Expand Up @@ -127,25 +124,30 @@ where

fn consume_comment_whitespace_until_maybe_bracket(
state: &mut State,
it: &mut IterMut<u8>,
buf: &mut [u8],
i: &mut usize,
settings: CommentSettings,
) -> Result<bool> {
while let Some(c) = it.next() {
*i += 1;
while *i < buf.len() {
let c = &mut buf[*i];
*state = match state {
Top => {
*state = top(c, settings);
if c.is_ascii_whitespace() {
*i += 1;
continue;
}
return Ok(*c == b'}' || *c == b']');
}
InString => in_string(*c),
StringEscape => InString,
InComment => in_comment(c, settings)?,
InBlockComment => consume_block_comments(it),
InBlockComment => consume_block_comments(buf, i),
MaybeCommentEnd => maybe_comment_end(c),
InLineComment => consume_line_comments(it),
InLineComment => consume_line_comments(buf, i),
};
*i += 1;
}
Ok(false)
}
Expand All @@ -156,65 +158,67 @@ fn strip_buf(
settings: CommentSettings,
remove_trailing_commas: bool,
) -> Result<()> {
let mut it = buf.iter_mut();
while let Some(c) = it.next() {
let mut i = 0;
let len = buf.len();
while i < len {
let c = &mut buf[i];
if matches!(state, Top) {
let cur = i;
*state = top(c, settings);
if remove_trailing_commas
&& *c == b','
&& consume_comment_whitespace_until_maybe_bracket(state, &mut it, settings)?
&& consume_comment_whitespace_until_maybe_bracket(state, buf, &mut i, settings)?
{
*c = b' ';
buf[cur] = b' ';
}
} else {
*state = match state {
Top => unreachable!(),
InString => in_string(*c),
StringEscape => InString,
InComment => in_comment(c, settings)?,
InBlockComment => in_block_comment(c),
InBlockComment => consume_block_comments(buf, &mut i),
MaybeCommentEnd => maybe_comment_end(c),
InLineComment => {
if *c == b'\n' {
Top
} else {
*c = b' ';
consume_line_comments(&mut it)
}
}
InLineComment => consume_line_comments(buf, &mut i),
}
}
i += 1;
}
Ok(())
}

#[inline]
fn consume_line_comments(it: &mut IterMut<u8>) -> State {
let mut ret = InLineComment;
for c in it.by_ref() {
if *c == b'\n' {
ret = Top;
break;
} else {
*c = b' ';
fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
let cur = *i;
match memchr::memchr(b'\n', &buf[*i..]) {
Some(offset) => {
*i += offset;
buf[cur..*i].fill(b' ');
Top
}
None => {
*i = buf.len() - 1;
buf[cur..].fill(b' ');
InLineComment
}
}
ret
}

#[inline]
fn consume_block_comments(it: &mut IterMut<u8>) -> State {
let mut ret = InBlockComment;
for c in it.by_ref() {
if *c == b'*' {
*c = b' ';
ret = MaybeCommentEnd;
break;
} else {
*c = b' ';
fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
let cur = *i;
match memchr::memchr(b'*', &buf[*i..]) {
Some(offset) => {
*i += offset;
buf[cur..=*i].fill(b' ');
MaybeCommentEnd
}
None => {
*i = buf.len() - 1;
buf[cur..].fill(b' ');
InBlockComment
}
}
ret
}

/// Strips comments from a string in place, replacing them with whitespace.
Expand Down Expand Up @@ -366,6 +370,7 @@ fn top(c: &mut u8, settings: CommentSettings) -> State {
}
}

#[inline]
fn in_string(c: u8) -> State {
match c {
b'"' => Top,
Expand All @@ -378,22 +383,14 @@ fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> {
let new_state = match c {
b'*' if settings.block_comments => InBlockComment,
b'/' if settings.slash_line_comments => InLineComment,
_ => invalid_data!(),
_ => {
invalid_data!()
}
};
*c = b' ';
Ok(new_state)
}

fn in_block_comment(c: &mut u8) -> State {
let old = *c;
*c = b' ';
if old == b'*' {
MaybeCommentEnd
} else {
InBlockComment
}
}

fn maybe_comment_end(c: &mut u8) -> State {
let old = *c;
*c = b' ';
Expand Down

0 comments on commit 3dc7cec

Please sign in to comment.