Skip to content

Commit

Permalink
Next skip (#84)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored Apr 23, 2024
1 parent 0ab4dd4 commit ad047bf
Show file tree
Hide file tree
Showing 12 changed files with 456 additions and 55 deletions.
19 changes: 18 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,23 @@ jobs:

- run: cargo fuzz run --fuzz-dir crates/fuzz compare_to_serde --release -- -max_total_time=300s

fuzz-skip:
name: fuzz skip
# we only run this on ubuntu since architecture should make no difference

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- uses: moonrepo/setup-rust@v1
with:
channel: nightly
cache-target: release
bins: cargo-fuzz

- run: cargo fuzz run --fuzz-dir crates/fuzz compare_skip --release -- -max_total_time=300s

lint:
runs-on: ubuntu-latest
steps:
Expand All @@ -166,7 +183,7 @@ jobs:
# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
check:
if: always()
needs: [test-linux, test-macos, bench, fuzz, lint]
needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint]
runs-on: ubuntu-latest
steps:
- name: Decide whether the needed jobs succeeded or failed
Expand Down
6 changes: 6 additions & 0 deletions crates/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ name = "compare_to_serde"
path = "fuzz_targets/compare_to_serde.rs"
test = false
doc = false

[[bin]]
name = "compare_skip"
path = "fuzz_targets/compare_skip.rs"
test = false
doc = false
32 changes: 32 additions & 0 deletions crates/fuzz/fuzz_targets/compare_skip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#![no_main]

use jiter::{Jiter, JiterError, JiterErrorType, JsonError, JsonValue};

use libfuzzer_sys::fuzz_target;
fn errors_equal(value_error: &JsonError, jiter_error: &JiterError) {
let jiter_error_type = match &jiter_error.error_type {
JiterErrorType::JsonError(json_error_type) => json_error_type,
JiterErrorType::WrongType { .. } => panic!("Expected JsonError, found WrongType"),
};
assert_eq!(&value_error.error_type, jiter_error_type);
assert_eq!(value_error.index, jiter_error.index);
}

fuzz_target!(|json: String| {
let json_data = json.as_bytes();
match JsonValue::parse(json_data, false) {
Ok(_) => {
let mut jiter = Jiter::new(json_data, false);
jiter.next_skip().unwrap();
jiter.finish().unwrap();
}
Err(json_error) => {
let mut jiter = Jiter::new(json_data, false);
let jiter_error = match jiter.next_skip() {
Ok(_) => jiter.finish().unwrap_err(),
Err(e) => e,
};
errors_equal(&json_error, &jiter_error);
}
};
});
27 changes: 27 additions & 0 deletions crates/jiter/benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ fn jiter_value(path: &str, bench: &mut Bencher) {
})
}

fn jiter_skip(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
jiter.next_skip().unwrap();
})
}

fn jiter_iter_big(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
Expand Down Expand Up @@ -211,6 +220,10 @@ macro_rules! test_cases {
jiter_string(&file_path, bench);
}
}
fn [< $file_name _jiter_skip >](bench: &mut Bencher) {
let file_path = format!("./benches/{}.json", stringify!($file_name));
jiter_skip(&file_path, bench);
}

fn [< $file_name _serde_value >](bench: &mut Bencher) {
let file_path = format!("./benches/{}.json", stringify!($file_name));
Expand Down Expand Up @@ -293,51 +306,65 @@ fn lazy_map_lookup_3_50(bench: &mut Bencher) {
benchmark_group!(
benches,
big_jiter_iter,
big_jiter_skip,
big_jiter_value,
big_serde_value,
bigints_array_jiter_iter,
bigints_array_jiter_skip,
bigints_array_jiter_value,
bigints_array_serde_value,
floats_array_jiter_iter,
floats_array_jiter_skip,
floats_array_jiter_value,
floats_array_serde_value,
massive_ints_array_jiter_iter,
massive_ints_array_jiter_skip,
massive_ints_array_jiter_value,
massive_ints_array_serde_value,
medium_response_jiter_iter,
medium_response_jiter_skip,
medium_response_jiter_value,
medium_response_jiter_value_owned,
medium_response_serde_value,
x100_jiter_iter,
x100_jiter_skip,
x100_jiter_value,
x100_serde_iter,
x100_serde_value,
sentence_jiter_iter,
sentence_jiter_skip,
sentence_jiter_value,
sentence_serde_value,
unicode_jiter_iter,
unicode_jiter_skip,
unicode_jiter_value,
unicode_serde_value,
pass1_jiter_iter,
pass1_jiter_skip,
pass1_jiter_value,
pass1_serde_value,
pass2_jiter_iter,
pass2_jiter_skip,
pass2_jiter_value,
pass2_serde_value,
string_array_jiter_iter,
string_array_jiter_skip,
string_array_jiter_value,
string_array_jiter_value_owned,
string_array_serde_value,
true_array_jiter_iter,
true_array_jiter_skip,
true_array_jiter_value,
true_array_serde_value,
true_object_jiter_iter,
true_object_jiter_skip,
true_object_jiter_value,
true_object_serde_value,
lazy_map_lookup_1_10,
lazy_map_lookup_2_20,
lazy_map_lookup_3_50,
short_numbers_jiter_iter,
short_numbers_jiter_skip,
short_numbers_jiter_value,
short_numbers_serde_value,
);
Expand Down
5 changes: 0 additions & 5 deletions crates/jiter/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ use std::fmt;
/// those expected from `serde_json`.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum JsonErrorType {
/// string escape sequences are not supported in this method, usize here is the position within the string
/// that is invalid
StringEscapeNotSupported,

/// float value was found where an int was expected
FloatExpectingInt,

Expand Down Expand Up @@ -82,7 +78,6 @@ impl std::fmt::Display for JsonErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Messages for enum members copied from serde_json are unchanged
match self {
Self::StringEscapeNotSupported => f.write_str("string escape sequences are not supported"),
Self::FloatExpectingInt => f.write_str("float value was found where an int was expected"),
Self::EofWhileParsingList => f.write_str("EOF while parsing a list"),
Self::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
Expand Down
37 changes: 36 additions & 1 deletion crates/jiter/src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECU
use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape};
use crate::value::{take_value_borrowed, take_value_owned, JsonValue};
use crate::value::{take_value_borrowed, take_value_owned, take_value_skip, JsonValue};
use crate::{JsonError, JsonErrorType};

pub type JiterResult<T> = Result<T, JiterError>;
Expand Down Expand Up @@ -48,6 +48,16 @@ impl<'j> Jiter<'j> {
self.parser.current_position()
}

/// Get the current index of the parser.
pub fn current_index(&self) -> usize {
self.parser.index
}

/// Get a slice of the underlying JSON data from `start` to `current_index`.
pub fn slice_to_current(&self, start: usize) -> &'j [u8] {
&self.data[start..self.current_index()]
}

/// Convert an error index to a [LinePosition].
///
/// # Arguments
Expand Down Expand Up @@ -218,6 +228,31 @@ impl<'j> Jiter<'j> {
.map_err(Into::into)
}

/// Parse the next JSON value, but don't return it.
/// This should be faster than returning the value, useful when you don't care about this value.
/// Error if it is invalid JSON.
///
/// *WARNING:* For performance reasons, this method does not check that strings would be valid UTF-8.
pub fn next_skip(&mut self) -> JiterResult<()> {
let peek = self.peek()?;
self.known_skip(peek)
}

/// Parse the next JSON value, but don't return it. Error if it is invalid JSON.
///
/// # Arguments
/// - `peek`: The [Peek] of the next JSON value.
pub fn known_skip(&mut self, peek: Peek) -> JiterResult<()> {
take_value_skip(
peek,
&mut self.parser,
&mut self.tape,
DEFAULT_RECURSION_LIMIT,
self.allow_inf_nan,
)
.map_err(Into::into)
}

/// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON.
pub fn next_value_owned(&mut self) -> JiterResult<JsonValue<'static>> {
let peek = self.peek()?;
Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ mod simd_aarch64;
mod string_decoder;
mod value;

pub use errors::{JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
pub use errors::{JiterError, JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, LinePosition};
pub use jiter::{Jiter, JiterResult};
pub use lazy_index_map::LazyIndexMap;
pub use number_decoder::{NumberAny, NumberInt};
Expand Down
48 changes: 36 additions & 12 deletions crates/jiter/src/number_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,8 @@ impl AbstractNumberDecoder for NumberRange {
let end = consume_exponential(data, index)?;
Ok((start..end, end))
}
Some(_) => return json_err!(InvalidNumber, index),
None => return Ok((start..index, index)),
Some(digit) if digit.is_ascii_digit() => json_err!(InvalidNumber, index),
_ => return Ok((start..index, index)),
};
}
Some(b'I') => {
Expand All @@ -398,25 +398,49 @@ impl AbstractNumberDecoder for NumberRange {
};

index += 1;
while let Some(next) = data.get(index) {
match next {
b'0'..=b'9' => (),
b'.' => {
for _ in 0..18 {
if let Some(digit) = data.get(index) {
if INT_CHAR_MAP[*digit as usize] {
index += 1;
continue;
} else if matches!(digit, b'.') {
index += 1;
let end = consume_decimal(data, index)?;
return Ok((start..end, end));
}
b'e' | b'E' => {
} else if matches!(digit, b'e' | b'E') {
index += 1;
let end = consume_exponential(data, index)?;
return Ok((start..end, end));
}
_ => break,
}
index += 1;
return Ok((start..index, index));
}
loop {
let (chunk, new_index) = IntChunk::parse_big(data, index);
if (new_index - start) > 4300 {
return json_err!(NumberOutOfRange, start + 4301);
}
match chunk {
IntChunk::Ongoing(_) => {
index = new_index;
}
IntChunk::Done(_) => return Ok((start..new_index, new_index)),
IntChunk::Float => {
return match data.get(new_index) {
Some(b'.') => {
index = new_index + 1;
let end = consume_decimal(data, index)?;
Ok((start..end, end))
}
_ => {
index = new_index + 1;
let end = consume_exponential(data, index)?;
Ok((start..end, end))
}
}
}
}
}

Ok((start..index, index))
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ impl Peek {
}

impl Peek {
const fn new(next: u8) -> Self {
pub const fn new(next: u8) -> Self {
Self(next)
}

Expand Down
44 changes: 22 additions & 22 deletions crates/jiter/src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ fn parse_u4(data: &[u8], mut index: usize) -> JsonResult<(u16, usize)> {
Ok((n, index))
}

/// A string decoder that returns the range of the string.
///
/// *WARNING:* For performance reasons, this decoder does not check that the string would be valid UTF-8.
pub struct StringDecoderRange;

impl<'t, 'j> AbstractStringDecoder<'t, 'j> for StringDecoderRange
Expand All @@ -338,33 +341,30 @@ where
fn decode(data: &'j [u8], mut index: usize, _tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)> {
index += 1;
let start = index;
while let Some(next) = data.get(index) {
match next {
b'"' => {

loop {
index = match decode_chunk(data, index, true)? {
(StringChunk::Quote, _, index) => {
let r = start..index;
index += 1;
return Ok((r, index));
return Ok((r, index + 1));
}
b'\\' => {
index += 1;
if let Some(next_inner) = data.get(index) {
match next_inner {
// these escapes are easy to validate
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
// unicode escapes are harder to validate, we just prevent them here
b'u' => return json_err!(StringEscapeNotSupported, index),
_ => return json_err!(InvalidEscape, index),
}
} else {
return json_err!(EofWhileParsingString, index);
(StringChunk::Backslash, _, index) => index,
};
index += 1;
if let Some(next_inner) = data.get(index) {
match next_inner {
// these escapes are easy to validate
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => (),
b'u' => {
let (_, new_index) = parse_escape(data, index)?;
index = new_index;
}
index += 1;
}
_ => {
index += 1;
_ => return json_err!(InvalidEscape, index),
}
index += 1;
} else {
return json_err!(EofWhileParsingString, index);
}
}
json_err!(EofWhileParsingString, index)
}
}
Loading

0 comments on commit ad047bf

Please sign in to comment.