diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index 55ea025d..a7b802d6 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -19,6 +19,7 @@ pest_derive = { path = "../derive", version = "2.5.3" } [dev-dependencies] criterion = "0.3" +pretty_assertions = "1.3.0" [[bench]] name = "json" diff --git a/grammars/tests/examples.line-col.txt b/grammars/tests/examples.line-col.txt new file mode 100644 index 00000000..272edca2 --- /dev/null +++ b/grammars/tests/examples.line-col.txt @@ -0,0 +1,97 @@ +(2:3) "JSON Test Pattern pass1" +(3:4) "object with 1 member" +(3:28) "array with 1 element" +(4:3) {} +(5:3) [] +(6:3) -42 +(7:3) true +(8:3) false +(9:3) null +(11:5) "integer" +(11:16) 1234567890 +(12:5) "real" +(12:13) -9876.543210 +(13:5) "e" +(13:10) 0.123456789e-12 +(14:5) "E" +(14:10) 1.234567890E+34 +(15:5) "" +(15:10) 23456789012E66 +(16:5) "zero" +(16:13) 0 +(17:5) "one" +(17:12) 1 +(18:5) "space" +(18:14) " " +(19:5) "quote" +(19:14) "\"" +(20:5) "backslash" +(20:18) "\\" +(21:5) "controls" +(21:17) "\b\f\n\r\t" +(22:5) "slash" +(22:14) "/ & \/" +(23:5) "alpha" +(23:14) "abcdefghijklmnopqrstuvwyz" +(24:5) "ALPHA" +(24:14) "ABCDEFGHIJKLMNOPQRSTUVWYZ" +(25:5) "digit" +(25:14) "0123456789" +(26:5) "0123456789" +(26:19) "digit" +(27:5) "special" +(27:16) "`1~!@#$%^&*()_+-={':[,]}|;.?" +(28:5) "hex" +(28:12) "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A" +(29:5) "true" +(29:13) true +(30:5) "false" +(30:14) false +(31:5) "null" +(31:13) null +(32:5) "array" +(32:13) [ ] +(33:5) "object" +(33:14) { } +(34:5) "address" +(34:16) "50 St. James Street" +(35:5) "url" +(35:12) "http://www.JSON.org/" +(36:5) "comment" +(36:16) "// /* */" +(37:20) " " +(38:5) " s p a c e d " +(38:23) 1 +(38:25) 2 +(38:29) 3 +(42:7) 4 +(42:11) 5 +(42:31) 6 +(42:44) 7 +(42:55) "compact" +(42:66) 1 +(42:68) 2 +(42:70) 3 +(42:72) 4 +(42:74) 5 +(42:76) 6 +(42:78) 7 +(43:5) "jsontext" +(43:17) "{\"object with 1 member\":[\"array with 1 element\"]}" +(44:5) "quotes" +(44:15) "" \u0022 %22 0x22 034 "" +(45:5) "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" +(46:7) "A key can be any string" +(48:3) 0.5 +(48:8) 98.6 +(50:3) 99.44 +(53:3) 1066 +(54:3) 1e1 +(55:3) 0.1e1 +(56:3) 1e-1 +(57:3) 1e00 +(57:8) 2e+00 +(57:14) 2e-00 +(58:2) "rosebud" +(59:1) \ No newline at end of file diff --git a/grammars/tests/json.rs b/grammars/tests/json.rs index 41db310b..9d609805 100644 --- a/grammars/tests/json.rs +++ b/grammars/tests/json.rs @@ -11,12 +11,9 @@ extern crate pest; extern crate pest_grammars; -use std::fs::File; -use std::io::Read; - use pest::Parser; - use pest_grammars::json::*; +use pretty_assertions::assert_eq; #[test] fn null() { @@ -164,10 +161,50 @@ fn object() { #[test] fn examples() { - let mut file = File::open("tests/examples.json").unwrap(); - let mut data = String::new(); + let raw = include_str!("examples.json"); + let pairs = JsonParser::parse(Rule::json, raw).unwrap(); + + let expected = include_str!("examples.line-col.txt"); + + // Test for flatten iter, and use span.start_pos().line_col() + let mut out = String::new(); + for pair in pairs.clone().flatten() { + let sub_pairs = pair.clone().into_inner(); + if sub_pairs.count() == 0 { + let span = pair.as_span(); + out.push_str(&build_line_col(span.start_pos().line_col(), span.as_str())); + } + } + assert_eq!(expected.trim(), out.trim()); + + // Test for nested iter, use pair.line_col() + let mut out = String::new(); + for pair in pairs { + out.push_str(&build_result_for_pair(pair.clone())); + } + + assert_eq!(expected.trim(), out.trim()); +} - file.read_to_string(&mut data).unwrap(); +fn build_line_col(line_col: (usize, usize), str: &str) -> String { + format!( + "({}:{}) {}\n", + line_col.0, + line_col.1, + str.replace('\n', "\\n") + ) +} - JsonParser::parse(Rule::json, &data).unwrap(); +fn build_result_for_pair(pair: pest::iterators::Pair) -> String { + let mut out = String::new(); + + let sub_pairs = pair.clone().into_inner(); + if sub_pairs.clone().count() == 0 { + out.push_str(&build_line_col(pair.line_col(), pair.as_str())); + } else { + for sub_pair in sub_pairs { + out.push_str(&build_result_for_pair(sub_pair)); + } + } + out } diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index a43dc528..d4596b0f 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -249,14 +249,7 @@ impl<'i, R: RuleType> Pairs<'i, R> { let (prev_line, prev_col) = (self.cursor.line, self.cursor.col); let part = &input[self.cursor.end..end]; - let (l, c) = position::line_col(part, part.len()); - - // Because the `original_line_col` returns (line, col) is start from 1 - let l = l - 1; - let mut c = c - 1; - if c < 1 { - c = 1 - } + let (l, c) = position::line_col(part, part.len(), (0, 0)); self.cursor.line += l; // Has new line diff --git a/pest/src/position.rs b/pest/src/position.rs index 559cf446..b7b3c102 100644 --- a/pest/src/position.rs +++ b/pest/src/position.rs @@ -139,7 +139,7 @@ impl<'i> Position<'i> { panic!("position out of bounds"); } - line_col(self.input, self.pos) + line_col(self.input, self.pos, (1, 1)) } /// Returns the entire line of the input that contains this `Position`. @@ -452,25 +452,30 @@ impl<'i> Hash for Position<'i> { } } -pub(crate) fn line_col(input: &str, pos: usize) -> (usize, usize) { +/// Returns the line and column of the given `pos` in `input`. +pub(crate) fn line_col(input: &str, pos: usize, start: (usize, usize)) -> (usize, usize) { #[cfg(feature = "fast-line-col")] { - fast_line_col(input, pos) + fast_line_col(input, pos, start) } #[cfg(not(feature = "fast-line-col"))] { - original_line_col(input, pos) + original_line_col(input, pos, start) } } #[inline] #[cfg(not(feature = "fast-line-col"))] -fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) { +pub(crate) fn original_line_col( + input: &str, + mut pos: usize, + start: (usize, usize), +) -> (usize, usize) { // Position's pos is always a UTF-8 border. let slice = &input[..pos]; let mut chars = slice.chars().peekable(); - let mut line_col = (1, 1); + let mut line_col = start; while pos != 0 { match chars.next() { @@ -507,16 +512,16 @@ fn original_line_col(input: &str, mut pos: usize) -> (usize, usize) { #[inline] #[cfg(feature = "fast-line-col")] -fn fast_line_col(input: &str, pos: usize) -> (usize, usize) { +fn fast_line_col(input: &str, pos: usize, start: (usize, usize)) -> (usize, usize) { // Position's pos is always a UTF-8 border. let slice = &input[..pos]; let prec_ln = memchr::memrchr(b'\n', slice.as_bytes()); if let Some(prec_nl_pos) = prec_ln { - let lines = bytecount::count(slice[..=prec_nl_pos].as_bytes(), b'\n') + 1; + let lines = bytecount::count(slice[..=prec_nl_pos].as_bytes(), b'\n') + start.0; (lines, slice[prec_nl_pos..].chars().count()) } else { - (1, slice.chars().count() + 1) + (start.0, slice.chars().count() + start.1) } }