Skip to content

Commit

Permalink
Merge pull request #65 from neocturne/peg
Browse files Browse the repository at this point in the history
Restore peg example, turn into an actual parser
  • Loading branch information
epage authored Apr 3, 2024
2 parents b121ef1 + 616cd85 commit 5ad14a2
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 1 deletion.
34 changes: 34 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ This repo tries to assess Rust parsing performance.
| [lalrpop] | LR(1) | in grammar | build script | `&str` | No | Yes | No |
| [logos] | lexer | in source | proc macro | `&str`, `&[u8]` | ? | ? | ? |
| [nom] | combinators | in source | library | `&[u8]`, custom | No | Yes | Yes |
| [peg] | PEG | in grammar | proc macro (block) | `&str`, `&[T]`, custom | Yes | Yes | No |
| [pest] | PEG | external | proc macro (file) | `&str` | Yes | No | No |
| [winnow] | combinators | in source | library | `&str`, `&[T]`, custom | No | Yes | Yes |
| [yap] | combinators | in source | library | `&str`, `&[T]`, custom | No | Yes | ? |

Formerly, we compared:
- [peg]: invalid example
- [pom]: lack of notoriety

# Results
Expand All @@ -27,6 +27,7 @@ logos | 170 KiB | 5s | 17ms | ![Download count](https://img.shields.io/crates/dr
combine | 204 KiB | 4s | 49ms | ![Download count](https://img.shields.io/crates/dr/combine) | v3.8.1
lalrpop | 1,615 KiB | 11s | 880ms | ![Download count](https://img.shields.io/crates/dr/lalrpop-util) | v0.20.0
nom | 99 KiB | 2s | 66ms | ![Download count](https://img.shields.io/crates/dr/nom) | v7.1.3
peg | 21 KiB | 2s | **invalid** | ![Download count](https://img.shields.io/crates/dr/peg) | v0.8.2
pest | 104 KiB | 4s | 48ms | ![Download count](https://img.shields.io/crates/dr/pest) | v2.7.6
serde_json | 41 KiB | 3s | 13ms | ![Download count](https://img.shields.io/crates/dr/serde_json) | v1.0.113
winnow | 73 KiB | 2s | 22ms | ![Download count](https://img.shields.io/crates/dr/winnow) | v0.6.0
Expand Down
11 changes: 11 additions & 0 deletions examples/peg-app/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "peg-app"
version = "0.1.0"
edition = "2021"

[[bin]]
name = "peg-app"
path = "app.rs"

[dependencies]
peg = "0.8.2"
25 changes: 25 additions & 0 deletions examples/peg-app/app.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
mod parser;

use std::{env, fs};

/// Reads the file named by the first command-line argument, parses it as JSON
/// and reports the outcome.
///
/// A parse failure is written to stderr and the process exits with status 1.
/// On success, debug builds pretty-print the parsed value; other builds hand it
/// to `std::hint::black_box` so the optimizer cannot discard the parse result.
///
/// # Panics
///
/// Panics when no file argument is given or when the file cannot be read.
fn main() {
    let path = env::args().nth(1).expect("Expected file argument");
    let src = fs::read_to_string(path).expect("Failed to read file");

    // Bail out first so the success path below stays flat.
    let json = match parser::parser::json(&src) {
        Ok(parsed) => parsed,
        Err(err) => {
            eprintln!("{}", err);
            std::process::exit(1);
        }
    };

    #[cfg(debug_assertions)]
    println!("{:#?}", json);

    #[cfg(not(debug_assertions))]
    std::hint::black_box(json);
}
94 changes: 94 additions & 0 deletions examples/peg-app/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use std::{borrow::Cow, collections::HashMap, str::FromStr};

/// In-memory representation of a parsed JSON value.
#[derive(Clone, Debug, PartialEq)]
pub enum JsonValue {
    /// The `null` literal.
    Null,
    /// The `true` / `false` literals.
    Boolean(bool),
    /// A string, stored with its escape sequences already decoded.
    Str(String),
    /// A number, stored as a 64-bit float.
    Num(f64),
    /// An ordered list of values.
    Array(Vec<JsonValue>),
    /// A key/value mapping; being a `HashMap`, it keeps one value per key and
    /// does not preserve member order.
    Object(HashMap<String, JsonValue>),
}

peg::parser!(pub grammar parser() for str {

    /// Entry rule: a single JSON value, optionally surrounded by whitespace.
    pub rule json() -> JsonValue
        = _ value:value() _ { value }

    /// Zero or more whitespace characters (space, tab, CR, LF).
    rule _() = [' ' | '\t' | '\r' | '\n']*

    /// A comma with optional whitespace on either side.
    rule value_separator() = _ "," _

    /// Any JSON value.
    rule value() -> JsonValue
        = boolean() / null() / object() / array() / number() / string()

    /// The `null` literal.
    rule null() -> JsonValue
        = "null" { JsonValue::Null }

    /// The `true` and `false` literals.
    rule boolean() -> JsonValue
        = "true" { JsonValue::Boolean(true) }
        / "false" { JsonValue::Boolean(false) }

    /// An object: comma-separated members between braces.
    rule object() -> JsonValue
        = "{" _ elements:(member() ** value_separator()) _ "}" {
            JsonValue::Object(elements.into_iter().collect())
        }

    /// A single `"key": value` member of an object.
    rule member() -> (String, JsonValue)
        = key:raw_string() _ ":" _ value:value() { (key, value) }

    /// An array: comma-separated values between brackets.
    rule array() -> JsonValue
        = "[" _ elements:(value() ** value_separator()) _ "]" {
            JsonValue::Array(elements)
        }

    /// A string wrapped as a `JsonValue`.
    rule string() -> JsonValue
        = value:raw_string() { JsonValue::Str(value) }

    /// A double-quoted string with its escape sequences decoded.
    rule raw_string() -> String
        = "\"" slices:string_slice()* "\"" { slices.concat() }

    /// A substring of same-kind (escaped or unescaped) characters
    rule string_slice() -> Cow<'input, str>
        = value:string_characters() { Cow::Borrowed(value) }
        / value:string_escapes() { Cow::Owned(value.into_iter().collect()) }

    /// A substring of unescaped characters
    ///
    /// Accepts every character other than `"` and `\`.
    rule string_characters() -> &'input str
        = $([^ '\"' | '\\']+)

    /// A substring of escaped characters
    rule string_escapes() -> Vec<char>
        = ("\\" value:string_escape_char() { value })+

    /// Handles a single escape (the part after the backslash).
    ///
    /// A `\uXXXX` escape that is itself a Unicode scalar value is decoded
    /// directly. When it is a UTF-16 surrogate instead, the first `u`
    /// alternative fails and the second one consumes the following `\uXXXX`
    /// as well, combining both halves into one character.
    rule string_escape_char() -> char
        = "\"" { '"' }
        / "\\" { '\\' }
        / "/" { '/' }
        / "b" { '\x08' }
        / "f" { '\x0C' }
        / "n" { '\n' }
        / "r" { '\r' }
        / "t" { '\t' }
        / "u" unit:hex_quad() {?
            char::from_u32(unit.into()).ok_or("invalid unicode escape")
        }
        / "u" high:hex_quad() "\\u" low:hex_quad() {?
            // Accept only when the two units decode to exactly one character,
            // i.e. a high surrogate followed by a low surrogate.
            let mut decoded = char::decode_utf16([high, low]);
            match (decoded.next(), decoded.next()) {
                (Some(Ok(ch)), None) => Ok(ch),
                _ => Err("invalid surrogate pair"),
            }
        }

    /// Exactly four hexadecimal digits, as a UTF-16 code unit.
    rule hex_quad() -> u16
        = digits:$(hex_digit()*<4>) {?
            u16::from_str_radix(digits, 16).map_err(|_| "four hex digits")
        }

    /// A single hexadecimal digit of either case.
    rule hex_digit()
        = ['0'..='9' | 'a'..='f' | 'A'..='F']

    /// A number: optional sign, integer part, optional fraction and exponent.
    ///
    /// The sign is part of the captured slice so that negative numbers keep
    /// their sign when handed to `f64::from_str`.
    rule number() -> JsonValue
        = value:$("-"? int() frac()? exp()?) {?
            f64::from_str(value)
                .map(JsonValue::Num)
                .map_err(|_| "invalid number")
        }

    /// Integer part: a lone `0`, or a non-zero digit followed by more digits.
    rule int()
        = ['0'] / ['1'..='9']['0'..='9']*

    /// Exponent: `e` or `E`, an optional sign, and at least one digit.
    rule exp()
        = ("e" / "E") ("-" / "+")? ['0'..='9']*<1,>

    /// Fraction: a dot followed by at least one digit.
    rule frac()
        = "." ['0'..='9']*<1,>
});

0 comments on commit 5ad14a2

Please sign in to comment.