Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support typed BNF grammar #5

Merged
merged 15 commits into from
Nov 14, 2024
56 changes: 56 additions & 0 deletions examples/set-algebra-typed.bnfgen
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<Num> ::= re("[0-9]|[1-9][0-9]+") ;
<Id> ::= "x" | "y" | "z" ;

<Program> ::= <Decls> <CalcExpr> ".";

<CalcExpr> ::= "show" <Algebra>
| "simplify" <Algebra> ;

<Decls> ::= <Decl>
| <Decls> <Decl> {100} ;
// For generation purposes, we generate exactly 100 Decls

<Ty> ::= "int"
| "set" ;

<Decl> ::= "let" <Ty> <Id> "be" <Expr> "." <EOL> ;

<Algebra> ::= <Expr>
| <Predicate> ;

<Expr0: "set"> ::= "{" <Id> ":" <Predicate> "}" ;
// <Expr0: "set"> ::= "{" <Id> ":" "Predicate" "}" ;

<Expr0: "int"> ::= "(" <Expr: "int"> ")"
| 100 <Num>
| <Id> ;

<Expr1: "int"> ::= 2 <Expr0: "int">
| <Expr1: "int"> "*" <Expr0: "int"> ;

<Expr1: "set"> ::= 2 <Expr0: "set">
| <Expr1: "set"> "U" <Expr0: "set"> ;

<Expr: "int"> ::= 2 <Expr1: "int">
| <Expr: "int"> "+" <Expr1: "int">
| <Expr: "int"> "-" <Expr1: "int"> ;

<Expr: "set"> ::= 2 <Expr1: "set">
| <Expr: "set"> "U" <Expr1: "set"> ;

<Predicate0: "relation"> ::= 2 <Expr: "int"> "<" <Expr: "int">
| 2 <Expr: "int"> ">" <Expr: "int">
| 2 <Expr: "int"> "=" <Expr: "int">
| <Expr: "int"> "@" <Expr: "set"> ;

<Predicate1> ::= "(" <Predicate> ")"
| 2 "!" <Predicate0>
| 10 <Predicate0> ;

<Predicate2> ::= 2 <Predicate1>
| <Predicate2> "&" <Predicate1> ;

<Predicate> ::= 2 <Predicate2>
| <Predicate> "|" <Predicate2> ;

<EOL> ::= 1000 "\n" | "" ; // For generation purposes, to make the output look better
56 changes: 51 additions & 5 deletions src/generator.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::grammar::checked::{CheckedGrammar, ReduceOutput};
use crate::grammar::state::State;
use crate::grammar::symbol::SymbolKind;
use crate::grammar::symbol::SymbolKind::Terminal;
use crate::grammar::symbol::{NonTerminal, SymbolKind};
use crate::parse_tree::tree::ParseTree;
use rand::Rng;

Expand All @@ -11,11 +11,11 @@
}

impl Generator {
pub fn generate<R: Rng, S: ToString>(&self, start: S, rng: &mut R) -> String {
pub fn generate<R: Rng, S: Into<String>>(&self, start: S, rng: &mut R) -> String {
let mut buf = Vec::new();
let mut state = State::new(rng);

let start = SymbolKind::NonTerminal(start.to_string().into());
let start = SymbolKind::NonTerminal(NonTerminal::untyped(start));
let mut stack = vec![start];

while !stack.is_empty() {
Expand All @@ -41,8 +41,12 @@
}

impl TreeGenerator {
pub fn generate<R: Rng, S: ToString>(&self, start: S, rng: &mut R) -> ParseTree<SymbolKind> {
let start = SymbolKind::NonTerminal(start.to_string().into());
pub fn generate<R: Rng, S: Into<String>>(
&self,
start: S,
rng: &mut R,
) -> ParseTree<SymbolKind> {

Check warning on line 48 in src/generator.rs

View workflow job for this annotation

GitHub Actions / clippy

type `grammar::symbol::SymbolKind` is more private than the item `generator::TreeGenerator::generate`

warning: type `grammar::symbol::SymbolKind` is more private than the item `generator::TreeGenerator::generate` --> src/generator.rs:44:5 | 44 | / pub fn generate<R: Rng, S: Into<String>>( 45 | | &self, 46 | | start: S, 47 | | rng: &mut R, 48 | | ) -> ParseTree<SymbolKind> { | |______________________________^ method `generator::TreeGenerator::generate` is reachable at visibility `pub` | note: but type `grammar::symbol::SymbolKind` is only usable at visibility `pub(crate)` --> src/grammar/symbol.rs:91:1 | 91 | pub(crate) enum SymbolKind { | ^^^^^^^^^^^^^^^^^^^^^^^^^^ = note: `#[warn(private_interfaces)]` on by default
let start = SymbolKind::NonTerminal(NonTerminal::untyped(start));
let mut state = State::new(rng);
self.generate_tree(start, &mut state)
}
Expand Down Expand Up @@ -95,4 +99,46 @@
let tree = tree_gen.generate("S", &mut seeded_rng);
insta::assert_debug_snapshot!(&tree);
}

// Snapshot test for generation from a grammar with typed nonterminals.
//
// The inline grammar defines `<E>` only with the type tags "int" and "bool",
// while `<Expr>` refers to the untyped `<E>`. The output of generating from
// the start symbol `S` with an RNG seeded with 42 is compared against the
// stored insta snapshot.
#[test]
fn test_typed_generator() {
// NOTE: the raw string below is the grammar under test; its text is part of
// the test's behavior and must not be reformatted.
let text = r#"
<S> ::= <Expr> | <S> "\n" <Expr> {10, 20};

<Expr> ::= <E> ;

<E: "int"> ::= "1" | "2" | "3"
| <E: "int"> "+" <E: "int"> {3, } ;

<E: "bool"> ::= "true" | "false"
| <E: "bool"> "&" <E: "bool"> {3, } ;
"#;
// Parse, then run the grammar checks; both steps must succeed for this input.
let grammar = RawGrammar::parse(text).unwrap().to_checked().unwrap();
let gen = Generator { grammar };
// Fixed seed so the generated text is reproducible across runs.
let mut seeded_rng = rand::rngs::StdRng::seed_from_u64(42);
insta::assert_snapshot!(gen.generate("S", &mut seeded_rng));
}

// Snapshot test: draws 100 samples from the start symbol `Expr` of the typed
// set-algebra example grammar, all from one RNG seeded with 42, and compares
// the newline-joined samples against the stored insta snapshot.
#[test]
fn test_typed_set_algebra_expr() {
    let source = include_str!("../examples/set-algebra-typed.bnfgen");
    let raw = RawGrammar::parse(source).unwrap();
    let gen = Generator {
        grammar: raw.to_checked().unwrap(),
    };
    let mut rng = rand::rngs::StdRng::seed_from_u64(42);

    // Draw the samples one at a time so each call advances the same RNG in order.
    let mut samples: Vec<String> = Vec::with_capacity(100);
    for _ in 0..100 {
        samples.push(gen.generate("Expr", &mut rng));
    }

    // The binding stays named `out` so the asserted expression is unchanged.
    let out = samples.join("\n");
    insta::assert_snapshot!(out);
}

// Snapshot test: generates one complete `Program` from the typed set-algebra
// example grammar with an RNG seeded with 42 and compares it against the
// stored insta snapshot.
#[test]
fn test_typed_set_algebra() {
    let source = include_str!("../examples/set-algebra-typed.bnfgen");

    // Parse first, then run the grammar checks; either failure aborts the test.
    let raw = RawGrammar::parse(source).unwrap();
    let checked = raw.to_checked().unwrap();
    let gen = Generator { grammar: checked };

    let mut rng = rand::rngs::StdRng::seed_from_u64(42);
    // The binding stays named `out` so the asserted expression is unchanged.
    let out = gen.generate("Program", &mut rng);
    insta::assert_snapshot!(out);
}
}
56 changes: 47 additions & 9 deletions src/grammar/checked.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
use crate::grammar::production::WeightedProduction;
use crate::grammar::state::State;
use crate::grammar::symbol::SymbolKind;
use crate::grammar::symbol::Ty::Untyped;
use crate::grammar::symbol::{NonTerminal, SymbolKind, Ty};
use indexmap::IndexMap;
use rand::prelude::SliceRandom;
use rand::Rng;
use std::collections::HashMap;
use std::rc::Rc;

#[derive(Debug)]
pub struct CheckedGrammar {
pub(crate) rules: HashMap<String, WeightedProduction>,
pub(crate) rules: IndexMap<NonTerminal, WeightedProduction>,
}

pub enum ReduceOutput {
Terminal(Rc<String>),
NonTerminal {
name: Rc<String>,
syms: Vec<SymbolKind>,

Check warning on line 19 in src/grammar/checked.rs

View workflow job for this annotation

GitHub Actions / clippy

type `grammar::symbol::SymbolKind` is more private than the item `grammar::checked::ReduceOutput::NonTerminal::syms`

warning: type `grammar::symbol::SymbolKind` is more private than the item `grammar::checked::ReduceOutput::NonTerminal::syms` --> src/grammar/checked.rs:19:9 | 19 | syms: Vec<SymbolKind>, | ^^^^^^^^^^^^^^^^^^^^^ field `grammar::checked::ReduceOutput::NonTerminal::syms` is reachable at visibility `pub` | note: but type `grammar::symbol::SymbolKind` is only usable at visibility `pub(crate)` --> src/grammar/symbol.rs:91:1 | 91 | pub(crate) enum SymbolKind { | ^^^^^^^^^^^^^^^^^^^^^^^^^^
},
}

Expand All @@ -26,13 +29,32 @@
match symbol {
SymbolKind::Terminal(s) => ReduceOutput::Terminal(s),
SymbolKind::NonTerminal(s) => {
let syms = self
.rules
.get(s.as_ref())
.unwrap_or_else(|| panic!("Fail to find rule of {}", s))
.choose_by_state(state);
let syms = match s.ty {
Untyped => {
let candidates = self
.rules
.keys()
.filter(|k| k.name == s.name)
.collect::<Vec<_>>();
self.rules
.get(
*candidates
.choose(state.rng())
.expect("No candidates available"),
)
.unwrap_or_else(|| panic!("Fail to find rule of {:?}", s))
.choose_by_state(state)
}
Ty::Typed(_) => {
// require an exact match
self.rules
.get(&s)
.unwrap_or_else(|| panic!("Fail to find rule of {:?}", s))
.choose_by_state(state)
}
};

ReduceOutput::NonTerminal { name: s, syms }
ReduceOutput::NonTerminal { name: s.name, syms }
}
SymbolKind::Regex(re) => {
let terminals = self
Expand All @@ -46,3 +68,19 @@
}
}
}

// Unit tests for checking a raw grammar that contains typed nonterminals.
#[cfg(test)]
mod test {
use crate::grammar::raw::RawGrammar;

// `<E>` is defined twice without a type tag and once with the tag "str".
// The test only asserts that `to_checked` returns `Ok` for this input.
// NOTE(review): going by the test name, the repeated `<E>` definitions are
// presumably merged rather than rejected — confirm in `to_checked`.
#[test]
fn it_can_merge() {
let text = r#"
<E> ::= <E: "int"> "+" <E: "int"> ;
<E> ::= <E: "str"> "+" <E: "str"> ;
<E: "str"> ::= <E: "str"> "+" <E: "str"> ;
"#;
// Parsing must succeed outright; only the checking step is under test.
let grammar = RawGrammar::parse(text).unwrap();
assert!(grammar.to_checked().is_ok());
}
}
21 changes: 12 additions & 9 deletions src/grammar/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@

impl<'rule> GrammarGraph<'rule> {
pub fn check_unused<S: AsRef<str>>(&self, start: S) -> crate::error::Result<&Self> {
let all_nts = self.nodes.keys().collect::<HashSet<_>>();
let all_nts = self
.nodes
.keys()
.map(|s| s.as_str())
.collect::<HashSet<_>>();
// find the reachable nodes for a given start symbol
let start = self
.nodes
Expand All @@ -23,15 +27,15 @@
let mut reachable = HashSet::new();
while let Some(nx) = dfs.next(&self.graph) {
let name = &self.graph[nx];
reachable.insert(name);
reachable.insert(name.as_str());
}
let unreachable = all_nts.difference(&reachable).collect::<HashSet<_>>();
// find the unreachable spans
if !unreachable.is_empty() {
let spans = self
.rules
.iter()
.filter(|rule| unreachable.contains(&&rule.name))
.filter(|rule| unreachable.contains(&&rule.lhs.as_str()))
.map(|rule| rule.span)
.collect::<Vec<_>>();
return Err(Error::UnreachableRules { spans });
Expand All @@ -48,23 +52,23 @@
.map(|nx| {
self.rules
.iter()
.find(|rule| rule.name == self.graph[*nx])
.find(|rule| rule.lhs.as_str() == self.graph[*nx])
.unwrap()
.span
})
.collect::<Vec<_>>();
return Err(Error::TrapLoop { spans });
}
}
Ok(&self)
Ok(self)
}

fn is_trap_loop(&self, scc: &Vec<NodeIndex>) -> bool {

Check warning on line 66 in src/grammar/graph.rs

View workflow job for this annotation

GitHub Actions / clippy

writing `&Vec` instead of `&[_]` involves a new object where a slice will do

warning: writing `&Vec` instead of `&[_]` involves a new object where a slice will do --> src/grammar/graph.rs:66:33 | 66 | fn is_trap_loop(&self, scc: &Vec<NodeIndex>) -> bool { | ^^^^^^^^^^^^^^^ help: change this to: `&[NodeIndex]` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#ptr_arg = note: `#[warn(clippy::ptr_arg)]` on by default
let produce_t = scc.iter().map(|nx| self.graph[*nx].as_str()).any(|name| {
// check if rule produce a terminal
self.rules
.iter()
.find(|rule| rule.name == name)
.find(|rule| rule.lhs.as_str() == name)
.unwrap()
.produce_terminals()
});
Expand All @@ -73,9 +77,8 @@
}
let out_deg: HashSet<NodeIndex> = scc
.iter()
.map(|nx| self.graph.neighbors(*nx))
.flatten()
.flat_map(|nx| self.graph.neighbors(*nx))
.collect();
out_deg == scc.iter().map(|n| *n).collect()
out_deg == scc.iter().copied().collect()
}
}
34 changes: 22 additions & 12 deletions src/grammar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ mod test {
insta::assert_debug_snapshot!(grammar);
}

// Snapshot test for parsing type-tagged nonterminals: the grammar uses a tag
// both on a left-hand side (`<S: "int">`) and on right-hand-side references
// (`<E: "int">`). Only parsing is exercised; the parsed `RawGrammar` is
// compared against the stored debug snapshot.
#[test]
fn typed() {
let text = r#"
<E> ::= <E: "int"> "+" <E: "int"> ;
<S: "int"> ::= <E> ;
"#;
let grammar = RawGrammar::parse(text).unwrap();
insta::assert_debug_snapshot!(grammar);
}

#[test]
fn repeat() {
let text = r#"
Expand All @@ -52,7 +62,7 @@ mod test {

#[test]
fn invalid_token() {
let text = ":";
let text = "*";
let err = RawGrammar::parse(text).err().unwrap();
let ui = report_with_unnamed_source(err, text);
insta::assert_snapshot!(ui);
Expand All @@ -74,17 +84,17 @@ mod test {
insta::assert_snapshot!(ui);
}

#[test]
fn duplicated_def() {
let text = r#"
<E> ::= <S>;
<S> ::= <E>;
<E> ::= "?";
"#;
let err = RawGrammar::parse(text).unwrap().to_checked().err().unwrap();
let ui = report_with_unnamed_source(err, text);
insta::assert_snapshot!(ui);
}
// #[test]
// fn duplicated_def() {
// let text = r#"
// <E> ::= <S>;
// <S> ::= <E>;
// <E> ::= "?";
// "#;
// let err = RawGrammar::parse(text).unwrap().to_checked().err().unwrap();
// let ui = report_with_unnamed_source(err, text);
// insta::assert_snapshot!(ui);
// }

#[test]
fn invalid_repeat() {
Expand Down
Loading
Loading