From 50d1c1c7b40d4adafa5214727bd6c68f9e6943c3 Mon Sep 17 00:00:00 2001
From: Devin Yeung
Date: Sat, 18 Nov 2023 22:44:49 +0800
Subject: [PATCH] refactor: refactor `firstBuilder`

---
Reviewer note: the blob ids on the `index` lines below are carried over
unchanged and do not match the revised file contents.

 src/utils/first/builder.rs | 237 +++++++++++++++++++++++++++++++++++++
 src/utils/first/mod.rs     |   9 ++
 src/utils/mod.rs           |   1 +
 3 files changed, 247 insertions(+)
 create mode 100644 src/utils/first/builder.rs
 create mode 100644 src/utils/first/mod.rs

diff --git a/src/utils/first/builder.rs b/src/utils/first/builder.rs
new file mode 100644
index 0000000..4d007fc
--- /dev/null
+++ b/src/utils/first/builder.rs
@@ -0,0 +1,237 @@
+use crate::utils::first::First;
+use crate::utils::symbols;
+use bnf::{Grammar, Production, Term};
+use once_cell::sync::OnceCell;
+use std::cell::RefCell;
+use std::collections::{HashMap, HashSet};
+
+/// Returns the shared terminal that stands for the empty string (`ε`).
+pub fn epsilon() -> &'static Term {
+    static EPSILON: OnceCell<Term> = OnceCell::new();
+    EPSILON.get_or_init(|| Term::Terminal(String::from("ε")))
+}
+
+/// Computes the First set of every symbol of a grammar.
+///
+/// The sets are kept in a `RefCell` so the insertion helpers can work
+/// through `&self`.
+pub struct FirstBuilder<'grammar> {
+    pub(crate) grammar: &'grammar Grammar,
+    /// First(X) for every symbol X except `ε`
+    pub(crate) first: RefCell<HashMap<&'grammar Term, HashSet<&'grammar Term>>>,
+    /// left-hand side -> its production
+    pub(crate) lookup: HashMap<&'grammar Term, &'grammar Production>,
+}
+
+impl<'grammar> FirstBuilder<'grammar> {
+    /// Creates a builder with an empty First set for every symbol except `ε`.
+    pub(crate) fn new(grammar: &'grammar Grammar) -> FirstBuilder<'grammar> {
+        let lookup = grammar
+            .productions_iter()
+            .map(|production| (&production.lhs, production))
+            .collect::<HashMap<_, _>>();
+
+        // initialize the table; `ε` is only a marker terminal and gets no entry
+        let first = symbols(grammar)
+            .into_iter()
+            .filter(|term| *term != epsilon())
+            .map(|term| (term, HashSet::new()))
+            .collect::<HashMap<_, _>>();
+
+        FirstBuilder {
+            grammar,
+            first: RefCell::new(first),
+            lookup,
+        }
+    }
+
+    /// Fills the First sets, repeating the rules until no set changes.
+    pub(crate) fn build_first(&mut self) {
+        for t in symbols(self.grammar) {
+            if t == epsilon() {
+                continue;
+            }
+
+            if let Term::Terminal(s) = t {
+                // Rule1: If X is a terminal, then First(X) = { X }
+                self.insert_term(t, t);
+                println!("Rule1: Push {} to First({})", s, t);
+            }
+
+            if self.produce_epsilon(t) {
+                // Rule2: If X is an ε-production, then add ε to First(X)
+                self.insert_epsilon(t);
+                println!("Rule2: Push ε to First({})", t);
+            }
+        }
+
+        loop {
+            let mut changed = false;
+
+            for lhs in symbols(self.grammar) {
+                if !matches!(lhs, Term::Nonterminal(_)) {
+                    continue;
+                }
+                println!("===> Checking Symbol: {}", lhs);
+
+                // a non-terminal without a production derives nothing,
+                // so its First set stays empty
+                let production = match self.lookup.get(&lhs) {
+                    Some(production) => *production,
+                    None => continue,
+                };
+
+                for expr in production.rhs_iter() {
+                    // Rule3/4: If X is a non-terminal and X → Y1 Y2 ... Yk,
+                    // then add First(Yi) ∖ {ε} to First(X) for every Yi up to
+                    // and including the first one that cannot derive ε
+                    for term in expr.terms_iter().filter(|term| *term != epsilon()) {
+                        changed |= self.insert_first_no_epsilon(&production.lhs, term);
+                        println!(
+                            "Rule3/4: Push First({}) \\ ε to First({})",
+                            term, production.lhs
+                        );
+                        // terminate (check next expression) if Yi does NOT produce ε
+                        if !self.is_nullable(term) {
+                            println!("{} does NOT produce ε", term);
+                            break;
+                        }
+                    }
+
+                    // Rule 5: If X is a non-terminal and X -> Y1 Y2 ... Yk,
+                    // and First(Yi) contains ε for all i, then add ε to First(X)
+                    if expr.terms_iter().all(|term| self.is_nullable(term)) {
+                        println!("Rule5: Push ε to First({})", production.lhs);
+                        // `|=`, not `=`: an unchanged set here must not hide
+                        // a change recorded earlier in the same pass
+                        changed |= self.insert_epsilon(&production.lhs);
+                    }
+                }
+            }
+
+            if !changed {
+                println!("Unchanged, break!");
+                break;
+            }
+        } // End of loop
+    }
+
+    /// Whether `term` can derive ε according to the First sets built so far.
+    fn is_nullable(&self, term: &Term) -> bool {
+        term == epsilon()
+            || self
+                .first
+                .borrow()
+                .get(term)
+                .map_or(false, |set| set.contains(epsilon()))
+    }
+
+    /// Whether `term` has a production that is an ε-production: either its
+    /// left-hand side is spelled `ε`, or one alternative consists of `ε` only.
+    fn produce_epsilon(&self, term: &Term) -> bool {
+        fn is_epsilon(term: &Term) -> bool {
+            match term {
+                Term::Terminal(name) | Term::Nonterminal(name) => name == "ε",
+            }
+        }
+
+        self.lookup.get(&term).map_or(false, |production| {
+            is_epsilon(&production.lhs)
+                || production
+                    .rhs_iter()
+                    .any(|expr| expr.terms_iter().all(is_epsilon))
+        })
+    }
+
+    /// Insert term to First(x)
+    ///
+    /// return true if the First(x) changes
+    /// otherwise return false
+    ///
+    /// # Panics
+    /// if `x` has no entry in the First table
+    pub(crate) fn insert_term(&self, x: &'grammar Term, term: &'grammar Term) -> bool {
+        self.first
+            .borrow_mut()
+            .get_mut(x)
+            .expect("symbol has no First set")
+            .insert(term)
+    }
+
+    /// Insert epsilon to First(x), return true if First(x) changes
+    pub(crate) fn insert_epsilon(&self, x: &'grammar Term) -> bool {
+        self.insert_term(x, epsilon())
+    }
+
+    /// First(x), or an empty set if `x` has no entry
+    pub(crate) fn first(&self, x: &Term) -> HashSet<&'grammar Term> {
+        self.first.borrow().get(x).cloned().unwrap_or_default()
+    }
+
+    /// Insert every element of `set` into First(x)
+    ///
+    /// return true if the First(x) changes
+    /// otherwise return false
+    ///
+    /// # Panics
+    /// if `x` has no entry in the First table
+    pub(crate) fn insert_set(&self, x: &'grammar Term, set: HashSet<&'grammar Term>) -> bool {
+        let mut first = self.first.borrow_mut();
+        let first_x = first.get_mut(x).expect("symbol has no First set");
+
+        // the set only grows, so a different size means it changed
+        let before = first_x.len();
+        first_x.extend(set);
+        first_x.len() != before
+    }
+
+    /// Insert First(y) \ { ε } into First(x)
+    ///
+    /// return true if the First(x) changes
+    /// otherwise return false
+    pub(crate) fn insert_first_no_epsilon(&self, x: &'grammar Term, y: &'grammar Term) -> bool {
+        // First(y)
+        let mut first_y = self.first(y);
+        // First(y) \ { ε }
+        first_y.remove(epsilon());
+        // Insert First(y) \ { ε } into First(x)
+        self.insert_set(x, first_y)
+    }
+
+    /// Runs the computation and returns the finished First sets.
+    pub(crate) fn build(mut self) -> First<'grammar> {
+        self.build_first();
+        First {
+            first: self.first.into_inner(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::utils::first::builder::FirstBuilder;
+    use bnf::Term;
+
+    #[test]
+    fn first() {
+        let grammar = r#"
+        <E> ::= <T> <E'>
+        <E'> ::= '+' <T> <E'> | 'ε'
+        <T> ::= <F> <T'>
+        <T'> ::= '*' <F> <T'> | 'ε'
+        <F> ::= '(' <E> ')' | 'id'
+        "#
+        .parse()
+        .unwrap();
+
+        let first = FirstBuilder::new(&grammar).build();
+        first.first.iter().for_each(|(lhs, rhs)| match lhs {
+            Term::Terminal(_) => {
+                assert_eq!(rhs.len(), 1)
+            }
+            Term::Nonterminal(_) => {
+                assert_eq!(rhs.len(), 2)
+            }
+        })
+    }
+}
diff --git a/src/utils/first/mod.rs b/src/utils/first/mod.rs
new file mode 100644
index 0000000..06a1f64
--- /dev/null
+++ b/src/utils/first/mod.rs
@@ -0,0 +1,9 @@
+use bnf::Term;
+use std::collections::{HashMap, HashSet};
+
+mod builder;
+
+/// First sets of a grammar: maps a grammar symbol X to First(X).
+pub struct First<'grammar> {
+    pub(crate) first: HashMap<&'grammar Term, HashSet<&'grammar Term>>,
+}
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 0d9df22..47fc680 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -5,6 +5,7 @@ pub mod builder;
 pub mod firstv1;
 pub mod follow;
 
+pub mod first;
 pub fn symbols(grammar: &Grammar) -> HashSet<&Term> {
     grammar
         .productions_iter()