Skip to content

Commit

Permalink
refactor: refactor firstBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
Devin-Yeung committed Nov 18, 2023
1 parent 8570d37 commit 50d1c1c
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 0 deletions.
236 changes: 236 additions & 0 deletions src/utils/first/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
use crate::utils::first::First;
use crate::utils::symbols;
use bnf::{Grammar, Production, Term};
use once_cell::sync::OnceCell;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};

/// Returns the shared `ε` marker used by the First-set computation.
///
/// The marker is the terminal `Term::Terminal("ε")`. It is created on the
/// first call and the same `'static` reference is handed out afterwards, so
/// it can be stored next to terms borrowed from any grammar.
pub fn epsilon() -> &'static Term {
    static MARKER: OnceCell<Term> = OnceCell::new();

    // Builds the marker; runs at most once, on the first call.
    fn make_marker() -> Term {
        Term::Terminal("ε".to_owned())
    }

    MARKER.get_or_init(make_marker)
}

/// Incrementally computes the First set of every symbol of a grammar.
///
/// All terms stored here are borrowed from the grammar (or are the shared
/// [`epsilon`] marker), hence the `'grammar` lifetime.
#[derive(Debug)]
pub struct FirstBuilder<'grammar> {
    /// The grammar whose First sets are being computed.
    pub(crate) grammar: &'grammar Grammar,
    /// The First table under construction: symbol -> First(symbol).
    ///
    /// Wrapped in a `RefCell` so the insertion helpers can update it through
    /// a shared `&self`.
    pub(crate) first: RefCell<HashMap<&'grammar Term, HashSet<&'grammar Term>>>,
    /// Maps a production's left-hand side to the production itself.
    pub(crate) lookup: HashMap<&'grammar Term, &'grammar Production>,
}

impl<'grammar> FirstBuilder<'grammar> {
    /// Creates a builder for `grammar`.
    ///
    /// The production lookup table is keyed by each production's left-hand
    /// side, and the First table gets an empty set for every symbol returned
    /// by `symbols(grammar)` except the ε marker.
    pub(crate) fn new(grammar: &'grammar Grammar) -> FirstBuilder<'grammar> {
        let lookup = grammar
            .productions_iter()
            .map(|production| (&production.lhs, production))
            .collect::<HashMap<_, _>>();

        // ε is only a marker (a terminal named "ε"); it never gets a First
        // set of its own.
        let first = symbols(grammar)
            .into_iter()
            .filter(|term| *term != epsilon())
            .map(|term| (term, HashSet::new()))
            .collect::<HashMap<_, _>>();

        FirstBuilder {
            grammar,
            first: RefCell::new(first),
            lookup,
        }
    }

    /// Fills the First table by iterating the usual rules to a fixed point.
    ///
    /// * Rule 1: `First(t) = { t }` for every terminal `t`.
    /// * Rule 2: `ε ∈ First(X)` when `X` has a direct ε-alternative.
    /// * Rule 3/4: for `X → Y1 Y2 ... Yk`, add `First(Yi) ∖ {ε}` to `First(X)`
    ///   for each `Yi` up to and including the first one that is not nullable.
    /// * Rule 5: `ε ∈ First(X)` when every `Yi` of an alternative is nullable.
    ///
    /// A trace of every step is printed to stdout.
    ///
    /// # Panics
    ///
    /// Panics if a non-terminal of the grammar has no entry in `lookup`, i.e.
    /// it never appears as the left-hand side of a production.
    pub(crate) fn build_first(&mut self) {
        // Seed the table with the facts that need no iteration (rules 1 and 2).
        for t in symbols(self.grammar)
            .into_iter()
            .filter(|term| *term != epsilon())
        {
            if let Term::Terminal(s) = t {
                // Rule1: If X is a terminal, then First(X) = { X }
                self.insert_term(t, t);
                println!("Rule1: Push {} to First({})", s, t);
            }

            if self.produce_epsilon(t) {
                // Rule2: If X is an ε-production, then add ε to First(X)
                self.insert_epsilon(t);
                println!("Rule2: Push ε to First({})", t);
            }
        }

        // Repeat rules 3-5 until a whole pass leaves every set untouched.
        loop {
            let mut changed = false;

            for lhs in symbols(self.grammar)
                .into_iter()
                .filter(|term| matches!(**term, Term::Nonterminal(_)))
            {
                println!("===> Checking Symbol: {}", lhs);
                let production = self
                    .lookup
                    .get(lhs)
                    .copied()
                    .expect("every non-terminal must be the left-hand side of a production");

                for expr in production.rhs_iter() {
                    // Rule3/4: walk Y1 Y2 ... Yk while the prefix is nullable.
                    for term in expr.terms_iter().filter(|term| *term != epsilon()) {
                        // First(Yi) ∖ {ε} to First(X)
                        changed |= self.insert_first_no_epsilon(&production.lhs, term);
                        println!(
                            "Rule3/4: Push First({}) \\ ε to First({})",
                            term, production.lhs
                        );
                        // Stop at the first symbol that cannot vanish. The
                        // check uses the First table, so nullability that was
                        // derived in an earlier pass is honoured as well.
                        if !self.is_nullable(term) {
                            println!("{} does NOT produce ε", term);
                            break;
                        }
                    }

                    // Rule5: every Yi is nullable, so X is nullable too.
                    if expr.terms_iter().all(|term| self.is_nullable(term)) {
                        println!("Rule5: Push ε to First({})", production.lhs);
                        // `|=` so that a change recorded earlier in this pass
                        // is never overwritten with `false`.
                        changed |= self.insert_epsilon(&production.lhs);
                    }
                }
            }

            if !changed {
                println!("Unchanged, break!");
                break;
            }
        } // End of loop
    }

    /// Returns `true` if `term` is the ε marker or `ε ∈ First(term)` in the
    /// table as computed so far.
    fn is_nullable(&self, term: &Term) -> bool {
        term == epsilon()
            || self
                .first
                .borrow()
                .get(term)
                .map_or(false, |set| set.contains(epsilon()))
    }

    /// Returns `true` if `term` is a terminal or non-terminal named `ε`.
    fn is_epsilon_symbol(term: &Term) -> bool {
        matches!(term, Term::Terminal(name) | Term::Nonterminal(name) if name == "ε")
    }

    /// Returns `true` if `term` directly produces ε.
    ///
    /// That is the case when `term` is the left-hand side of a production and
    /// either that left-hand side is itself named `ε`, or at least one
    /// alternative consists solely of `ε` symbols (an alternative without any
    /// terms also counts). Symbols without a production never produce ε here.
    fn produce_epsilon(&self, term: &Term) -> bool {
        self.lookup.get(term).map_or(false, |production| {
            Self::is_epsilon_symbol(&production.lhs)
                || production
                    .rhs_iter()
                    .any(|expr| expr.terms_iter().all(Self::is_epsilon_symbol))
        })
    }

    /// Insert term to First(x)
    ///
    /// return true if the First(x) changes
    /// otherwise return false
    ///
    /// # Panics
    ///
    /// Panics if `x` has no entry in the First table.
    pub(crate) fn insert_term(&self, x: &'grammar Term, term: &'grammar Term) -> bool {
        self.first
            .borrow_mut()
            .get_mut(x)
            .expect("symbol must have been registered in the First table")
            // `HashSet::insert` reports whether the value was newly added.
            .insert(term)
    }

    /// Insert epsilon to First(x)
    ///
    /// return true if the First(x) changes
    /// otherwise return false
    pub(crate) fn insert_epsilon(&self, x: &'grammar Term) -> bool {
        self.insert_term(x, epsilon())
    }

    /// First(x)
    ///
    /// Returns a copy of the current set, or an empty set when `x` has no
    /// entry in the table.
    pub(crate) fn first(&self, x: &Term) -> HashSet<&'grammar Term> {
        self.first.borrow().get(x).cloned().unwrap_or_default()
    }

    /// Insert every term of `set` into First(x)
    ///
    /// return true if the First(x) changes
    /// otherwise return false
    ///
    /// # Panics
    ///
    /// Panics if `x` has no entry in the First table.
    pub(crate) fn insert_set(&self, x: &'grammar Term, set: HashSet<&'grammar Term>) -> bool {
        let mut table = self.first.borrow_mut();
        let first_x = table
            .get_mut(x)
            .expect("symbol must have been registered in the First table");

        // The set only ever grows, so a different length means it changed.
        let before = first_x.len();
        first_x.extend(set);
        first_x.len() != before
    }

    /// Insert First(y) \ { ε } into First(x)
    ///
    /// return true if the First(x) changes
    /// otherwise return false
    pub(crate) fn insert_first_no_epsilon(&self, x: &'grammar Term, y: &'grammar Term) -> bool {
        // `first` hands back a copy, so `x == y` cannot cause overlapping
        // borrows of the table.
        let mut first_y = self.first(y);
        // First(y) \ { ε }
        first_y.remove(epsilon());
        self.insert_set(x, first_y)
    }

    /// Runs [`FirstBuilder::build_first`] and returns the finished table.
    pub(crate) fn build(mut self) -> First<'grammar> {
        self.build_first();
        First {
            first: self.first.into_inner(),
        }
    }
}

// Only compiled for `cargo test`; without the attribute the module (and its
// imports) would also be built into regular builds.
#[cfg(test)]
mod tests {
    use crate::utils::first::builder::FirstBuilder;
    use bnf::{Grammar, Term};
    use std::collections::HashSet;

    /// Classic LL(1) expression grammar with two ε-alternatives.
    const GRAMMAR: &str = r#"
<E> ::= <T> <E'>
<E'> ::= '+' <T> <E'> | 'ε'
<T> ::= <F> <T'>
<T'> ::= '*' <F> <T'> | 'ε'
<F> ::= '(' <E> ')' | 'id'
"#;

    /// Every terminal has a singleton First set and every non-terminal of
    /// this grammar has exactly two entries.
    #[test]
    fn first() {
        let grammar: Grammar = GRAMMAR.parse().unwrap();

        let first = FirstBuilder::new(&grammar).build();
        assert!(!first.first.is_empty());
        first.first.iter().for_each(|(lhs, rhs)| match lhs {
            Term::Terminal(_) => {
                assert_eq!(rhs.len(), 1)
            }
            Term::Nonterminal(_) => {
                assert_eq!(rhs.len(), 2)
            }
        })
    }

    /// Checks the exact contents of the First sets, not just their sizes.
    #[test]
    fn first_sets_contain_expected_terminals() {
        let grammar: Grammar = GRAMMAR.parse().unwrap();
        let first = FirstBuilder::new(&grammar).build();

        let assert_first = |symbol: Term, terminals: &[&str]| {
            let expected: HashSet<Term> = terminals
                .iter()
                .map(|name| Term::Terminal(name.to_string()))
                .collect();
            let actual: HashSet<Term> = first
                .first
                .get(&symbol)
                .unwrap_or_else(|| panic!("no First set for {}", symbol))
                .iter()
                .map(|term| (*term).clone())
                .collect();
            assert_eq!(actual, expected, "First({})", symbol);
        };

        assert_first(Term::Nonterminal("E".to_string()), &["(", "id"]);
        assert_first(Term::Nonterminal("E'".to_string()), &["+", "ε"]);
        assert_first(Term::Nonterminal("T".to_string()), &["(", "id"]);
        assert_first(Term::Nonterminal("T'".to_string()), &["*", "ε"]);
        assert_first(Term::Nonterminal("F".to_string()), &["(", "id"]);
        assert_first(Term::Terminal("+".to_string()), &["+"]);
        assert_first(Term::Terminal("id".to_string()), &["id"]);
    }
}
8 changes: 8 additions & 0 deletions src/utils/first/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use bnf::Term;
use std::collections::{HashMap, HashSet};

mod builder;

/// The First sets of a grammar.
///
/// All terms are borrowed, hence the `'grammar` lifetime.
#[derive(Debug, Clone)]
pub struct First<'grammar> {
    /// Maps a symbol to its First set.
    pub(crate) first: HashMap<&'grammar Term, HashSet<&'grammar Term>>,
}
1 change: 1 addition & 0 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod builder;
pub mod firstv1;
pub mod follow;

pub mod first;
pub fn symbols(grammar: &Grammar) -> HashSet<&Term> {
grammar
.productions_iter()
Expand Down

0 comments on commit 50d1c1c

Please sign in to comment.