Skip to content

Commit

Permalink
Call operator with ^ (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
wrsturgeon authored Nov 14, 2023
2 parents 2fbc523 + a24357f commit 7bc9970
Show file tree
Hide file tree
Showing 2 changed files with 263 additions and 1 deletion.
73 changes: 73 additions & 0 deletions automata/examples/matched_parentheses_codegen/src/autogen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
//! Automatically generated with [inator](https://crates.io/crates/inator).

#![allow(dead_code, unused_variables)]

/// Descriptive parsing error.
///
/// Implements [`core::fmt::Display`] and [`std::error::Error`] so that it can be
/// shown to users and propagated with `?` into boxed or wrapped error types.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Error {
    /// Token without any relevant rule.
    Absurd {
        /// Index of the token that caused this error.
        index: usize,
        /// Particular token that didn't correspond to a rule.
        token: char,
    },
    /// Token that would have closed a delimiter, but the delimiter wasn't open.
    Unopened {
        /// What was actually open, if anything, and the index of the token that opened it.
        what_was_open: Option<(&'static str, usize)>,
        /// Index of the token that caused this error.
        index: usize,
    },
    /// After parsing all input, a delimiter remains open (e.g. "(a, b, c").
    Unclosed {
        /// Region (user-defined name) that was not closed. Sensible to be e.g. "parentheses" for `(...)`.
        region: &'static str,
        /// Index at which the delimiter was opened (e.g., for parentheses, the index of the relevant '(').
        opened: usize,
    },
    /// Ended on a user-defined non-accepting state.
    UserDefined {
        /// User-defined error messages.
        messages: &'static [&'static str],
    },
}

impl core::fmt::Display for Error {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Every field is `Copy`, so matching on `*self` binds plain values.
        match *self {
            Self::Absurd { index, token } => {
                write!(f, "unexpected token {token:?} at index {index}")
            }
            Self::Unopened {
                what_was_open: Some((region, opened)),
                index,
            } => write!(
                f,
                "closing token at index {index} does not close `{region}` (opened at index {opened})"
            ),
            Self::Unopened {
                what_was_open: None,
                index,
            } => write!(f, "closing token at index {index}, but nothing was open"),
            Self::Unclosed { region, opened } => {
                write!(f, "`{region}` opened at index {opened} was never closed")
            }
            Self::UserDefined { messages } => {
                // Join without allocating an intermediate `String`.
                for (position, message) in messages.iter().enumerate() {
                    if position > 0 {
                        f.write_str("; ")?;
                    }
                    f.write_str(message)?;
                }
                Ok(())
            }
        }
    }
}

impl std::error::Error for Error {}

/// Result alias over [`Error`]: the success value pairs an optional
/// `(usize, Option<F<I>>)` with a unit accumulator.
///
/// NOTE(review): nothing visible in this file uses this alias. Presumably the
/// `usize` is a token index and the `F<I>` a follow-up state function — confirm
/// against the generator before relying on that reading.
type R<I> = Result<(Option<(usize, Option<F<I>>)>, ()), Error>;

/// Newtype around a function pointer that takes the input by mutable reference
/// plus a unit accumulator and returns an `R<I>`.
///
/// NOTE(review): presumably a struct rather than an alias because `R<I>` and this
/// type refer to each other; nothing visible in this file constructs it — confirm.
#[repr(transparent)]
struct F<I>(fn(&mut I, ()) -> R<I>);

/// Runs the generated parser over `input` from its first token to its last.
///
/// Each token is paired with its position in the stream before being handed to
/// the initial state, which starts with a unit accumulator and no open delimiter.
///
/// # Errors
///
/// Returns whatever [`Error`] the state machine reports.
#[inline]
pub fn parse<I: IntoIterator<Item = char>>(input: I) -> Result<(), Error> {
    let mut indexed_tokens = input.into_iter().enumerate();
    state_0(&mut indexed_tokens, (), None)
}

#[inline]
fn state_0<I: Iterator<Item = (usize, char)>>(
input: &mut I,
acc: (),
stack_top: Option<(&'static str, usize)>,
) -> Result<(), Error> {
match input.next() {
None => stack_top.map_or(Ok(acc), |(region, opened)| {
Err(Error::Unclosed { region, opened })
}),
Some((index, token)) => match token {
'('..='(' => {
let detour = state_0(input, (), Some(("parentheses", index)))?;
let postprocessed = (|(), ()| ())(acc, detour);
state_0(input, acc, stack_top)
}
')'..=')' => match stack_top {
Some((region, _)) if region == "parentheses" => Ok(acc),
_ => Err(Error::Unopened {
what_was_open: stack_top,
index,
}),
},
_ => Err(Error::Absurd { index, token }),
},
}
}
191 changes: 190 additions & 1 deletion automata/src/combinators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#![allow(clippy::manual_assert, clippy::match_wild_err_arm, clippy::panic)]

use crate::{Ctrl, Curry, Deterministic, Graph, Input, Merge, RangeMap, State, Transition};
use crate::{Ctrl, Curry, Deterministic, Graph, Input, Merge, RangeMap, State, Transition, FF};
use core::{iter, mem, ops};
use std::collections::BTreeSet;

Expand Down Expand Up @@ -128,6 +128,76 @@ impl<I: Input> ops::Shr<Self> for Deterministic<I> {
}
}

/// `lhs ^ (region, rhs, combine)`: builds a parser in which transitions into the
/// accepting states of `lhs` become calls into `rhs`, tagged with `region` and
/// carrying `combine`.
impl<I: Input> ops::BitXor<(&'static str, Self, FF)> for Deterministic<I> {
/// The combined automaton, determinized again.
type Output = Self;
/// Combines `self` with `other` as described on the impl.
///
/// # Panics
///
/// Panics with "Internal error" if the generalized left-hand graph fails `check`,
/// with "Absurdly huge number of states" if shifting the right-hand indices
/// overflows `usize`, and with the underlying error message if merging transitions
/// or the final determinization fails.
#[inline]
fn bitxor(mut self, (region, other, combine): (&'static str, Self, FF)) -> Self::Output {
// Copy the transitions of the right-hand parser's initial state now; `other`
// itself is consumed further down.
let rhs_init = get!(other.states, other.initial)
.transitions
.clone()
.generalize();

// Record the index of every left-hand state whose `non_accepting` is empty, and
// give each of them a message so that it stops being accepting on its own.
let accepting_indices =
self.states
.iter_mut()
.enumerate()
.fold(BTreeSet::new(), |mut acc_i, (i, st)| {
if st.non_accepting.is_empty() {
st.non_accepting = iter::once(
"Ran the first part of a two-parser call \
(with `^`) but not the second one."
.to_owned(),
)
.collect(); // <-- No longer accepting since we need to run the second parser
let _ = acc_i.insert(i);
}
acc_i
});

let mut s = self.generalize();
if s.check().is_err() {
panic!("Internal error")
}
// Number of left-hand states: the right-hand indices are shifted by this much so
// that both sets of states can share one vector.
let size = s.states.len();

let Graph {
states: other_states,
initial: other_initial,
} = other
.generalize()
.map_indices(|i| i.checked_add(size).expect("Absurdly huge number of states"));

s.states.extend(other_states);

// For every transition that an empty stack can take from the initial state of the right-hand parser,
// add that transition (only on the empty stack) to each accepting state of the left-hand parser.
// NOTE(review): the loop below visits every entry of `s.states`, which by now also
// holds the right-hand states appended above, not only the left-hand accepting
// states the comment describes — confirm which is intended.
for state in &mut s.states {
// The `Return { region: "" }` wildcard is only a placeholder that lets the real
// transitions be moved out and merged by value; it is overwritten immediately.
state.transitions = mem::replace(
&mut state.transitions,
Curry::Wildcard(Transition::Return { region: "" }),
)
.merge(rhs_init.clone())
.unwrap_or_else(|e| panic!("{e}"));
}

// If any initial states are immediately accepting, we need to start in the second parser, too.
if s.initial.iter().any(|i| accepting_indices.contains(i)) {
s.initial.extend(other_initial.iter().copied());
}

// Rewrite every state's transitions with `add_call_state`; the remaining fields
// of `s` are carried over unchanged.
let mut out = Graph {
states: s
.states
.into_iter()
.map(|st| add_call_state(st, &other_initial, &accepting_indices, region, &combine))
.collect(),
..s
};
out.sort();
out.determinize().unwrap_or_else(|e| panic!("{e}"))
}
}

/// Add a tail call to any accepting state.
#[inline]
#[must_use]
Expand Down Expand Up @@ -229,3 +299,122 @@ fn add_tail_call_c<I: Input, C: Ctrl<I>>(
iter.collect()
}
}

/// Rebuilds a state so that its transitions go through [`add_call_curry`].
///
/// The `non_accepting` messages are carried over untouched; only the transitions
/// are rewritten.
#[inline]
#[must_use]
fn add_call_state<I: Input, C: Ctrl<I>>(
    s: State<I, C>,
    other_init: &BTreeSet<usize>,
    accepting_indices: &BTreeSet<usize>,
    region: &'static str,
    combine: &FF,
) -> State<I, BTreeSet<usize>> {
    let State {
        transitions,
        non_accepting,
    } = s;
    let transitions = add_call_curry(transitions, other_init, accepting_indices, region, combine);
    State {
        transitions,
        non_accepting,
    }
}

/// Add a call to any accepting state.
#[inline]
#[must_use]
fn add_call_curry<I: Input, C: Ctrl<I>>(
s: Curry<I, C>,
other_init: &BTreeSet<usize>,
accepting_indices: &BTreeSet<usize>,
region: &'static str,
combine: &FF,
) -> Curry<I, BTreeSet<usize>> {
match s {
Curry::Wildcard(t) => Curry::Wildcard(add_call_transition(
t,
other_init,
accepting_indices,
region,
combine,
)),
Curry::Scrutinize { filter, fallback } => Curry::Scrutinize {
filter: add_call_range_map(filter, other_init, accepting_indices, region, combine),
fallback: fallback
.map(|f| add_call_transition(f, other_init, accepting_indices, region, combine)),
},
}
}

/// Add a call to any accepting state.
#[inline]
#[must_use]
fn add_call_range_map<I: Input, C: Ctrl<I>>(
s: RangeMap<I, C>,
other_init: &BTreeSet<usize>,
accepting_indices: &BTreeSet<usize>,
region: &'static str,
combine: &FF,
) -> RangeMap<I, BTreeSet<usize>> {
RangeMap(
s.0.into_iter()
.map(|(k, v)| {
(
k,
add_call_transition(v, other_init, accepting_indices, region, combine),
)
})
.collect(),
)
}

/// Rewrites one transition so that moving into an accepting state becomes a call.
///
/// * `s` - transition to rewrite.
/// * `other_init` - used as the detour of every call this function adds.
/// * `accepting_indices` - destinations that trigger the added call.
/// * `region` - region name given to every call this function adds.
/// * `combine` - combiner cloned into every call this function adds.
///
/// Behaviour per variant:
/// * `Lateral` whose destination contains any index in `accepting_indices` is
///   wrapped in a new `Call` whose `dst` is the original lateral transition;
///   any other `Lateral` only has its destination collected into a set.
/// * An existing `Call` keeps its own region and combiner, has its detour
///   collected into a set, and has its `dst` rewritten recursively.
/// * `Return` passes through unchanged.
///
/// The fields of an existing `Call` are bound under distinct names so that they
/// cannot shadow the `region` and `combine` parameters: the recursive rewrite of
/// `dst` must add calls for the region and combiner requested by the caller, not
/// for those of the call that happens to enclose it.
#[inline]
#[must_use]
fn add_call_transition<I: Input, C: Ctrl<I>>(
    s: Transition<I, C>,
    other_init: &BTreeSet<usize>,
    accepting_indices: &BTreeSet<usize>,
    region: &'static str,
    combine: &FF,
) -> Transition<I, BTreeSet<usize>> {
    match s {
        Transition::Lateral { dst, update } => {
            if dst.view().any(|ref i| accepting_indices.contains(i)) {
                Transition::Call {
                    region,
                    detour: other_init.clone(),
                    dst: Box::new(Transition::Lateral { dst, update }.generalize()),
                    combine: combine.clone(),
                }
            } else {
                Transition::Lateral {
                    dst: dst.view().collect(),
                    update,
                }
            }
        }
        Transition::Call {
            region: existing_region,
            ref detour,
            dst,
            combine: existing_combine,
        } => Transition::Call {
            region: existing_region,
            detour: detour.view().collect(),
            // Recurse with the caller's `region` and `combine`, not the existing call's.
            dst: Box::new(add_call_transition(
                *dst,
                other_init,
                accepting_indices,
                region,
                combine,
            )),
            combine: existing_combine,
        },
        Transition::Return {
            region: returned_region,
        } => Transition::Return {
            region: returned_region,
        },
    }
}

0 comments on commit 7bc9970

Please sign in to comment.