From 1fb9256bd3149d938618cb9b7e92ead3c445ee45 Mon Sep 17 00:00:00 2001 From: Will Sturgeon Date: Sun, 5 Nov 2023 01:37:47 -0400 Subject: [PATCH] Has to be a better way other than splitting into a tuple --- automata/src/curry_input.rs | 9 ++++ automata/src/curry_stack.rs | 9 ++++ automata/src/lib.rs | 2 +- automata/src/range_map.rs | 6 +++ automata/src/test.rs | 2 + automata/src/to_src.rs | 2 +- automata/src/update.rs | 8 ++-- examples/digit/src/parser.rs | 4 +- src/call.rs | 91 ++++++++++++++++++++++++++++++++++++ src/f.rs | 60 +++++++++++++++++++++++- src/lib.rs | 61 ++---------------------- src/num.rs | 15 +++--- 12 files changed, 196 insertions(+), 73 deletions(-) create mode 100644 src/call.rs diff --git a/automata/src/curry_input.rs b/automata/src/curry_input.rs index fd97438..06ca21d 100644 --- a/automata/src/curry_input.rs +++ b/automata/src/curry_input.rs @@ -112,6 +112,15 @@ impl> CurryInput { } } + /// All values in this collection, without their associated keys. + #[inline] + pub fn values_mut(&mut self) -> Box>> { + match *self { + Self::Wildcard(ref mut etc) => Box::new(iter::once(etc)), + Self::Scrutinize(ref mut etc) => Box::new(etc.values_mut()), + } + } + /// Remove an entry by key. /// # Panics /// If we ask to remove a wildcard but it's a specific value, or vice-versa. diff --git a/automata/src/curry_stack.rs b/automata/src/curry_stack.rs index a8dc8e0..6278cb9 100644 --- a/automata/src/curry_stack.rs +++ b/automata/src/curry_stack.rs @@ -109,6 +109,15 @@ impl> CurryStack { .chain(self.map_none.iter()) .chain(self.map_some.values()) } + + /// All values in this collection, without their associated keys. + #[inline] + pub fn values_mut(&mut self) -> impl Iterator> { + self.wildcard + .iter_mut() + .chain(self.map_none.iter_mut()) + .chain(self.map_some.values_mut()) + } } impl CurryStack { diff --git a/automata/src/lib.rs b/automata/src/lib.rs index b4500e3..7308a64 100644 --- a/automata/src/lib.rs +++ b/automata/src/lib.rs @@ -94,7 +94,7 @@ #[macro_export] macro_rules! update { ($ex:expr) => { - $crate::Update::_update_macro(stringify!($ex), $ex) + $crate::Update::_update_macro(stringify!($ex).to_owned(), $ex) }; } diff --git a/automata/src/range_map.rs b/automata/src/range_map.rs index 38e3cc5..eb4001d 100644 --- a/automata/src/range_map.rs +++ b/automata/src/range_map.rs @@ -109,6 +109,12 @@ impl> RangeMap { self.entries.values() } + /// All values in this collection, without their associated keys. + #[inline] + pub fn values_mut(&mut self) -> impl Iterator> { + self.entries.values_mut() + } + /// Remove an entry by key. #[inline] pub fn remove(&mut self, key: &Range) { diff --git a/automata/src/test.rs b/automata/src/test.rs index 0cf7a1e..e8216cd 100644 --- a/automata/src/test.rs +++ b/automata/src/test.rs @@ -293,6 +293,8 @@ mod prop { } } + // TODO: test that running sort twice does not decrease the number of states the second time. + fn shr(lhs: Deterministic, rhs: Deterministic, input: Vec) -> bool { let splittable = (0..=input.len()).any(|i| { lhs.accept(input[..i].iter().copied()).is_ok() && diff --git a/automata/src/to_src.rs b/automata/src/to_src.rs index 0c695d4..c40c658 100644 --- a/automata/src/to_src.rs +++ b/automata/src/to_src.rs @@ -427,7 +427,7 @@ impl Transition { #[allow(clippy::todo)] // TODO: what the fuck does the last case mean? fn to_src(&self, stack_symbol: Option>) -> String { let dst = self.dst; - let f = self.update.src; + let f = self.update.src.as_str(); match self.act { Action::Local => format!( r#"match state_{dst}(input, context, ({f})(acc, token))? {{ diff --git a/automata/src/update.rs b/automata/src/update.rs index 87050f6..65158a5 100644 --- a/automata/src/update.rs +++ b/automata/src/update.rs @@ -19,14 +19,14 @@ pub struct Update { /// Representation of the type of tokens. pub ghost: PhantomData, /// Source-code representation that's promised to compile to a call operationally identical to `ptr`. - pub src: &'static str, + pub src: String, } impl Update { /// Internals of the `update!` macro. #[inline] #[must_use] - pub fn _update_macro(src: &'static str, _: fn(T, I) -> U) -> Self { + pub fn _update_macro(src: String, _: fn(T, I) -> U) -> Self { Self { input_t: T::src_type(), output_t: U::src_type(), @@ -55,7 +55,7 @@ impl PartialOrd for Update { impl Ord for Update { #[inline] fn cmp(&self, other: &Self) -> cmp::Ordering { - self.src.cmp(other.src) + self.src.cmp(&other.src) } } @@ -73,7 +73,7 @@ impl Clone for Update { input_t: self.input_t.clone(), output_t: self.output_t.clone(), ghost: self.ghost, - src: self.src, + src: self.src.clone(), } } } diff --git a/examples/digit/src/parser.rs b/examples/digit/src/parser.rs index 14510a4..1ac7af3 100644 --- a/examples/digit/src/parser.rs +++ b/examples/digit/src/parser.rs @@ -65,14 +65,14 @@ fn state_0>(input: &mut I, context: Option<()>, } #[inline] -fn state_1>(input: &mut I, context: Option<()>, acc: u16) -> R { +fn state_1>(input: &mut I, context: Option<()>, acc: ()) -> R { match input.next() { None => Err(Error::UserDefined { messages: &["Expected a token in the range [b\'0\'..=b\'9\'] but input ended"], }), Some((index, token)) => match (&context, &token) { (&_, &(b'0'..=b'9')) => { - match state_0(input, context, (|_: u16, i| i - b'0')(acc, token))? { + match state_0(input, context, (|(), i| i - b'0')(acc, token))? { (done @ (None | Some((_, _, None))), acc) => Ok((done, acc)), (Some((idx, ctx, Some(F(f)))), out) => f(input, Some(ctx), out), } diff --git a/src/call.rs b/src/call.rs new file mode 100644 index 0000000..21ca357 --- /dev/null +++ b/src/call.rs @@ -0,0 +1,91 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ + +//! Save the current value, run this second parser from scratch, then combine the results. + +use crate::{F, FF}; +use core::{iter, ops}; +use inator_automata::*; +use std::collections::{BTreeMap, BTreeSet}; + +/// Save the current value, run this second parser from scratch, then combine the results. +pub struct Call { + /// Parser to call. + parser: Deterministic, + /// Combine the tabled result with the result of the call. + combinator: FF, +} + +/// Save the current value, run this second parser from scratch, then combine the results. +#[inline] +#[must_use] +pub fn call(parser: Deterministic, combinator: FF) -> Call { + Call { parser, combinator } +} + +impl ops::Shr> for Deterministic { + type Output = Self; + #[inline] + #[must_use] + #[allow(clippy::panic)] + fn shr(self, Call { parser, combinator }: Call) -> Self::Output { + let Ok(maybe_parser_input_t) = parser.input_type() else { + panic!("Inconsistent types in the parser argument to `combine`.") + }; + let Some(parser_output_t) = maybe_parser_input_t else { + panic!( + "Parser argument to `combine` has no initial states, \ + so it can never parse anything.", + ) + }; + if parser_output_t != "()" { + panic!( + "Called `call` with a parser that doesn't start from scratch \ + (it wants an input of type `{parser_output_t}`, \ + but it should start from scratch with an input of type `()`)." + ); + } + + // From `automata/src/combinators.rs` (in the original `>>` implementation): + let s = self.generalize(); + let size = s.states.len(); + let Graph { + states: call_states, + initial: call_initial, + tags: call_tags, + } = parser + .generalize() + .map_indices(|i| i.checked_add(size).expect("Absurdly huge number of states")); + assert_eq!(call_initial.len(), 1); + + let call_init = get!(call_states, unwrap!(unwrap!(call_initial.first()))); + let init_trans = call_init.transitions.merge(); + for state in s.states.iter_mut().filter(|&s| { + if s.non_accepting.is_empty() { + s.non_accepting = iter::once(todo!()).collect(); + true + } else { + false + } + }) { + state.transitions = state + .transitions + .merge(call_init.transitions) + .unwrap_or_else(|e| panic!("{e}")); + } + + s.states.extend(call_states); + s.tags.extend(call_tags); + + // Split the accumulator into a passthrough unmodified first argument and a new modifiable second argument. + let split = F { + src: "|x| (x, ())".to_owned(), + arg_t: parser_output_t.clone(), + output_t: format!("({parser_output_t}, ())"), + }; + let splat /* past tense */ = parser >> split; + } +} diff --git a/src/f.rs b/src/f.rs index 00d3934..1f57284 100644 --- a/src/f.rs +++ b/src/f.rs @@ -8,7 +8,9 @@ #![allow(clippy::module_name_repetitions)] -use inator_automata::ToSrc; +use core::{convert::identity as id, mem, ops}; +use inator_automata::*; +use std::collections::BTreeSet; /// One-argument function. #[non_exhaustive] @@ -65,3 +67,59 @@ impl FF { } } } + +impl> ops::Shr for Graph { + type Output = Self; + #[inline] + #[must_use] + #[allow(clippy::panic, clippy::todo)] + fn shr(mut self, rhs: F) -> Self::Output { + let Ok(out_t) = self.output_type() else { + panic!("Type inconsistency in the parser argument to `process`.") + }; + if out_t.as_deref() != Some(&rhs.arg_t) { + panic!( + "Called `process` with a function that wants an input of type `{}`, \ + but the parser {}.", + rhs.arg_t, + out_t.map_or_else(|| "never returns".to_owned(), |t| format!("returns `{t}`")) + ); + } + let accepting_indices: BTreeSet = self + .states + .iter() + .enumerate() + .filter(|&(_, s)| s.non_accepting.is_empty()) + .map(|(i, _)| i) + .collect(); + for state in &mut self.states { + for curry in state.transitions.values_mut() { + for transition in curry.values_mut() { + let to_accepting = transition + .dst + .view() + .map(|r| r.map_or_else(|tag| *unwrap!(self.tags.get(tag)), id)) + .any(|i| accepting_indices.contains(&i)); + if !to_accepting { + continue; + } + { + let old_out_t = + mem::replace(&mut transition.update.output_t, rhs.output_t.clone()); + assert_eq!( + old_out_t, rhs.arg_t, + "Tried to apply a function to the output of a parser, but \ + at least one path in the parser produced a mismatched type: \ + the post-processing function wanted an input of type {}, but \ + a path to an accepting state produced a value of type {old_out_t}", + rhs.arg_t, + ); + } + let src = mem::take(&mut transition.update.src); + transition.update.src = format!("|tok, inp| ({})({src}(tok, inp))", rhs.src); + } + } + } + self + } +} diff --git a/src/lib.rs b/src/lib.rs index 74f7eed..1e07821 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,7 +100,6 @@ clippy::wildcard_imports )] -/* /// Unwrap if we're debugging but `unwrap_unchecked` if we're not. #[cfg(any(debug_assertions, test))] macro_rules! unwrap { @@ -137,6 +136,7 @@ macro_rules! get { }}; } +/* /// Unwrap if we're debugging but `unwrap_unchecked` if we're not. #[cfg(any(debug_assertions, test))] macro_rules! get_mut { @@ -154,7 +154,6 @@ macro_rules! get_mut { result }}; } -*/ /// Unreachable state, but checked if we're debugging. #[cfg(any(debug_assertions, test))] @@ -174,6 +173,7 @@ macro_rules! never { } }}; } +*/ /// One-argument function. #[macro_export] @@ -195,6 +195,7 @@ macro_rules! ff { // TODO: Macro that isn't context-aware but just dumps the codegen right there +mod call; mod f; mod fixpoint; mod num; @@ -298,59 +299,3 @@ pub fn toss(token: I) -> Deterministic { pub fn toss_range(range: Range) -> Deterministic { any_of(range, update!(|(), _| {})) } - -/// Run this parser, then apply this function to the result. -/// # Panics -/// FIXME -#[inline] -#[must_use] -#[allow(clippy::needless_pass_by_value, clippy::todo)] // <-- TODO -#[allow(clippy::panic)] -pub fn process>( - parser: Graph, - combinator: F, -) -> Graph { - let Ok(parser_output_t) = parser.output_type() else { - panic!("Inconsistent types in the parser argument to `process`.") - }; - if parser_output_t.as_deref() != Some(&combinator.arg_t) { - panic!( - "Called `process` with a function that wants an input of type `{}`, \ - but the parser {}.", - combinator.arg_t, - parser_output_t - .map_or_else(|| "never returns".to_owned(), |t| format!("returns `{t}`")) - ); - } - todo!() -} - -/// Save the current value and put it aside, run this second parser from scratch, then combine the results. -/// # Panics -/// FIXME -#[inline] -#[must_use] -#[allow(clippy::needless_pass_by_value, clippy::todo)] // <-- TODO -#[allow(clippy::panic)] -pub fn combine>( - parser: Graph, - _combinator: FF, -) -> Graph { - let Ok(maybe_parser_input_t) = parser.input_type() else { - panic!("Inconsistent types in the parser argument to `combine`.") - }; - let Some(parser_output_t) = maybe_parser_input_t else { - panic!( - "Parser argument to `combine` has no initial states, \ - so it can never parse anything.", - ) - }; - if parser_output_t != "()" { - panic!( - "Called `combine` with a parser that doesn't start from scratch \ - (it asks for an input of type `{parser_output_t}` instead of `()`)." - ); - } - // TODO: We might have to define a `Combine` struct to handle the `>>` operator - todo!() -} diff --git a/src/num.rs b/src/num.rs index c878c82..16618e2 100644 --- a/src/num.rs +++ b/src/num.rs @@ -6,7 +6,7 @@ //! Numeric utilities. -use crate::{any_of, combine, fixpoint, process, recurse}; +use crate::{any_of, call, fixpoint, recurse}; use inator_automata::*; /// Any digit character (0, 1, 2, 3, 4, 5, 6, 7, 8, 9). @@ -19,7 +19,7 @@ pub fn digit() -> Deterministic { first: b'0', last: b'9', }, - update!(|_: u16, i| i - b'0'), + update!(|(), i| i - b'0'), ) } @@ -28,9 +28,12 @@ pub fn digit() -> Deterministic { #[must_use] #[allow(clippy::arithmetic_side_effects)] pub fn integer() -> Deterministic { - let shape = process(digit(), f!(|i: u8| usize::from(i))) + digit() + >> f!(|i: u8| Some(usize::from(i))) >> fixpoint("integer") - >> combine(digit(), ff!(|a: usize, b: usize| a * 10 + b)) - >> recurse("integer"); - shape.determinize().unwrap_or_else(|_| never!()) + >> call( + digit(), + ff!(|a: Option, b| a?.checked_mul(10)?.checked_add(b)), + ) + >> recurse("integer") }