From 8b689a7cb461b4c51b7ac55b320c0ffbcac61e2a Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 14 May 2019 21:02:11 +0300 Subject: [PATCH 1/5] Revert "runtime: group `Parser` fields in a `Continuation`." This reverts commit ec48a00f9591a9c1d825efdbd3157cec3fdc6f6d. --- src/runtime.rs | 55 +++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/src/runtime.rs b/src/runtime.rs index 01ffc75..c80ccc5 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -244,7 +244,9 @@ impl InputMatch> for str { pub struct Parser<'a, 'i, C: CodeLabel, I: Input> { state: &'a mut ParserState<'i, C, I>, - current: Continuation<'i, C>, + current: C, + saved: Option>, + result: Range<'i>, remaining: Range<'i>, } @@ -314,10 +316,21 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { ); // Run all threads to completion. - while let Some(Call { callee, range }) = state.threads.steal() { - callee.code.step(Parser { + while let Some(next) = state.threads.steal() { + let Call { + callee: + Continuation { + code, + saved, + result, + }, + range, + } = next; + code.step(Parser { state: &mut state, - current: callee, + current: code, + saved, + result, remaining: range, }); } @@ -367,10 +380,9 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } Some(Parser { state: self.state, - current: Continuation { - result: Range(self.current.result.join(matching).unwrap()), - ..self.current - }, + current: self.current, + saved: self.saved, + result: Range(self.result.join(matching).unwrap()), remaining: Range(after), }) } @@ -396,10 +408,9 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); Some(Parser { state: self.state, - current: Continuation { - result: Range(matching.join(self.current.result.0).unwrap()), - ..self.current - }, + current: self.current, + saved: self.saved, + result: Range(matching.join(self.result.0).unwrap()), 
remaining: Range(before), }) } @@ -409,15 +420,15 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { // FIXME(eddyb) maybe specialize this further, for `forest_add_split`? pub fn save(&mut self, kind: C::ParseNodeKind) { - let old_saved = self.current.saved.replace(ParseNode { + let old_saved = self.saved.replace(ParseNode { kind, - range: self.current.result, + range: self.result, }); assert_eq!(old_saved, None); } pub fn take_saved(&mut self) -> ParseNode<'i, C::ParseNodeKind> { - self.current.saved.take().unwrap() + self.saved.take().unwrap() } pub fn forest_add_choice(&mut self, kind: C::ParseNodeKind, choice: C::ParseNodeKind) { @@ -426,7 +437,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { .possible_choices .entry(ParseNode { kind, - range: self.current.result, + range: self.result, }) .or_default() .insert(choice); @@ -443,7 +454,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { .possible_splits .entry(ParseNode { kind, - range: self.current.result, + range: self.result, }) .or_default() .insert(left.range.len()); @@ -453,7 +464,8 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { self.state.threads.spawn( Continuation { code: next, - ..self.current + saved: self.saved, + result: self.result, }, self.remaining, ); @@ -466,7 +478,8 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { }; let next = Continuation { code: next, - ..self.current + saved: self.saved, + result: self.result, }; let returns = self.state.gss.returns.entry(call).or_default(); if returns.insert(next) { @@ -497,10 +510,10 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } pub fn ret(&mut self) { - let call_result = self.current.result; + let call_result = self.result; let remaining = self.remaining; let call = Call { - callee: self.current.code.enclosing_fn(), + callee: self.current.enclosing_fn(), range: Range(call_result.join(remaining.0).unwrap()), }; if self From 4b8fdb8c2fe77eae1711e90e5d76302d3d21caa0 Mon Sep 17 00:00:00 2001 From: 
Eduard-Mihai Burtescu Date: Tue, 14 May 2019 21:17:13 +0300 Subject: [PATCH 2/5] runtime: split out input facilities into an `input` module. --- src/generate/rust.rs | 22 +- src/generate/templates/header.rs | 26 +- src/generate/templates/imports.rs | 5 +- src/input.rs | 238 +++++++++++++++++ src/lib.rs | 6 +- ...c_macro_runtime.rs => proc_macro_input.rs} | 2 +- src/runtime.rs | 239 +----------------- 7 files changed, 272 insertions(+), 266 deletions(-) create mode 100644 src/input.rs rename src/{proc_macro_runtime.rs => proc_macro_input.rs} (98%) diff --git a/src/generate/rust.rs b/src/generate/rust.rs index 294e46b..96e803c 100644 --- a/src/generate/rust.rs +++ b/src/generate/rust.rs @@ -934,7 +934,7 @@ where let rust_slice_ty = Pat::rust_slice_ty(); quote!( impl #ident<'_, '_, I> - where I: gll::runtime::Input, + where I: gll::input::Input, { pub fn parse(input: I) -> Result< @@ -953,7 +953,7 @@ where } } - impl OwnedHandle> { + impl OwnedHandle> { pub fn with(&self, f: impl for<'a, 'i> FnOnce(Handle<'a, 'i, I, #ident<'a, 'i, I>>) -> R) -> R { self.forest_and_node.unpack_ref(|_, forest_and_node| { let (ref forest, node) = *forest_and_node; @@ -1005,7 +1005,7 @@ where }); quote!( #[allow(non_camel_case_types)] - pub enum #ident<'a, 'i, I: gll::runtime::Input> { + pub enum #ident<'a, 'i, I: gll::input::Input> { #(#variants),* } ) @@ -1022,7 +1022,7 @@ where }; quote!( #[allow(non_camel_case_types)] - pub struct #ident<'a, 'i, I: gll::runtime::Input> { + pub struct #ident<'a, 'i, I: gll::input::Input> { #(pub #fields_ident: #fields_ty),* #marker_field } @@ -1132,7 +1132,7 @@ where ) }; - quote!(impl<'a, 'i, I: gll::runtime::Input> #ident<'a, 'i, I> { + quote!(impl<'a, 'i, I: gll::input::Input> #ident<'a, 'i, I> { #methods }) } @@ -1220,7 +1220,7 @@ where }; quote!(impl<'a, 'i, I> Handle<'a, 'i, I, #ident<'a, 'i, I>> - where I: gll::runtime::Input, + where I: gll::input::Input, { pub fn one(self) -> Result<#ident<'a, 'i, I>, Ambiguity> { // HACK(eddyb) using a 
closure to catch `Err`s from `?` @@ -1313,7 +1313,7 @@ fn rule_debug_impl( d.finish() ) }; - quote!(impl fmt::Debug for #ident<'_, '_, I> { + quote!(impl fmt::Debug for #ident<'_, '_, I> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { #body } @@ -1338,7 +1338,7 @@ fn rule_handle_debug_impl(name: &str, has_fields: bool) -> Src { ) }; quote!( - impl<'a, 'i, I: gll::runtime::Input> fmt::Debug for Handle<'a, 'i, I, #ident<'a, 'i, I>> { + impl<'a, 'i, I: gll::input::Input> fmt::Debug for Handle<'a, 'i, I, #ident<'a, 'i, I>> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self.source_info())?; #body @@ -1346,7 +1346,7 @@ fn rule_handle_debug_impl(name: &str, has_fields: bool) -> Src { } } - impl fmt::Debug for OwnedHandle> { + impl fmt::Debug for OwnedHandle> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.with(|handle| handle.fmt(f)) } @@ -1385,7 +1385,7 @@ where let rust_slice_ty = Pat::rust_slice_ty(); quote!(impl gll::runtime::CodeStep for _C - where I: gll::runtime::Input, + where I: gll::input::Input, { fn step<'i>(self, mut p: gll::runtime::Parser<'_, 'i, _C, I>) { match self { @@ -1447,7 +1447,7 @@ fn impl_debug_for_handle_any(all_parse_nodes: &[ParseNode]) -> Src { }),) }) }); - quote!(impl fmt::Debug for Handle<'_, '_, I, Any> { + quote!(impl fmt::Debug for Handle<'_, '_, I, Any> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.node.kind { #(#arms)* diff --git a/src/generate/templates/header.rs b/src/generate/templates/header.rs index b0f6347..9f5ad7b 100644 --- a/src/generate/templates/header.rs +++ b/src/generate/templates/header.rs @@ -3,12 +3,12 @@ pub type Any = dyn any::Any; #[derive(Debug)] pub struct Ambiguity(T); -pub struct OwnedHandle { +pub struct OwnedHandle { forest_and_node: gll::runtime::OwnedParseForestAndNode<_P, I>, _marker: PhantomData, } -impl OwnedHandle { +impl OwnedHandle { pub fn source_info(&self) -> I::SourceInfo { self.forest_and_node.unpack_ref(|_, 
forest_and_node| { let (ref forest, node) = *forest_and_node; @@ -17,21 +17,21 @@ impl OwnedHandle { } } -pub struct Handle<'a, 'i, I: gll::runtime::Input, T: ?Sized> { +pub struct Handle<'a, 'i, I: gll::input::Input, T: ?Sized> { pub node: ParseNode<'i, _P>, pub forest: &'a gll::runtime::ParseForest<'i, _P, I>, _marker: PhantomData, } -impl Copy for Handle<'_, '_, I, T> {} +impl Copy for Handle<'_, '_, I, T> {} -impl Clone for Handle<'_, '_, I, T> { +impl Clone for Handle<'_, '_, I, T> { fn clone(&self) -> Self { *self } } -impl<'a, I: gll::runtime::Input, T: ?Sized> Handle<'a, '_, I, T> { +impl<'a, I: gll::input::Input, T: ?Sized> Handle<'a, '_, I, T> { pub fn source(self) -> &'a I::Slice { self.forest.input(self.node.range) } @@ -40,7 +40,7 @@ impl<'a, I: gll::runtime::Input, T: ?Sized> Handle<'a, '_, I, T> { } } -impl<'a, 'i, I: gll::runtime::Input, T> From>> +impl<'a, 'i, I: gll::input::Input, T> From>> for Ambiguity> { fn from(x: Ambiguity>) -> Self { @@ -52,7 +52,7 @@ impl<'a, 'i, I: gll::runtime::Input, T> From>> } } -impl<'a, 'i, I: gll::runtime::Input, T> From>> +impl<'a, 'i, I: gll::input::Input, T> From>> for Ambiguity> { fn from(x: Ambiguity>) -> Self { @@ -64,13 +64,13 @@ impl<'a, 'i, I: gll::runtime::Input, T> From>> } } -impl fmt::Debug for Handle<'_, '_, I, ()> { +impl fmt::Debug for Handle<'_, '_, I, ()> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self.source_info()) } } -impl<'a, 'i, I: gll::runtime::Input, T> fmt::Debug for Handle<'a, 'i, I, [T]> +impl<'a, 'i, I: gll::input::Input, T> fmt::Debug for Handle<'a, 'i, I, [T]> where Handle<'a, 'i, I, T>: fmt::Debug, { @@ -116,7 +116,7 @@ where } } -impl<'a, 'i, I: gll::runtime::Input, T> Iterator for Handle<'a, 'i, I, [T]> { +impl<'a, 'i, I: gll::input::Input, T> Iterator for Handle<'a, 'i, I, [T]> { type Item = Result, Ambiguity>; fn next(&mut self) -> Option { match self.all_list_heads() { @@ -129,7 +129,7 @@ impl<'a, 'i, I: gll::runtime::Input, T> Iterator for 
Handle<'a, 'i, I, [T]> { } else { match self.node.kind.shape() { ParseNodeShape::Opt(_) => { - self.node.range = Range(original.node.range.split_at(0).0); + self.node.range.0 = original.node.range.frontiers().0; } _ => unreachable!(), } @@ -150,7 +150,7 @@ pub enum ListHead { Nil, } -impl<'a, 'i, I: gll::runtime::Input, T> Handle<'a, 'i, I, [T]> { +impl<'a, 'i, I: gll::input::Input, T> Handle<'a, 'i, I, [T]> { fn one_list_head(self) -> ListHead, Self), Ambiguity>> { match self.all_list_heads() { ListHead::Cons(mut iter) => { diff --git a/src/generate/templates/imports.rs b/src/generate/templates/imports.rs index b16af6e..e0d4727 100644 --- a/src/generate/templates/imports.rs +++ b/src/generate/templates/imports.rs @@ -1,6 +1,5 @@ -use gll::runtime::{ - nd::Arrow, traverse, CodeLabel, ParseNode, ParseNodeKind, ParseNodeShape, Range, -}; +use gll::parse_node::ParseNodeShape; +use gll::runtime::{nd::Arrow, traverse, CodeLabel, ParseNode, ParseNodeKind}; use std::any; use std::fmt; use std::marker::PhantomData; diff --git a/src/input.rs b/src/input.rs new file mode 100644 index 0000000..1a35cd0 --- /dev/null +++ b/src/input.rs @@ -0,0 +1,238 @@ +use crate::indexing_str; +use indexing::container_traits::Trustworthy; +use indexing::{self, Container, Index, Unknown}; +use std::cmp::Ordering; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::{self, Deref, RangeInclusive}; +use std::str; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Range<'i>(pub indexing::Range<'i>); + +impl<'i> Deref for Range<'i> { + type Target = indexing::Range<'i>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl PartialOrd for Range<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + (self.start(), self.end()).partial_cmp(&(other.start(), other.end())) + } +} + +impl Ord for Range<'_> { + fn cmp(&self, other: &Self) -> Ordering { + (self.start(), self.end()).cmp(&(other.start(), other.end())) + } +} + +impl Hash for Range<'_> { + fn hash(&self, state: &mut 
H) { + (self.start(), self.end()).hash(state); + } +} + +impl Range<'_> { + pub fn subtract_suffix(self, other: Self) -> Self { + assert_eq!(self.end(), other.end()); + Range(self.split_at(other.start() - self.start()).0) + } +} + +#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +pub struct LineColumn { + pub line: usize, + pub column: usize, +} + +impl fmt::Debug for LineColumn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", 1 + self.line, 1 + self.column) + } +} + +impl LineColumn { + fn count(prefix: &str) -> Self { + let (line, column) = prefix + .split('\n') + .enumerate() + .last() + .map_or((0, 0), |(i, s)| (i, s.chars().count())); + LineColumn { line, column } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct LineColumnRange { + pub start: LineColumn, + pub end: LineColumn, +} + +impl fmt::Debug for LineColumnRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}-{:?}", self.start, self.end) + } +} + +pub trait Input: Sized { + type Container: Trustworthy; + type Slice: ?Sized; + type SourceInfo: fmt::Debug; + // FIXME(eddyb) remove - replace with `SourceInfo` for the affected range + type SourceInfoPoint: fmt::Debug; + fn to_container(self) -> Self::Container; + fn slice<'a, 'i>( + input: &'a Container<'i, Self::Container>, + range: Range<'i>, + ) -> &'a Self::Slice; + fn source_info<'i>( + input: &Container<'i, Self::Container>, + range: Range<'i>, + ) -> Self::SourceInfo; + fn source_info_point<'i>( + input: &Container<'i, Self::Container>, + index: Index<'i, Unknown>, + ) -> Self::SourceInfoPoint; +} + +impl Input for &[T] { + type Container = Self; + type Slice = [T]; + type SourceInfo = ops::Range; + type SourceInfoPoint = usize; + fn to_container(self) -> Self::Container { + self + } + fn slice<'b, 'i>( + input: &'b Container<'i, Self::Container>, + range: Range<'i>, + ) -> &'b Self::Slice { + &input[range.0] + } + fn source_info<'i>(_: 
&Container<'i, Self::Container>, range: Range<'i>) -> Self::SourceInfo { + range.as_range() + } + fn source_info_point<'i>( + _: &Container<'i, Self::Container>, + index: Index<'i, Unknown>, + ) -> Self::SourceInfoPoint { + index.integer() + } +} + +impl<'a> Input for &'a str { + type Container = &'a indexing_str::Str; + type Slice = str; + type SourceInfo = LineColumnRange; + type SourceInfoPoint = LineColumn; + fn to_container(self) -> Self::Container { + self.into() + } + fn slice<'b, 'i>( + input: &'b Container<'i, Self::Container>, + range: Range<'i>, + ) -> &'b Self::Slice { + indexing_str::Str::slice(input, range.0) + } + fn source_info<'i>( + input: &Container<'i, Self::Container>, + range: Range<'i>, + ) -> Self::SourceInfo { + let start = Self::source_info_point(input, range.first()); + // HACK(eddyb) add up `LineColumn`s to avoid counting twice. + // Ideally we'd cache around a line map, like rustc's `SourceMap`. + let mut end = LineColumn::count(Self::slice(input, range)); + end.line += start.line; + if end.line == start.line { + end.column += start.column; + } + LineColumnRange { start, end } + } + fn source_info_point<'i>( + input: &Container<'i, Self::Container>, + index: Index<'i, Unknown>, + ) -> Self::SourceInfoPoint { + let prefix_range = Range(input.split_at(index).0); + LineColumn::count(Self::slice(input, prefix_range)) + } +} + +pub trait InputMatch { + fn match_left(&self, pat: &'static Pat) -> Option; + fn match_right(&self, pat: &'static Pat) -> Option; +} + +impl InputMatch<&'static [T]> for [T] { + fn match_left(&self, pat: &&[T]) -> Option { + if self.starts_with(pat) { + Some(pat.len()) + } else { + None + } + } + fn match_right(&self, pat: &&[T]) -> Option { + if self.ends_with(pat) { + Some(pat.len()) + } else { + None + } + } +} + +impl InputMatch> for [T] { + fn match_left(&self, pat: &RangeInclusive) -> Option { + let x = self.first()?; + if pat.start() <= x && x <= pat.end() { + Some(1) + } else { + None + } + } + fn 
match_right(&self, pat: &RangeInclusive) -> Option { + let x = self.last()?; + if pat.start() <= x && x <= pat.end() { + Some(1) + } else { + None + } + } +} + +impl InputMatch<&'static str> for str { + fn match_left(&self, pat: &&str) -> Option { + if self.starts_with(pat) { + Some(pat.len()) + } else { + None + } + } + fn match_right(&self, pat: &&str) -> Option { + if self.ends_with(pat) { + Some(pat.len()) + } else { + None + } + } +} + +impl InputMatch> for str { + fn match_left(&self, pat: &RangeInclusive) -> Option { + let c = self.chars().next()?; + if *pat.start() <= c && c <= *pat.end() { + Some(c.len_utf8()) + } else { + None + } + } + fn match_right(&self, pat: &RangeInclusive) -> Option { + let c = self.chars().rev().next()?; + if *pat.start() <= c && c <= *pat.end() { + Some(c.len_utf8()) + } else { + None + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ea37032..26dbe3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,8 @@ mod indexing_str; #[forbid(unsafe_code)] pub mod generate; #[forbid(unsafe_code)] +pub mod input; +#[forbid(unsafe_code)] pub mod parse_node; #[forbid(unsafe_code)] pub mod proc_macro; @@ -19,9 +21,9 @@ pub mod runtime; pub mod scannerless; // HACK(eddyb) this contains impls for types in `proc_macro`, which depend on -// `runtime`. Those parts of `runtime` should be moved to `grammer::input`. +// `input`. Those parts of `input` should be moved to `grammer::input`. 
#[forbid(unsafe_code)] -mod proc_macro_runtime; +mod proc_macro_input; #[forbid(unsafe_code)] mod parse_grammar; diff --git a/src/proc_macro_runtime.rs b/src/proc_macro_input.rs similarity index 98% rename from src/proc_macro_runtime.rs rename to src/proc_macro_input.rs index d0896a0..53d6fa2 100644 --- a/src/proc_macro_runtime.rs +++ b/src/proc_macro_input.rs @@ -1,5 +1,5 @@ +use crate::input::{Input, InputMatch, Range}; use crate::proc_macro::{flatten, FlatToken, FlatTokenPat, Span, TokenStream}; -use crate::runtime::{Input, InputMatch, Range}; use indexing::{proof::Provable, Container, Index, Unknown}; use std::ops; diff --git a/src/runtime.rs b/src/runtime.rs index c80ccc5..43c21ad 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1,247 +1,14 @@ -pub use crate::parse_node::ParseNodeShape; - use crate::high::{type_lambda, ErasableL, ExistsL, PairL}; -use crate::indexing_str; -use indexing::container_traits::Trustworthy; +use crate::input::{Input, InputMatch, Range}; +use crate::parse_node::ParseNodeShape; use indexing::{self, Container, Index, Unknown}; use std::cmp::{Ordering, Reverse}; use std::collections::{BTreeSet, BinaryHeap, HashMap, VecDeque}; use std::fmt; -use std::hash::{Hash, Hasher}; +use std::hash::Hash; use std::io::{self, Write}; -use std::ops::{self, Deref, RangeInclusive}; use std::str; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct Range<'i>(pub indexing::Range<'i>); - -impl<'i> Deref for Range<'i> { - type Target = indexing::Range<'i>; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl PartialOrd for Range<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - (self.start(), self.end()).partial_cmp(&(other.start(), other.end())) - } -} - -impl Ord for Range<'_> { - fn cmp(&self, other: &Self) -> Ordering { - (self.start(), self.end()).cmp(&(other.start(), other.end())) - } -} - -impl Hash for Range<'_> { - fn hash(&self, state: &mut H) { - (self.start(), self.end()).hash(state); - } -} - -impl Range<'_> { - pub fn 
subtract_suffix(self, other: Self) -> Self { - assert_eq!(self.end(), other.end()); - Range(self.split_at(other.start() - self.start()).0) - } -} - -#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] -pub struct LineColumn { - pub line: usize, - pub column: usize, -} - -impl fmt::Debug for LineColumn { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}", 1 + self.line, 1 + self.column) - } -} - -impl LineColumn { - fn count(prefix: &str) -> Self { - let (line, column) = prefix - .split('\n') - .enumerate() - .last() - .map_or((0, 0), |(i, s)| (i, s.chars().count())); - LineColumn { line, column } - } -} - -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct LineColumnRange { - pub start: LineColumn, - pub end: LineColumn, -} - -impl fmt::Debug for LineColumnRange { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}-{:?}", self.start, self.end) - } -} - -pub trait Input: Sized { - type Container: Trustworthy; - type Slice: ?Sized; - type SourceInfo: fmt::Debug; - // FIXME(eddyb) remove - replace with `SourceInfo` for the affected range - type SourceInfoPoint: fmt::Debug; - fn to_container(self) -> Self::Container; - fn slice<'a, 'i>( - input: &'a Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'a Self::Slice; - fn source_info<'i>( - input: &Container<'i, Self::Container>, - range: Range<'i>, - ) -> Self::SourceInfo; - fn source_info_point<'i>( - input: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint; -} - -impl Input for &[T] { - type Container = Self; - type Slice = [T]; - type SourceInfo = ops::Range; - type SourceInfoPoint = usize; - fn to_container(self) -> Self::Container { - self - } - fn slice<'b, 'i>( - input: &'b Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'b Self::Slice { - &input[range.0] - } - fn source_info<'i>(_: &Container<'i, Self::Container>, range: Range<'i>) -> Self::SourceInfo { - 
range.as_range() - } - fn source_info_point<'i>( - _: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint { - index.integer() - } -} - -impl<'a> Input for &'a str { - type Container = &'a indexing_str::Str; - type Slice = str; - type SourceInfo = LineColumnRange; - type SourceInfoPoint = LineColumn; - fn to_container(self) -> Self::Container { - self.into() - } - fn slice<'b, 'i>( - input: &'b Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'b Self::Slice { - indexing_str::Str::slice(input, range.0) - } - fn source_info<'i>( - input: &Container<'i, Self::Container>, - range: Range<'i>, - ) -> Self::SourceInfo { - let start = Self::source_info_point(input, range.first()); - // HACK(eddyb) add up `LineColumn`s to avoid counting twice. - // Ideally we'd cache around a line map, like rustc's `SourceMap`. - let mut end = LineColumn::count(Self::slice(input, range)); - end.line += start.line; - if end.line == start.line { - end.column += start.column; - } - LineColumnRange { start, end } - } - fn source_info_point<'i>( - input: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint { - let prefix_range = Range(input.split_at(index).0); - LineColumn::count(Self::slice(input, prefix_range)) - } -} - -pub trait InputMatch { - fn match_left(&self, pat: &'static Pat) -> Option; - fn match_right(&self, pat: &'static Pat) -> Option; -} - -impl InputMatch<&'static [T]> for [T] { - fn match_left(&self, pat: &&[T]) -> Option { - if self.starts_with(pat) { - Some(pat.len()) - } else { - None - } - } - fn match_right(&self, pat: &&[T]) -> Option { - if self.ends_with(pat) { - Some(pat.len()) - } else { - None - } - } -} - -impl InputMatch> for [T] { - fn match_left(&self, pat: &RangeInclusive) -> Option { - let x = self.first()?; - if pat.start() <= x && x <= pat.end() { - Some(1) - } else { - None - } - } - fn match_right(&self, pat: &RangeInclusive) -> Option { - let x = self.last()?; - if 
pat.start() <= x && x <= pat.end() { - Some(1) - } else { - None - } - } -} - -impl InputMatch<&'static str> for str { - fn match_left(&self, pat: &&str) -> Option { - if self.starts_with(pat) { - Some(pat.len()) - } else { - None - } - } - fn match_right(&self, pat: &&str) -> Option { - if self.ends_with(pat) { - Some(pat.len()) - } else { - None - } - } -} - -impl InputMatch> for str { - fn match_left(&self, pat: &RangeInclusive) -> Option { - let c = self.chars().next()?; - if *pat.start() <= c && c <= *pat.end() { - Some(c.len_utf8()) - } else { - None - } - } - fn match_right(&self, pat: &RangeInclusive) -> Option { - let c = self.chars().rev().next()?; - if *pat.start() <= c && c <= *pat.end() { - Some(c.len_utf8()) - } else { - None - } - } -} - pub struct Parser<'a, 'i, C: CodeLabel, I: Input> { state: &'a mut ParserState<'i, C, I>, current: C, From b84918f46acd020f84e6cb07c433f67c60ef7d21 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 14 May 2019 21:37:18 +0300 Subject: [PATCH 3/5] runtime: split out the SPPF into a `forest` module. --- src/forest.rs | 546 ++++++++++++++++++++++++++++++ src/generate/rust.rs | 8 +- src/generate/templates/header.rs | 4 +- src/generate/templates/imports.rs | 2 +- src/lib.rs | 2 + src/runtime.rs | 545 +---------------------------- 6 files changed, 559 insertions(+), 548 deletions(-) create mode 100644 src/forest.rs diff --git a/src/forest.rs b/src/forest.rs new file mode 100644 index 0000000..ed83c24 --- /dev/null +++ b/src/forest.rs @@ -0,0 +1,546 @@ +use crate::high::{type_lambda, ExistsL, PairL}; +use crate::input::{Input, Range}; +use crate::parse_node::ParseNodeShape; +use indexing::{self, Container}; +use std::collections::{BTreeSet, HashMap, VecDeque}; +use std::fmt; +use std::hash::Hash; +use std::io::{self, Write}; +use std::str; + +/// A parse forest, in SPPF (Shared Packed Parse Forest) representation. 
+pub struct ParseForest<'i, P: ParseNodeKind, I: Input> { + // HACK(eddyb) `pub(crate)` only for `runtime`. + pub(crate) input: Container<'i, I::Container>, + pub(crate) possible_choices: HashMap, BTreeSet

>, + pub(crate) possible_splits: HashMap, BTreeSet>, +} + +type_lambda! { + pub type<'i> ParseForestL = ParseForest<'i, P, I>; + pub type<'i> ParseNodeL = ParseNode<'i, P>; +} + +pub type OwnedParseForestAndNode = ExistsL, ParseNodeL

>>; + +#[derive(Debug)] +pub struct MoreThanOne; + +impl<'i, P: ParseNodeKind, I: Input> ParseForest<'i, P, I> { + pub fn input(&self, range: Range<'i>) -> &I::Slice { + I::slice(&self.input, range) + } + + pub fn source_info(&self, range: Range<'i>) -> I::SourceInfo { + I::source_info(&self.input, range) + } + + pub fn one_choice(&self, node: ParseNode<'i, P>) -> Result, MoreThanOne> { + match node.kind.shape() { + ParseNodeShape::Choice => { + let choices = &self.possible_choices[&node]; + if choices.len() > 1 { + return Err(MoreThanOne); + } + let &choice = choices.iter().next().unwrap(); + Ok(ParseNode { + kind: choice, + range: node.range, + }) + } + shape => unreachable!("one_choice({}): non-choice shape {}", node, shape), + } + } + + pub fn all_choices<'a>( + &'a self, + node: ParseNode<'i, P>, + ) -> impl Iterator> + Clone + 'a { + match node.kind.shape() { + ParseNodeShape::Choice => self + .possible_choices + .get(&node) + .into_iter() + .flatten() + .cloned() + .map(move |kind| ParseNode { + kind, + range: node.range, + }), + shape => unreachable!("all_choices({}): non-choice shape {}", node, shape), + } + } + + pub fn one_split( + &self, + node: ParseNode<'i, P>, + ) -> Result<(ParseNode<'i, P>, ParseNode<'i, P>), MoreThanOne> { + match node.kind.shape() { + ParseNodeShape::Split(left_kind, right_kind) => { + let splits = &self.possible_splits[&node]; + if splits.len() > 1 { + return Err(MoreThanOne); + } + let &split = splits.iter().next().unwrap(); + let (left, right, _) = node.range.split_at(split); + Ok(( + ParseNode { + kind: left_kind, + range: Range(left), + }, + ParseNode { + kind: right_kind, + range: Range(right), + }, + )) + } + shape => unreachable!("one_split({}): non-split shape {}", node, shape), + } + } + + pub fn all_splits<'a>( + &'a self, + node: ParseNode<'i, P>, + ) -> impl Iterator, ParseNode<'i, P>)> + Clone + 'a { + match node.kind.shape() { + ParseNodeShape::Split(left_kind, right_kind) => self + .possible_splits + .get(&node) + 
.into_iter() + .flatten() + .cloned() + .map(move |i| { + let (left, right, _) = node.range.split_at(i); + ( + ParseNode { + kind: left_kind, + range: Range(left), + }, + ParseNode { + kind: right_kind, + range: Range(right), + }, + ) + }), + shape => unreachable!("all_splits({}): non-split shape {}", node, shape), + } + } + + pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> { + writeln!(out, "digraph forest {{")?; + let mut queue: VecDeque<_> = self + .possible_choices + .keys() + .chain(self.possible_splits.keys()) + .cloned() + .collect(); + let mut seen: BTreeSet<_> = queue.iter().cloned().collect(); + let mut p = 0; + while let Some(source) = queue.pop_front() { + writeln!(out, " {:?} [shape=box]", source.to_string())?; + let mut add_children = |children: &[(&str, ParseNode<'i, P>)]| -> io::Result<()> { + writeln!(out, r#" p{} [label="" shape=point]"#, p)?; + writeln!(out, " {:?} -> p{}:n", source.to_string(), p)?; + for &(port, child) in children { + writeln!( + out, + " p{}:{} -> {:?}:n [dir=none]", + p, + port, + child.to_string() + )?; + if seen.insert(child) { + queue.push_back(child); + } + } + p += 1; + Ok(()) + }; + match source.kind.shape() { + ParseNodeShape::Opaque => {} + + ParseNodeShape::Alias(_) => { + add_children(&[("s", source.unpack_alias())])?; + } + + ParseNodeShape::Opt(_) => { + if let Some(child) = source.unpack_opt() { + add_children(&[("s", child)])?; + } + } + + ParseNodeShape::Choice => { + for child in self.all_choices(source) { + add_children(&[("s", child)])?; + } + } + + ParseNodeShape::Split(..) 
=> { + for (left, right) in self.all_splits(source) { + add_children(&[("sw", left), ("se", right)])?; + } + } + } + } + writeln!(out, "}}") + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ParseNode<'i, P: ParseNodeKind> { + pub kind: P, + pub range: Range<'i>, +} + +impl ParseNode<'_, P> { + pub fn unpack_alias(self) -> Self { + match self.kind.shape() { + ParseNodeShape::Alias(inner) => ParseNode { + kind: inner, + range: self.range, + }, + shape => unreachable!("unpack_alias({}): non-alias shape {}", self, shape), + } + } + + pub fn unpack_opt(self) -> Option { + match self.kind.shape() { + ParseNodeShape::Opt(inner) => { + if self.range.is_empty() { + None + } else { + Some(ParseNode { + kind: inner, + range: self.range, + }) + } + } + shape => unreachable!("unpack_opt({}): non-opt shape {}", self, shape), + } + } +} + +impl fmt::Display for ParseNode<'_, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{} @ {}..{}", + self.kind, + self.range.start(), + self.range.end() + ) + } +} + +impl fmt::Debug for ParseNode<'_, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{} @ {}..{}", + self.kind, + self.range.start(), + self.range.end() + ) + } +} + +pub trait ParseNodeKind: fmt::Display + Ord + Hash + Copy + 'static { + fn shape(self) -> ParseNodeShape; +} + +// FIXME(rust-lang/rust#54175) work around iterator adapter compile-time +// blowup issues by using a makeshift "non-determinism arrow toolkit". +pub mod nd { + use std::iter; + use std::marker::PhantomData; + + pub trait Arrow: Copy { + type Input; + type Output; + type Iter: Iterator + Clone; + fn apply(&self, x: Self::Input) -> Self::Iter; + + fn map R, R>(self, f: F) -> Map { + Map(self, f) + } + fn then>(self, b: B) -> Then { + Then(self, b) + } + fn pairs(self, b: B) -> Pairs + where + Self::Output: Copy, + B::Input: Copy, + { + Pairs(self, b) + } + } + + macro_rules! 
derive_copy { + ($name:ident<$($param:ident $(: $bound:ident)*),*>) => { + impl<$($param $(: $bound)*),*> Copy for $name<$($param),*> {} + impl<$($param $(: $bound)*),*> Clone for $name<$($param),*> { + fn clone(&self) -> Self { + *self + } + } + } + } + + pub struct Id(PhantomData); + derive_copy!(Id); + impl Id { + pub fn new() -> Self { + Id(PhantomData) + } + } + impl Arrow for Id { + type Input = T; + type Output = T; + type Iter = iter::Once; + fn apply(&self, x: T) -> Self::Iter { + iter::once(x) + } + } + + pub struct FromIter(F, PhantomData); + derive_copy!(FromIter); + impl FromIter { + pub fn new(f: F) -> Self { + FromIter(f, PhantomData) + } + } + impl I, I: Iterator + Clone> Arrow for FromIter { + type Input = T; + type Output = I::Item; + type Iter = I; + fn apply(&self, x: T) -> I { + self.0(x) + } + } + + pub struct FromIterK(K, F, PhantomData); + derive_copy!(FromIterK); + impl FromIterK { + pub fn new(k: K, f: F) -> Self { + FromIterK(k, f, PhantomData) + } + } + impl I, I: Iterator + Clone> Arrow for FromIterK { + type Input = T; + type Output = I::Item; + type Iter = I; + fn apply(&self, x: T) -> I { + self.1(self.0, x) + } + } + + #[derive(Copy, Clone)] + pub struct Map(A, F); + impl R, R> Arrow for Map { + type Input = A::Input; + type Output = R; + type Iter = iter::Map; + fn apply(&self, x: Self::Input) -> Self::Iter { + self.0.apply(x).map(self.1) + } + } + + #[derive(Clone)] + pub struct ThenIter> { + a_iter: A::Iter, + b_arrow: B, + b_iter: Option, + // HACK(eddyb) this field is useless (never set to `Some`) + // (see `match self.b_iter_backwards` below for more details). 
+ b_iter_backwards: Option, + } + impl> Iterator for ThenIter { + type Item = B::Output; + fn next(&mut self) -> Option { + loop { + if let Some(ref mut b_iter) = self.b_iter { + if let x @ Some(_) = b_iter.next() { + return x; + } + } + match self.a_iter.next() { + // HACK(eddyb) this never does anything, but without a *second* + // call to `B::Iter::next`, LLVM spends more time optimizing. + None => { + return match self.b_iter_backwards { + Some(ref mut b_iter) => b_iter.next(), + None => None, + } + } + Some(x) => self.b_iter = Some(self.b_arrow.apply(x)), + } + } + } + } + + #[derive(Copy, Clone)] + pub struct Then(A, B); + impl> Arrow for Then { + type Input = A::Input; + type Output = B::Output; + type Iter = ThenIter; + fn apply(&self, x: Self::Input) -> Self::Iter { + ThenIter { + a_iter: self.0.apply(x), + b_arrow: self.1, + b_iter: None, + b_iter_backwards: None, + } + } + } + + #[derive(Clone)] + pub struct PairsIter + where + A::Output: Copy, + B::Input: Copy, + { + a_iter: A::Iter, + b_iter0: B::Iter, + a_output_b_iter: Option<(A::Output, B::Iter)>, + } + impl Iterator for PairsIter + where + A::Output: Copy, + B::Input: Copy, + { + type Item = (A::Output, B::Output); + fn next(&mut self) -> Option { + loop { + if let Some((x, ref mut b_iter)) = self.a_output_b_iter { + if let Some(y) = b_iter.next() { + return Some((x, y)); + } + } + match self.a_iter.next() { + None => return None, + Some(x) => { + self.a_output_b_iter = Some((x, self.b_iter0.clone())); + } + } + } + } + } + + #[derive(Copy, Clone)] + pub struct Pairs(A, B); + impl Arrow for Pairs + where + A::Output: Copy, + B::Input: Copy, + { + type Input = (A::Input, B::Input); + type Output = (A::Output, B::Output); + type Iter = PairsIter; + fn apply(&self, (x, y): Self::Input) -> Self::Iter { + PairsIter { + a_iter: self.0.apply(x), + b_iter0: self.1.apply(y), + a_output_b_iter: None, + } + } + } +} + +// HACK(eddyb) work around `macro_rules` not being `use`-able. 
+pub use crate::__forest_traverse as traverse; + +#[macro_export] +macro_rules! __forest_traverse { + (typeof($leaf:ty) _) => { $leaf }; + (typeof($leaf:ty) ?) => { Option }; + (typeof($leaf:ty) ($l_shape:tt, $r_shape:tt)) => { (traverse!(typeof($leaf) $l_shape), traverse!(typeof($leaf) $r_shape)) }; + (typeof($leaf:ty) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { ($(traverse!(typeof($leaf) $shape),)*) }; + (typeof($leaf:ty) [$shape:tt]) => { (traverse!(typeof($leaf) $shape),) }; + + (one($forest:ident, $node:ident) _) => { + $node + }; + (one($forest:ident, $node:ident) ?) => { + Some($node) + }; + (one($forest:ident, $node:ident) ($l_shape:tt, $r_shape:tt)) => { + { + let (left, right) = $forest.one_split($node)?; + ( + traverse!(one($forest, left) $l_shape), + traverse!(one($forest, right) $r_shape), + ) + } + }; + (one($forest:ident, $node:ident) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { + { + let node = $forest.one_choice($node)?; + let mut r = <($(traverse!(typeof(_) $shape),)*)>::default(); + match node.kind { + $($kind => r.$i = traverse!(one($forest, node) $shape),)* + _ => unreachable!(), + } + r + } + }; + (one($forest:ident, $node:ident) [$shape:tt]) => { + { + let mut r = <(traverse!(typeof(_) $shape),)>::default(); + if let Some(node) = $node.unpack_opt() { + r.0 = traverse!(one($forest, node) $shape); + } + r + } + }; + + (all($forest:ident) _) => { + $crate::forest::nd::Id::new() + }; + (all($forest:ident) ?) 
=> { + $crate::forest::nd::Id::new().map(Some) + }; + (all($forest:ident) ($l_shape:tt, $r_shape:tt)) => { + $crate::forest::nd::FromIterK::new($forest, $crate::forest::ParseForest::all_splits) + .then(traverse!(all($forest) $l_shape).pairs(traverse!(all($forest) $r_shape))) + }; + (all($forest:ident) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { + $crate::forest::nd::FromIter::new(move |node| { + #[derive(Clone)] + enum Iter<$($_i),*> { + $($_i($_i)),* + } + impl<$($_i: Iterator),*> Iterator for Iter<$($_i),*> + where $($_i::Item: Default),* + { + type Item = ($($_i::Item),*); + fn next(&mut self) -> Option { + let mut r = Self::Item::default(); + match self { + $(Iter::$_i(iter) => r.$i = iter.next()?),* + } + Some(r) + } + } + $forest.all_choices(node).flat_map(move |node| { + match node.kind { + $($kind => Iter::$_i(traverse!(all($forest) $shape).apply(node)),)* + _ => unreachable!(), + } + }) + }) + }; + (all($forest:ident) [$shape:tt]) => { + $crate::forest::nd::FromIter::new(move |node| { + match $crate::forest::ParseNode::unpack_opt(node) { + Some(node) => { + Some(traverse!(all($forest) $shape).apply(node).map(|x| (x,))) + .into_iter().flatten().chain(None) + } + None => { + None.into_iter().flatten().chain(Some(<_>::default())) + } + } + }) + } +} diff --git a/src/generate/rust.rs b/src/generate/rust.rs index 96e803c..ae4424a 100644 --- a/src/generate/rust.rs +++ b/src/generate/rust.rs @@ -1099,7 +1099,7 @@ where quote!(#( #[allow(non_snake_case)] fn #variants_from_forest_ident( - forest: &'a gll::runtime::ParseForest<'i, _P, I>, + forest: &'a gll::forest::ParseForest<'i, _P, I>, _node: ParseNode<'i, _P>, _r: traverse!(typeof(ParseNode<'i, _P>) #variants_shape), ) -> Self { @@ -1120,7 +1120,7 @@ where }; quote!( fn from_forest( - forest: &'a gll::runtime::ParseForest<'i, _P, I>, + forest: &'a gll::forest::ParseForest<'i, _P, I>, _node: ParseNode<'i, _P>, _r: traverse!(typeof(ParseNode<'i, _P>) #shape), ) -> Self { @@ -1228,7 +1228,7 @@ where let 
_forest = self.forest; let node = self.node.unpack_alias(); #one - }))().map_err(|gll::runtime::MoreThanOne| Ambiguity(self)) + }))().map_err(|gll::forest::MoreThanOne| Ambiguity(self)) } pub fn all(self) -> impl Iterator> { @@ -1488,7 +1488,7 @@ fn code_label_decl_and_impls( enum _C { #(#all_labels_ident),* } - impl CodeLabel for _C { + impl gll::runtime::CodeLabel for _C { type ParseNodeKind = _P; fn enclosing_fn(self) -> Self { diff --git a/src/generate/templates/header.rs b/src/generate/templates/header.rs index 9f5ad7b..d8a18ef 100644 --- a/src/generate/templates/header.rs +++ b/src/generate/templates/header.rs @@ -4,7 +4,7 @@ pub type Any = dyn any::Any; pub struct Ambiguity(T); pub struct OwnedHandle { - forest_and_node: gll::runtime::OwnedParseForestAndNode<_P, I>, + forest_and_node: gll::forest::OwnedParseForestAndNode<_P, I>, _marker: PhantomData, } @@ -19,7 +19,7 @@ impl OwnedHandle { pub struct Handle<'a, 'i, I: gll::input::Input, T: ?Sized> { pub node: ParseNode<'i, _P>, - pub forest: &'a gll::runtime::ParseForest<'i, _P, I>, + pub forest: &'a gll::forest::ParseForest<'i, _P, I>, _marker: PhantomData, } diff --git a/src/generate/templates/imports.rs b/src/generate/templates/imports.rs index e0d4727..663794f 100644 --- a/src/generate/templates/imports.rs +++ b/src/generate/templates/imports.rs @@ -1,5 +1,5 @@ +use gll::forest::{nd::Arrow, traverse, ParseNode, ParseNodeKind}; use gll::parse_node::ParseNodeShape; -use gll::runtime::{nd::Arrow, traverse, CodeLabel, ParseNode, ParseNodeKind}; use std::any; use std::fmt; use std::marker::PhantomData; diff --git a/src/lib.rs b/src/lib.rs index 26dbe3e..de5346a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,8 @@ mod high; #[allow(unsafe_code)] mod indexing_str; +#[forbid(unsafe_code)] +pub mod forest; #[forbid(unsafe_code)] pub mod generate; #[forbid(unsafe_code)] diff --git a/src/runtime.rs b/src/runtime.rs index 43c21ad..20e3e37 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1,13 +1,12 @@ -use 
crate::high::{type_lambda, ErasableL, ExistsL, PairL}; +use crate::forest::{OwnedParseForestAndNode, ParseForest, ParseNode, ParseNodeKind}; +use crate::high::ErasableL; use crate::input::{Input, InputMatch, Range}; -use crate::parse_node::ParseNodeShape; -use indexing::{self, Container, Index, Unknown}; +use indexing::{self, Index, Unknown}; use std::cmp::{Ordering, Reverse}; -use std::collections::{BTreeSet, BinaryHeap, HashMap, VecDeque}; +use std::collections::{BTreeSet, BinaryHeap, HashMap}; use std::fmt; use std::hash::Hash; use std::io::{self, Write}; -use std::str; pub struct Parser<'a, 'i, C: CodeLabel, I: Input> { state: &'a mut ParserState<'i, C, I>, @@ -34,13 +33,6 @@ pub struct ParseError { pub type ParseResult = Result>; -type_lambda! { - pub type<'i> ParseForestL = ParseForest<'i, P, I>; - pub type<'i> ParseNodeL = ParseNode<'i, P>; -} - -pub type OwnedParseForestAndNode = ExistsL, ParseNodeL

>>; - impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { pub fn parse( input: I, @@ -436,238 +428,6 @@ impl<'i, C: CodeLabel> Memoizer<'i, C> { } } -/// A parse forest, in SPPF (Shared Packed Parse Forest) representation. -pub struct ParseForest<'i, P: ParseNodeKind, I: Input> { - input: Container<'i, I::Container>, - possible_choices: HashMap, BTreeSet

>, - possible_splits: HashMap, BTreeSet>, -} - -#[derive(Debug)] -pub struct MoreThanOne; - -impl<'i, P: ParseNodeKind, I: Input> ParseForest<'i, P, I> { - pub fn input(&self, range: Range<'i>) -> &I::Slice { - I::slice(&self.input, range) - } - - pub fn source_info(&self, range: Range<'i>) -> I::SourceInfo { - I::source_info(&self.input, range) - } - - pub fn one_choice(&self, node: ParseNode<'i, P>) -> Result, MoreThanOne> { - match node.kind.shape() { - ParseNodeShape::Choice => { - let choices = &self.possible_choices[&node]; - if choices.len() > 1 { - return Err(MoreThanOne); - } - let &choice = choices.iter().next().unwrap(); - Ok(ParseNode { - kind: choice, - range: node.range, - }) - } - shape => unreachable!("one_choice({}): non-choice shape {}", node, shape), - } - } - - pub fn all_choices<'a>( - &'a self, - node: ParseNode<'i, P>, - ) -> impl Iterator> + Clone + 'a { - match node.kind.shape() { - ParseNodeShape::Choice => self - .possible_choices - .get(&node) - .into_iter() - .flatten() - .cloned() - .map(move |kind| ParseNode { - kind, - range: node.range, - }), - shape => unreachable!("all_choices({}): non-choice shape {}", node, shape), - } - } - - pub fn one_split( - &self, - node: ParseNode<'i, P>, - ) -> Result<(ParseNode<'i, P>, ParseNode<'i, P>), MoreThanOne> { - match node.kind.shape() { - ParseNodeShape::Split(left_kind, right_kind) => { - let splits = &self.possible_splits[&node]; - if splits.len() > 1 { - return Err(MoreThanOne); - } - let &split = splits.iter().next().unwrap(); - let (left, right, _) = node.range.split_at(split); - Ok(( - ParseNode { - kind: left_kind, - range: Range(left), - }, - ParseNode { - kind: right_kind, - range: Range(right), - }, - )) - } - shape => unreachable!("one_split({}): non-split shape {}", node, shape), - } - } - - pub fn all_splits<'a>( - &'a self, - node: ParseNode<'i, P>, - ) -> impl Iterator, ParseNode<'i, P>)> + Clone + 'a { - match node.kind.shape() { - ParseNodeShape::Split(left_kind, right_kind) 
=> self - .possible_splits - .get(&node) - .into_iter() - .flatten() - .cloned() - .map(move |i| { - let (left, right, _) = node.range.split_at(i); - ( - ParseNode { - kind: left_kind, - range: Range(left), - }, - ParseNode { - kind: right_kind, - range: Range(right), - }, - ) - }), - shape => unreachable!("all_splits({}): non-split shape {}", node, shape), - } - } - - pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> { - writeln!(out, "digraph forest {{")?; - let mut queue: VecDeque<_> = self - .possible_choices - .keys() - .chain(self.possible_splits.keys()) - .cloned() - .collect(); - let mut seen: BTreeSet<_> = queue.iter().cloned().collect(); - let mut p = 0; - while let Some(source) = queue.pop_front() { - writeln!(out, " {:?} [shape=box]", source.to_string())?; - let mut add_children = |children: &[(&str, ParseNode<'i, P>)]| -> io::Result<()> { - writeln!(out, r#" p{} [label="" shape=point]"#, p)?; - writeln!(out, " {:?} -> p{}:n", source.to_string(), p)?; - for &(port, child) in children { - writeln!( - out, - " p{}:{} -> {:?}:n [dir=none]", - p, - port, - child.to_string() - )?; - if seen.insert(child) { - queue.push_back(child); - } - } - p += 1; - Ok(()) - }; - match source.kind.shape() { - ParseNodeShape::Opaque => {} - - ParseNodeShape::Alias(_) => { - add_children(&[("s", source.unpack_alias())])?; - } - - ParseNodeShape::Opt(_) => { - if let Some(child) = source.unpack_opt() { - add_children(&[("s", child)])?; - } - } - - ParseNodeShape::Choice => { - for child in self.all_choices(source) { - add_children(&[("s", child)])?; - } - } - - ParseNodeShape::Split(..) 
=> { - for (left, right) in self.all_splits(source) { - add_children(&[("sw", left), ("se", right)])?; - } - } - } - } - writeln!(out, "}}") - } -} - -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct ParseNode<'i, P: ParseNodeKind> { - pub kind: P, - pub range: Range<'i>, -} - -impl ParseNode<'_, P> { - pub fn unpack_alias(self) -> Self { - match self.kind.shape() { - ParseNodeShape::Alias(inner) => ParseNode { - kind: inner, - range: self.range, - }, - shape => unreachable!("unpack_alias({}): non-alias shape {}", self, shape), - } - } - - pub fn unpack_opt(self) -> Option { - match self.kind.shape() { - ParseNodeShape::Opt(inner) => { - if self.range.is_empty() { - None - } else { - Some(ParseNode { - kind: inner, - range: self.range, - }) - } - } - shape => unreachable!("unpack_opt({}): non-opt shape {}", self, shape), - } - } -} - -impl fmt::Display for ParseNode<'_, P> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} @ {}..{}", - self.kind, - self.range.start(), - self.range.end() - ) - } -} - -impl fmt::Debug for ParseNode<'_, P> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{} @ {}..{}", - self.kind, - self.range.start(), - self.range.end() - ) - } -} - -pub trait ParseNodeKind: fmt::Display + Ord + Hash + Copy + 'static { - fn shape(self) -> ParseNodeShape; -} - pub trait CodeLabel: fmt::Debug + Ord + Hash + Copy + 'static { type ParseNodeKind: ParseNodeKind; @@ -677,300 +437,3 @@ pub trait CodeLabel: fmt::Debug + Ord + Hash + Copy + 'static { pub trait CodeStep: CodeLabel { fn step<'i>(self, p: Parser<'_, 'i, Self, I>); } - -// FIXME(rust-lang/rust#54175) work around iterator adapter compile-time -// blowup issues by using a makeshift "non-determinism arrow toolkit". 
-pub mod nd { - use std::iter; - use std::marker::PhantomData; - - pub trait Arrow: Copy { - type Input; - type Output; - type Iter: Iterator + Clone; - fn apply(&self, x: Self::Input) -> Self::Iter; - - fn map R, R>(self, f: F) -> Map { - Map(self, f) - } - fn then>(self, b: B) -> Then { - Then(self, b) - } - fn pairs(self, b: B) -> Pairs - where - Self::Output: Copy, - B::Input: Copy, - { - Pairs(self, b) - } - } - - macro_rules! derive_copy { - ($name:ident<$($param:ident $(: $bound:ident)*),*>) => { - impl<$($param $(: $bound)*),*> Copy for $name<$($param),*> {} - impl<$($param $(: $bound)*),*> Clone for $name<$($param),*> { - fn clone(&self) -> Self { - *self - } - } - } - } - - pub struct Id(PhantomData); - derive_copy!(Id); - impl Id { - pub fn new() -> Self { - Id(PhantomData) - } - } - impl Arrow for Id { - type Input = T; - type Output = T; - type Iter = iter::Once; - fn apply(&self, x: T) -> Self::Iter { - iter::once(x) - } - } - - pub struct FromIter(F, PhantomData); - derive_copy!(FromIter); - impl FromIter { - pub fn new(f: F) -> Self { - FromIter(f, PhantomData) - } - } - impl I, I: Iterator + Clone> Arrow for FromIter { - type Input = T; - type Output = I::Item; - type Iter = I; - fn apply(&self, x: T) -> I { - self.0(x) - } - } - - pub struct FromIterK(K, F, PhantomData); - derive_copy!(FromIterK); - impl FromIterK { - pub fn new(k: K, f: F) -> Self { - FromIterK(k, f, PhantomData) - } - } - impl I, I: Iterator + Clone> Arrow for FromIterK { - type Input = T; - type Output = I::Item; - type Iter = I; - fn apply(&self, x: T) -> I { - self.1(self.0, x) - } - } - - #[derive(Copy, Clone)] - pub struct Map(A, F); - impl R, R> Arrow for Map { - type Input = A::Input; - type Output = R; - type Iter = iter::Map; - fn apply(&self, x: Self::Input) -> Self::Iter { - self.0.apply(x).map(self.1) - } - } - - #[derive(Clone)] - pub struct ThenIter> { - a_iter: A::Iter, - b_arrow: B, - b_iter: Option, - // HACK(eddyb) this field is useless (never set to `Some`) - 
// (see `match self.b_iter_backwards` below for more details). - b_iter_backwards: Option, - } - impl> Iterator for ThenIter { - type Item = B::Output; - fn next(&mut self) -> Option { - loop { - if let Some(ref mut b_iter) = self.b_iter { - if let x @ Some(_) = b_iter.next() { - return x; - } - } - match self.a_iter.next() { - // HACK(eddyb) this never does anything, but without a *second* - // call to `B::Iter::next`, LLVM spends more time optimizing. - None => { - return match self.b_iter_backwards { - Some(ref mut b_iter) => b_iter.next(), - None => None, - } - } - Some(x) => self.b_iter = Some(self.b_arrow.apply(x)), - } - } - } - } - - #[derive(Copy, Clone)] - pub struct Then(A, B); - impl> Arrow for Then { - type Input = A::Input; - type Output = B::Output; - type Iter = ThenIter; - fn apply(&self, x: Self::Input) -> Self::Iter { - ThenIter { - a_iter: self.0.apply(x), - b_arrow: self.1, - b_iter: None, - b_iter_backwards: None, - } - } - } - - #[derive(Clone)] - pub struct PairsIter - where - A::Output: Copy, - B::Input: Copy, - { - a_iter: A::Iter, - b_iter0: B::Iter, - a_output_b_iter: Option<(A::Output, B::Iter)>, - } - impl Iterator for PairsIter - where - A::Output: Copy, - B::Input: Copy, - { - type Item = (A::Output, B::Output); - fn next(&mut self) -> Option { - loop { - if let Some((x, ref mut b_iter)) = self.a_output_b_iter { - if let Some(y) = b_iter.next() { - return Some((x, y)); - } - } - match self.a_iter.next() { - None => return None, - Some(x) => { - self.a_output_b_iter = Some((x, self.b_iter0.clone())); - } - } - } - } - } - - #[derive(Copy, Clone)] - pub struct Pairs(A, B); - impl Arrow for Pairs - where - A::Output: Copy, - B::Input: Copy, - { - type Input = (A::Input, B::Input); - type Output = (A::Output, B::Output); - type Iter = PairsIter; - fn apply(&self, (x, y): Self::Input) -> Self::Iter { - PairsIter { - a_iter: self.0.apply(x), - b_iter0: self.1.apply(y), - a_output_b_iter: None, - } - } - } -} - -// HACK(eddyb) work around 
`macro_rules` not being `use`-able. -pub use crate::__runtime_traverse as traverse; - -#[macro_export] -macro_rules! __runtime_traverse { - (typeof($leaf:ty) _) => { $leaf }; - (typeof($leaf:ty) ?) => { Option }; - (typeof($leaf:ty) ($l_shape:tt, $r_shape:tt)) => { (traverse!(typeof($leaf) $l_shape), traverse!(typeof($leaf) $r_shape)) }; - (typeof($leaf:ty) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { ($(traverse!(typeof($leaf) $shape),)*) }; - (typeof($leaf:ty) [$shape:tt]) => { (traverse!(typeof($leaf) $shape),) }; - - (one($forest:ident, $node:ident) _) => { - $node - }; - (one($forest:ident, $node:ident) ?) => { - Some($node) - }; - (one($forest:ident, $node:ident) ($l_shape:tt, $r_shape:tt)) => { - { - let (left, right) = $forest.one_split($node)?; - ( - traverse!(one($forest, left) $l_shape), - traverse!(one($forest, right) $r_shape), - ) - } - }; - (one($forest:ident, $node:ident) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { - { - let node = $forest.one_choice($node)?; - let mut r = <($(traverse!(typeof(_) $shape),)*)>::default(); - match node.kind { - $($kind => r.$i = traverse!(one($forest, node) $shape),)* - _ => unreachable!(), - } - r - } - }; - (one($forest:ident, $node:ident) [$shape:tt]) => { - { - let mut r = <(traverse!(typeof(_) $shape),)>::default(); - if let Some(node) = $node.unpack_opt() { - r.0 = traverse!(one($forest, node) $shape); - } - r - } - }; - - (all($forest:ident) _) => { - $crate::runtime::nd::Id::new() - }; - (all($forest:ident) ?) 
=> { - $crate::runtime::nd::Id::new().map(Some) - }; - (all($forest:ident) ($l_shape:tt, $r_shape:tt)) => { - $crate::runtime::nd::FromIterK::new($forest, $crate::runtime::ParseForest::all_splits) - .then(traverse!(all($forest) $l_shape).pairs(traverse!(all($forest) $r_shape))) - }; - (all($forest:ident) { $($i:tt $_i:ident: $kind:pat => $shape:tt,)* }) => { - $crate::runtime::nd::FromIter::new(move |node| { - #[derive(Clone)] - enum Iter<$($_i),*> { - $($_i($_i)),* - } - impl<$($_i: Iterator),*> Iterator for Iter<$($_i),*> - where $($_i::Item: Default),* - { - type Item = ($($_i::Item),*); - fn next(&mut self) -> Option { - let mut r = Self::Item::default(); - match self { - $(Iter::$_i(iter) => r.$i = iter.next()?),* - } - Some(r) - } - } - $forest.all_choices(node).flat_map(move |node| { - match node.kind { - $($kind => Iter::$_i(traverse!(all($forest) $shape).apply(node)),)* - _ => unreachable!(), - } - }) - }) - }; - (all($forest:ident) [$shape:tt]) => { - $crate::runtime::nd::FromIter::new(move |node| { - match $crate::runtime::ParseNode::unpack_opt(node) { - Some(node) => { - Some(traverse!(all($forest) $shape).apply(node).map(|x| (x,))) - .into_iter().flatten().chain(None) - } - None => { - None.into_iter().flatten().chain(Some(<_>::default())) - } - } - }) - } -} From 75e50d56df86a7d929ae5b97333471d9a490498b Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 14 May 2019 22:33:44 +0300 Subject: [PATCH 4/5] runtime: split `Parser` into `Parser` (more general) and `Runtime` (GLL-specific). 
--- src/generate/rust.rs | 20 ++-- src/runtime.rs | 272 +++++++++++++++++++++++++++---------------- 2 files changed, 184 insertions(+), 108 deletions(-) diff --git a/src/generate/rust.rs b/src/generate/rust.rs index ae4424a..ea7446d 100644 --- a/src/generate/rust.rs +++ b/src/generate/rust.rs @@ -470,7 +470,7 @@ impl Continuation<'_> { fn to_inline(&mut self) -> &mut Src { if let Code::Label(ref label) = self.code { self.code = Code::Inline(quote!( - p.spawn(#label); + rt.spawn(#label); )); } @@ -583,7 +583,7 @@ macro_rules! thunk { } fn pop_saved(f: impl FnOnce(Src) -> Thunk) -> Thunk { - thunk!(let saved = p.take_saved();) + thunk!(let saved = rt.take_saved();) + f(quote!(saved)) + Thunk::new(|mut cont| { if let Some(&None) = cont.nested_frames.last() { @@ -600,7 +600,7 @@ fn pop_saved(f: impl FnOnce(Src) -> Thunk) -> Thunk { } fn push_saved(parse_node_kind: ParseNodeKind) -> Thunk { - thunk!(p.save(#parse_node_kind);) + thunk!(rt.save(#parse_node_kind);) + Thunk::new(move |mut cont| { if let Some((ret_label, outer_fn_label)) = cont.nested_frames.pop().unwrap() { let inner_fn_label = mem::replace(cont.fn_code_label, outer_fn_label); @@ -627,14 +627,14 @@ fn call(callee: Rc) -> Thunk { Thunk::new(move |mut cont| { let label = cont.to_label().clone(); cont.code = Code::Inline(quote!( - p.call(#callee, #label); + rt.call(#callee, #label); )); cont }) } fn ret() -> Thunk { - thunk!(p.ret();) + thunk!(rt.ret();) + Thunk::new(|mut cont| { assert!(cont.to_inline().is_empty()); cont @@ -748,7 +748,7 @@ fn reify_as(label: Rc) -> Thunk { } fn forest_add_choice(parse_node_kind: &ParseNodeKind, choice: ParseNodeKind) -> Thunk { - thunk!(p.forest_add_choice(#parse_node_kind, #choice);) + thunk!(rt.forest_add_choice(#parse_node_kind, #choice);) } fn concat_and_forest_add( @@ -760,7 +760,7 @@ fn concat_and_forest_add( left + push_saved(left_parse_node_kind) + right + pop_saved(move |saved| { - thunk!(p.forest_add_split( + thunk!(rt.forest_add_split( #parse_node_kind, #saved, 
);) @@ -792,7 +792,7 @@ impl RuleGenerateMethods for Rule { (Rule::Empty, _) => cont, (Rule::Eat(pat), _) => { let pat = pat.rust_matcher(); - check(quote!(let Some(mut p) = p.input_consume_left(&(#pat)))).apply(cont) + check(quote!(let Some(mut rt) = rt.input_consume_left(&(#pat)))).apply(cont) } (Rule::Call(r), _) => call(Rc::new(CodeLabel::NamedRule(r.clone()))).apply(cont), (Rule::Concat([left, right]), None) => { @@ -942,7 +942,7 @@ where gll::runtime::ParseError, > { - gll::runtime::Parser::parse( + gll::runtime::Runtime::parse( input, #code_label, #parse_node_kind, @@ -1387,7 +1387,7 @@ where quote!(impl gll::runtime::CodeStep for _C where I: gll::input::Input, { - fn step<'i>(self, mut p: gll::runtime::Parser<'_, 'i, _C, I>) { + fn step<'i>(self, mut rt: gll::runtime::Runtime<'_, 'i, _C, I>) { match self { #(#code_label_arms)* } diff --git a/src/runtime.rs b/src/runtime.rs index 20e3e37..77d9b2f 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -8,19 +8,14 @@ use std::fmt; use std::hash::Hash; use std::io::{self, Write}; -pub struct Parser<'a, 'i, C: CodeLabel, I: Input> { - state: &'a mut ParserState<'i, C, I>, - current: C, - saved: Option>, +pub struct Parser<'a, 'i, P: ParseNodeKind, I: Input> { + state: &'a mut ParserState<'i, P, I>, result: Range<'i>, remaining: Range<'i>, } -struct ParserState<'i, C: CodeLabel, I: Input> { - threads: Threads<'i, C>, - gss: GraphStack<'i, C>, - memoizer: Memoizer<'i, C>, - forest: ParseForest<'i, C::ParseNodeKind, I>, +struct ParserState<'i, P: ParseNodeKind, I: Input> { + forest: ParseForest<'i, P, I>, last_input_pos: Index<'i, Unknown>, expected_pats: Vec<&'static dyn fmt::Debug>, } @@ -33,82 +28,41 @@ pub struct ParseError { pub type ParseResult = Result>; -impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { - pub fn parse( +impl<'i, P: ParseNodeKind, I: Input> Parser<'_, 'i, P, I> { + pub fn parse_with( input: I, - callee: C, - kind: C::ParseNodeKind, - ) -> ParseResult> { + f: impl for<'i2> FnOnce(Parser<'_, 
'i2, P, I>) -> Option>, + ) -> ParseResult> { ErasableL::indexing_scope(input.to_container(), |lifetime, input| { - let call = Call { - callee, - range: Range(input.range()), - }; + let range = Range(input.range()); let mut state = ParserState { - threads: Threads { - queue: BinaryHeap::new(), - seen: BTreeSet::new(), - }, - gss: GraphStack { - returns: HashMap::new(), - }, - memoizer: Memoizer { - lengths: HashMap::new(), - }, forest: ParseForest { input, possible_choices: HashMap::new(), possible_splits: HashMap::new(), }, - last_input_pos: call.range.first(), + last_input_pos: range.first(), expected_pats: vec![], }; - // Start with one thread, at the provided entry-point. - state.threads.spawn( - Continuation { - code: call.callee, - saved: None, - result: Range(call.range.frontiers().0), - }, - call.range, - ); - - // Run all threads to completion. - while let Some(next) = state.threads.steal() { - let Call { - callee: - Continuation { - code, - saved, - result, - }, - range, - } = next; - code.step(Parser { - state: &mut state, - current: code, - saved, - result, - remaining: range, - }); - } + let result = f(Parser { + state: &mut state, + result: Range(range.frontiers().0), + remaining: range, + }); - // If the function call we started with ever returned, - // we will find an entry for it in the memoizer, from - // which we pick the longest match, which is only a - // successful parse if it's as long as the input. let error = ParseError { at: I::source_info_point(&state.forest.input, state.last_input_pos), expected: state.expected_pats, }; - match state.memoizer.longest_result(call) { + match result { None => Err(error), - Some(range) => { - if range == call.range { + Some(node) => { + // The result is only a successful parse if it's as long as the input. 
+ if node.range == range { Ok(OwnedParseForestAndNode::pack( lifetime, - (state.forest, ParseNode { kind, range }), + (state.forest, node), )) } else { Err(error) @@ -121,7 +75,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { pub fn input_consume_left<'a, Pat: fmt::Debug>( &'a mut self, pat: &'static Pat, - ) -> Option> + ) -> Option> where I::Slice: InputMatch, { @@ -139,8 +93,6 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } Some(Parser { state: self.state, - current: self.current, - saved: self.saved, result: Range(self.result.join(matching).unwrap()), remaining: Range(after), }) @@ -157,7 +109,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { pub fn input_consume_right<'a, Pat>( &'a mut self, pat: &'static Pat, - ) -> Option> + ) -> Option> where I::Slice: InputMatch, { @@ -167,8 +119,6 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); Some(Parser { state: self.state, - current: self.current, - saved: self.saved, result: Range(matching.join(self.result.0).unwrap()), remaining: Range(before), }) @@ -177,20 +127,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } } - // FIXME(eddyb) maybe specialize this further, for `forest_add_split`? - pub fn save(&mut self, kind: C::ParseNodeKind) { - let old_saved = self.saved.replace(ParseNode { - kind, - range: self.result, - }); - assert_eq!(old_saved, None); - } - - pub fn take_saved(&mut self) -> ParseNode<'i, C::ParseNodeKind> { - self.saved.take().unwrap() - } - - pub fn forest_add_choice(&mut self, kind: C::ParseNodeKind, choice: C::ParseNodeKind) { + pub fn forest_add_choice(&mut self, kind: P, choice: P) { self.state .forest .possible_choices @@ -203,11 +140,7 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } // FIXME(eddyb) safeguard this against misuse. 
- pub fn forest_add_split( - &mut self, - kind: C::ParseNodeKind, - left: ParseNode<'i, C::ParseNodeKind>, - ) { + pub fn forest_add_split(&mut self, kind: P, left: ParseNode<'i, P>) { self.state .forest .possible_splits @@ -218,27 +151,170 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { .or_default() .insert(left.range.len()); } +} + +pub struct Runtime<'a, 'i, C: CodeLabel, I: Input> { + parser: Parser<'a, 'i, C::ParseNodeKind, I>, + state: &'a mut RuntimeState<'i, C>, + current: C, + saved: Option>, +} + +struct RuntimeState<'i, C: CodeLabel> { + threads: Threads<'i, C>, + gss: GraphStack<'i, C>, + memoizer: Memoizer<'i, C>, +} + +impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { + pub fn parse( + input: I, + callee: C, + kind: C::ParseNodeKind, + ) -> ParseResult> { + Parser::parse_with(input, |parser| { + let call = Call { + callee, + range: parser.remaining, + }; + let mut state = RuntimeState { + threads: Threads { + queue: BinaryHeap::new(), + seen: BTreeSet::new(), + }, + gss: GraphStack { + returns: HashMap::new(), + }, + memoizer: Memoizer { + lengths: HashMap::new(), + }, + }; + + // Start with one thread, at the provided entry-point. + state.threads.spawn( + Continuation { + code: call.callee, + saved: None, + result: Range(call.range.frontiers().0), + }, + call.range, + ); + + // Run all threads to completion. + while let Some(next) = state.threads.steal() { + let Call { + callee: + Continuation { + code, + saved, + result, + }, + range, + } = next; + code.step(Runtime { + parser: Parser { + state: parser.state, + result, + remaining: range, + }, + state: &mut state, + current: code, + saved, + }); + } + + // If the function call we started with ever returned, + // we will find an entry for it in the memoizer, from + // which we pick the longest match. 
+ state + .memoizer + .longest_result(call) + .map(|range| ParseNode { kind, range }) + }) + } + + pub fn input_consume_left<'a, Pat: fmt::Debug>( + &'a mut self, + pat: &'static Pat, + ) -> Option> + where + I::Slice: InputMatch, + { + match self.parser.input_consume_left(pat) { + Some(parser) => Some(Runtime { + parser, + state: self.state, + current: self.current, + saved: self.saved, + }), + None => None, + } + } + + pub fn input_consume_right<'a, Pat>( + &'a mut self, + pat: &'static Pat, + ) -> Option> + where + I::Slice: InputMatch, + { + match self.parser.input_consume_right(pat) { + Some(parser) => Some(Runtime { + parser, + state: self.state, + current: self.current, + saved: self.saved, + }), + None => None, + } + } + + // FIXME(eddyb) maybe specialize this further, for `forest_add_split`? + pub fn save(&mut self, kind: C::ParseNodeKind) { + let old_saved = self.saved.replace(ParseNode { + kind, + range: self.parser.result, + }); + assert_eq!(old_saved, None); + } + + pub fn take_saved(&mut self) -> ParseNode<'i, C::ParseNodeKind> { + self.saved.take().unwrap() + } + + pub fn forest_add_choice(&mut self, kind: C::ParseNodeKind, choice: C::ParseNodeKind) { + self.parser.forest_add_choice(kind, choice); + } + + // FIXME(eddyb) safeguard this against misuse. 
+ pub fn forest_add_split( + &mut self, + kind: C::ParseNodeKind, + left: ParseNode<'i, C::ParseNodeKind>, + ) { + self.parser.forest_add_split(kind, left); + } pub fn spawn(&mut self, next: C) { self.state.threads.spawn( Continuation { code: next, saved: self.saved, - result: self.result, + result: self.parser.result, }, - self.remaining, + self.parser.remaining, ); } pub fn call(&mut self, callee: C, next: C) { let call = Call { callee, - range: self.remaining, + range: self.parser.remaining, }; let next = Continuation { code: next, saved: self.saved, - result: self.result, + result: self.parser.result, }; let returns = self.state.gss.returns.entry(call).or_default(); if returns.insert(next) { @@ -269,8 +345,8 @@ impl<'i, C: CodeStep, I: Input> Parser<'_, 'i, C, I> { } pub fn ret(&mut self) { - let call_result = self.result; - let remaining = self.remaining; + let call_result = self.parser.result; + let remaining = self.parser.remaining; let call = Call { callee: self.current.enclosing_fn(), range: Range(call_result.join(remaining.0).unwrap()), @@ -435,5 +511,5 @@ pub trait CodeLabel: fmt::Debug + Ord + Hash + Copy + 'static { } pub trait CodeStep: CodeLabel { - fn step<'i>(self, p: Parser<'_, 'i, Self, I>); + fn step<'i>(self, rt: Runtime<'_, 'i, Self, I>); } From defcf2e1adcd1bb7e5b5017d92d98dd2dabca02e Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 14 May 2019 22:59:58 +0300 Subject: [PATCH 5/5] runtime: split out `Parser` into a `parser` module. 
--- macros/tests/basic.rs | 2 +- src/forest.rs | 2 +- src/generate/rust.rs | 2 +- src/lib.rs | 2 + src/parse_grammar.rs | 4 +- src/parser.rs | 178 ++++++++++++++++++++++++++++++++++++++++++ src/runtime.rs | 174 +++-------------------------------------- 7 files changed, 197 insertions(+), 167 deletions(-) create mode 100644 src/parser.rs diff --git a/macros/tests/basic.rs b/macros/tests/basic.rs index d0bf098..bd12909 100644 --- a/macros/tests/basic.rs +++ b/macros/tests/basic.rs @@ -37,7 +37,7 @@ macro_rules! testcases { let result = match &result { Ok(result) => format!("{:#?}", result), - Err(gll::runtime::ParseError { + Err(gll::parser::ParseError { at, expected, }) => { diff --git a/src/forest.rs b/src/forest.rs index ed83c24..487e6af 100644 --- a/src/forest.rs +++ b/src/forest.rs @@ -10,7 +10,7 @@ use std::str; /// A parse forest, in SPPF (Shared Packed Parse Forest) representation. pub struct ParseForest<'i, P: ParseNodeKind, I: Input> { - // HACK(eddyb) `pub(crate)` only for `runtime`. + // HACK(eddyb) `pub(crate)` only for `parser`. pub(crate) input: Container<'i, I::Container>, pub(crate) possible_choices: HashMap, BTreeSet

>, pub(crate) possible_splits: HashMap, BTreeSet>, diff --git a/src/generate/rust.rs b/src/generate/rust.rs index ea7446d..ab77ffe 100644 --- a/src/generate/rust.rs +++ b/src/generate/rust.rs @@ -939,7 +939,7 @@ where pub fn parse(input: I) -> Result< OwnedHandle, - gll::runtime::ParseError, + gll::parser::ParseError, > { gll::runtime::Runtime::parse( diff --git a/src/lib.rs b/src/lib.rs index de5346a..2e7f17e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,8 @@ pub mod input; #[forbid(unsafe_code)] pub mod parse_node; #[forbid(unsafe_code)] +pub mod parser; +#[forbid(unsafe_code)] pub mod proc_macro; #[forbid(unsafe_code)] pub mod runtime; diff --git a/src/parse_grammar.rs b/src/parse_grammar.rs index 412cb40..93fd544 100644 --- a/src/parse_grammar.rs +++ b/src/parse_grammar.rs @@ -7,15 +7,15 @@ use crate as gll; include!(concat!(env!("OUT_DIR"), "/parse_grammar.rs")); +use crate::parser::ParseError; use crate::proc_macro::{FlatToken, Span, TokenStream}; -use crate::runtime; use crate::scannerless::Pat as SPat; use std::ops::Bound; use std::str::FromStr; pub fn parse_grammar>( stream: TokenStream, -) -> Result, runtime::ParseError> { +) -> Result, ParseError> { let mut grammar = grammer::Grammar::new(); Grammar::parse(stream)?.with(|g| { for rule_def in g.one().unwrap().rules { diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..c11a88d --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,178 @@ +use crate::forest::{OwnedParseForestAndNode, ParseForest, ParseNode, ParseNodeKind}; +use crate::high::ErasableL; +use crate::input::{Input, InputMatch, Range}; +use indexing::{self, Index, Unknown}; +use std::collections::HashMap; +use std::fmt; + +pub struct Parser<'a, 'i, P: ParseNodeKind, I: Input> { + state: &'a mut ParserState<'i, P, I>, + result: Range<'i>, + remaining: Range<'i>, +} + +struct ParserState<'i, P: ParseNodeKind, I: Input> { + forest: ParseForest<'i, P, I>, + last_input_pos: Index<'i, Unknown>, + expected_pats: Vec<&'static 
dyn fmt::Debug>, +} + +#[derive(Debug)] +pub struct ParseError { + pub at: A, + pub expected: Vec<&'static dyn fmt::Debug>, +} + +pub type ParseResult = Result>; + +impl<'i, P: ParseNodeKind, I: Input> Parser<'_, 'i, P, I> { + pub fn parse_with( + input: I, + f: impl for<'i2> FnOnce(Parser<'_, 'i2, P, I>) -> Option>, + ) -> ParseResult> { + ErasableL::indexing_scope(input.to_container(), |lifetime, input| { + let range = Range(input.range()); + let mut state = ParserState { + forest: ParseForest { + input, + possible_choices: HashMap::new(), + possible_splits: HashMap::new(), + }, + last_input_pos: range.first(), + expected_pats: vec![], + }; + + let result = f(Parser { + state: &mut state, + result: Range(range.frontiers().0), + remaining: range, + }); + + let error = ParseError { + at: I::source_info_point(&state.forest.input, state.last_input_pos), + expected: state.expected_pats, + }; + match result { + None => Err(error), + Some(node) => { + // The result is only a successful parse if it's as long as the input. + if node.range == range { + Ok(OwnedParseForestAndNode::pack( + lifetime, + (state.forest, node), + )) + } else { + Err(error) + } + } + } + }) + } + + // FIXME(eddyb) find a nicer way for algorithms to manipulate these ranges. + pub fn result(&self) -> Range<'i> { + self.result + } + + pub fn remaining(&self) -> Range<'i> { + self.remaining + } + + pub fn with_result_and_remaining<'a>( + &'a mut self, + result: Range<'i>, + remaining: Range<'i>, + ) -> Parser<'a, 'i, P, I> { + // HACK(eddyb) enforce that `result` and `remaining` are inside `self`. 
+ assert_eq!(self.result, Range(self.remaining.frontiers().0)); + let full_new_range = result.join(remaining.0).unwrap(); + assert!(self.remaining.start() <= full_new_range.start()); + assert_eq!(self.remaining.end(), full_new_range.end()); + + Parser { + state: self.state, + result, + remaining, + } + } + + pub fn input_consume_left<'a, Pat: fmt::Debug>( + &'a mut self, + pat: &'static Pat, + ) -> Option> + where + I::Slice: InputMatch, + { + let start = self.remaining.first(); + if start > self.state.last_input_pos { + self.state.last_input_pos = start; + self.state.expected_pats.clear(); + } + match self.state.forest.input(self.remaining).match_left(pat) { + Some(n) => { + let (matching, after, _) = self.remaining.split_at(n); + if n > 0 { + self.state.last_input_pos = after.first(); + self.state.expected_pats.clear(); + } + Some(Parser { + state: self.state, + result: Range(self.result.join(matching).unwrap()), + remaining: Range(after), + }) + } + None => { + if start == self.state.last_input_pos { + self.state.expected_pats.push(pat); + } + None + } + } + } + + pub fn input_consume_right<'a, Pat>( + &'a mut self, + pat: &'static Pat, + ) -> Option> + where + I::Slice: InputMatch, + { + // FIXME(eddyb) implement error reporting support like in `input_consume_left` + match self.state.forest.input(self.remaining).match_right(pat) { + Some(n) => { + let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); + Some(Parser { + state: self.state, + result: Range(matching.join(self.result.0).unwrap()), + remaining: Range(before), + }) + } + None => None, + } + } + + pub fn forest_add_choice(&mut self, kind: P, choice: P) { + self.state + .forest + .possible_choices + .entry(ParseNode { + kind, + range: self.result, + }) + .or_default() + .insert(choice); + } + + // FIXME(eddyb) safeguard this against misuse. 
+ pub fn forest_add_split(&mut self, kind: P, left: ParseNode<'i, P>) { + self.state + .forest + .possible_splits + .entry(ParseNode { + kind, + range: self.result, + }) + .or_default() + .insert(left.range.len()); + } +} diff --git a/src/runtime.rs b/src/runtime.rs index 77d9b2f..c9ab53a 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1,158 +1,12 @@ -use crate::forest::{OwnedParseForestAndNode, ParseForest, ParseNode, ParseNodeKind}; -use crate::high::ErasableL; +use crate::forest::{OwnedParseForestAndNode, ParseNode, ParseNodeKind}; use crate::input::{Input, InputMatch, Range}; -use indexing::{self, Index, Unknown}; +use crate::parser::{ParseResult, Parser}; use std::cmp::{Ordering, Reverse}; use std::collections::{BTreeSet, BinaryHeap, HashMap}; use std::fmt; use std::hash::Hash; use std::io::{self, Write}; -pub struct Parser<'a, 'i, P: ParseNodeKind, I: Input> { - state: &'a mut ParserState<'i, P, I>, - result: Range<'i>, - remaining: Range<'i>, -} - -struct ParserState<'i, P: ParseNodeKind, I: Input> { - forest: ParseForest<'i, P, I>, - last_input_pos: Index<'i, Unknown>, - expected_pats: Vec<&'static dyn fmt::Debug>, -} - -#[derive(Debug)] -pub struct ParseError { - pub at: A, - pub expected: Vec<&'static dyn fmt::Debug>, -} - -pub type ParseResult = Result>; - -impl<'i, P: ParseNodeKind, I: Input> Parser<'_, 'i, P, I> { - pub fn parse_with( - input: I, - f: impl for<'i2> FnOnce(Parser<'_, 'i2, P, I>) -> Option>, - ) -> ParseResult> { - ErasableL::indexing_scope(input.to_container(), |lifetime, input| { - let range = Range(input.range()); - let mut state = ParserState { - forest: ParseForest { - input, - possible_choices: HashMap::new(), - possible_splits: HashMap::new(), - }, - last_input_pos: range.first(), - expected_pats: vec![], - }; - - let result = f(Parser { - state: &mut state, - result: Range(range.frontiers().0), - remaining: range, - }); - - let error = ParseError { - at: I::source_info_point(&state.forest.input, state.last_input_pos), - 
expected: state.expected_pats, - }; - match result { - None => Err(error), - Some(node) => { - // The result is only a successful parse if it's as long as the input. - if node.range == range { - Ok(OwnedParseForestAndNode::pack( - lifetime, - (state.forest, node), - )) - } else { - Err(error) - } - } - } - }) - } - - pub fn input_consume_left<'a, Pat: fmt::Debug>( - &'a mut self, - pat: &'static Pat, - ) -> Option> - where - I::Slice: InputMatch, - { - let start = self.remaining.first(); - if start > self.state.last_input_pos { - self.state.last_input_pos = start; - self.state.expected_pats.clear(); - } - match self.state.forest.input(self.remaining).match_left(pat) { - Some(n) => { - let (matching, after, _) = self.remaining.split_at(n); - if n > 0 { - self.state.last_input_pos = after.first(); - self.state.expected_pats.clear(); - } - Some(Parser { - state: self.state, - result: Range(self.result.join(matching).unwrap()), - remaining: Range(after), - }) - } - None => { - if start == self.state.last_input_pos { - self.state.expected_pats.push(pat); - } - None - } - } - } - - pub fn input_consume_right<'a, Pat>( - &'a mut self, - pat: &'static Pat, - ) -> Option> - where - I::Slice: InputMatch, - { - // FIXME(eddyb) implement error reporting support like in `input_consume_left` - match self.state.forest.input(self.remaining).match_right(pat) { - Some(n) => { - let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); - Some(Parser { - state: self.state, - result: Range(matching.join(self.result.0).unwrap()), - remaining: Range(before), - }) - } - None => None, - } - } - - pub fn forest_add_choice(&mut self, kind: P, choice: P) { - self.state - .forest - .possible_choices - .entry(ParseNode { - kind, - range: self.result, - }) - .or_default() - .insert(choice); - } - - // FIXME(eddyb) safeguard this against misuse. 
- pub fn forest_add_split(&mut self, kind: P, left: ParseNode<'i, P>) { - self.state - .forest - .possible_splits - .entry(ParseNode { - kind, - range: self.result, - }) - .or_default() - .insert(left.range.len()); - } -} - pub struct Runtime<'a, 'i, C: CodeLabel, I: Input> { parser: Parser<'a, 'i, C::ParseNodeKind, I>, state: &'a mut RuntimeState<'i, C>, @@ -172,10 +26,10 @@ impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { callee: C, kind: C::ParseNodeKind, ) -> ParseResult> { - Parser::parse_with(input, |parser| { + Parser::parse_with(input, |mut parser| { let call = Call { callee, - range: parser.remaining, + range: parser.remaining(), }; let mut state = RuntimeState { threads: Threads { @@ -212,11 +66,7 @@ impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { range, } = next; code.step(Runtime { - parser: Parser { - state: parser.state, - result, - remaining: range, - }, + parser: parser.with_result_and_remaining(result, range), state: &mut state, current: code, saved, @@ -273,7 +123,7 @@ impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { pub fn save(&mut self, kind: C::ParseNodeKind) { let old_saved = self.saved.replace(ParseNode { kind, - range: self.parser.result, + range: self.parser.result(), }); assert_eq!(old_saved, None); } @@ -300,21 +150,21 @@ impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { Continuation { code: next, saved: self.saved, - result: self.parser.result, + result: self.parser.result(), }, - self.parser.remaining, + self.parser.remaining(), ); } pub fn call(&mut self, callee: C, next: C) { let call = Call { callee, - range: self.parser.remaining, + range: self.parser.remaining(), }; let next = Continuation { code: next, saved: self.saved, - result: self.parser.result, + result: self.parser.result(), }; let returns = self.state.gss.returns.entry(call).or_default(); if returns.insert(next) { @@ -345,8 +195,8 @@ impl<'i, C: CodeStep, I: Input> Runtime<'_, 'i, C, I> { } pub fn ret(&mut self) { - let call_result = 
self.parser.result; - let remaining = self.parser.remaining; + let call_result = self.parser.result(); + let remaining = self.parser.remaining(); let call = Call { callee: self.current.enclosing_fn(), range: Range(call_result.join(remaining.0).unwrap()),