Skip to content

Commit

Permalink
some cleanup and documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
rrevenantt committed Feb 12, 2020
1 parent 123c306 commit 307b806
Show file tree
Hide file tree
Showing 45 changed files with 1,128 additions and 1,472 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ typed-arena = "2.0.*"
#name = "my_test"
#path="tests/my_test.rs"


[profile.release]
opt-level = 3
#debug = true
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ and [tests/my_tests.rs](tests/my_test.rs) for actual usage examples

### Implementation status

Everything is implemented, but you should still expect bugs/panics, so it is not ready for production yet.
Also API very likely will have some changes.
Everything is implemented, business logic is quite stable and well tested, but the user-facing
API is not very robust yet and will very likely have some changes.

Currently requires nightly version of rust.
This very likely will be the case until `specialization`,`try_blocks` and `unsize` features are stabilized.
Expand All @@ -21,6 +21,9 @@ Remaining core things:
- [ ] API stabilization
- [ ] Rust api guidelines compliance
- [ ] more tests for API because it is quite different from Java
- [ ] Code quality
- [ ] Rustfmt fails to run currently
- [ ] Clippy sanitation

See tracking [issue](https://github.com/antlr/antlr4/issues/1839) for more info

Expand Down Expand Up @@ -60,7 +63,8 @@ there are quite some differences because Rust is not an OOP language and is much
- If you are using labeled alternatives,
struct generated for rule is an enum with a variant for each alternative
- Parser needs to have ownership for listeners, but it is possible to get the listener back via `ListenerId`
otherwise `ParseTreeWalker` should be used
otherwise `ParseTreeWalker` should be used.


### Unsafe
Currently unsafe is used only to cast from trait object back to original type
Expand Down
27 changes: 13 additions & 14 deletions grammars/Labels.g4
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
grammar Labels;
z : s[0] ;
s[isize v] : q=e {println!("{}",$e.v)};
e returns [isize v]
: a=e op='*' b=e {$v = $a.v * $b.v;} # mult
| a=e '+' b=e {$v = $a.v + $b.v;} # add
| INT {$v = $INT.int;} # anInt
| '(' x=e ')' {$v = $x.v;} # parens
| x=e '++' {$v = $x.v+1;} # inc
| e '--' # dec
| ID {$v = 3;} # anID
;
ID : 'a'..'z'+ ;
INT : '0'..'9'+ ;
WS : (' '|'\n') -> skip ;
s : q=e ;
e returns [String v]
: a=e op='*' b=e {$v = "* ".to_owned() + $a.v + " " + $b.v;} # mult
| a=e '+' b=e {$v = "+ ".to_owned() + $a.v + " " + $b.v;} # add
| INT {$v = $INT.text.to_owned();} # anInt
| '(' x=e ')' {$v = $x.v;} # parens
| x=e '++' {$v = " ++".to_owned() + $x.v;} # inc
| x=e '--' {$v = " --".to_owned() + $x.v;} # dec
| ID {$v = $ID.text.to_owned();} # anID
;
ID : 'a'..'z'+ ;
INT : '0'..'9'+ ;
WS : (' '|'\n') -> skip ;
52 changes: 41 additions & 11 deletions src/atn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub struct ATN {

pub grammar_type: ATNType,

pub lexer_actions: Vec<LexerAction>,
pub(crate) lexer_actions: Vec<LexerAction>,

pub max_token_type: isize,

Expand Down Expand Up @@ -53,31 +53,29 @@ impl ATN {
}
}

// fn next_tokens_in_context(&self,s: ATNStateRef, _ctx: &RuleContext) -> IntervalSet {
// unimplemented!()
// }
//
// fn next_tokens_no_context(&self,s: ATNStateRef) -> IntervalSet {
// unimplemented!()
// }

/// Returns the set of tokens that may occur when starting in `s` and
/// staying inside the same rule. `Token::EPSILON` is part of the set when
/// the end of the rule can be reached.
///
/// The set is obtained through `get_or_init` on the cell returned by
/// `get_next_tokens_within_rule`, so the initializer below (which also flags
/// the freshly computed set as `read_only`) is presumably only run while
/// that cell is still empty.
pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
    let cached = s.get_next_tokens_within_rule();
    cached.get_or_init(|| {
        self.next_tokens_in_ctx(s, None)
            .modify_with(|set| set.read_only = true)
    })
}

/// Computes the set of valid tokens that can occur starting in state `s`.
///
/// When `_ctx` is `None`, the set does not include what can follow the
/// rule surrounding `s`; in other words, it is restricted to tokens
/// reachable while staying within `s`'s rule.
pub fn next_tokens_in_ctx(&self, s: &dyn ATNState, _ctx: Option<&dyn ParserRuleContext>) -> IntervalSet {
    // The computation is delegated to a fresh `LL1Analyzer` built over this ATN.
    LL1Analyzer::new(self).look(s, None, _ctx)
}

pub fn add_state(&mut self, state: Box<dyn ATNState>) {
pub(crate) fn add_state(&mut self, state: Box<dyn ATNState>) {
assert_eq!(state.get_state_number(), self.states.len());
self.states.push(state)
}
Expand All @@ -94,6 +92,38 @@ impl ATN {
self.decision_to_state[decision]
}

/// Computes the set of input symbols which could follow ATN state number
/// `state_number` in the specified full context `_ctx`. This method
/// considers the complete parser context, but does not evaluate semantic
/// predicates (i.e. all predicates encountered during the calculation are
/// assumed true). If a path in the ATN exists from the starting state to the
/// `RuleStopState` of the outermost context without matching any
/// symbols, `Token::EOF` is added to the returned set.
///
/// In the Java runtime a `null` context is treated as `ParserRuleContext::EMPTY`;
/// here `_ctx` is a non-optional reference, so a context must always be supplied.
///
/// Note that this does NOT give you the set of all tokens that could
/// appear at a given token position in the input phrase. In other words,
/// it does not answer:
///
/// "Given a specific partial input phrase, return the set of all tokens
/// that can follow the last token in the input phrase."
///
/// The big difference is that with just the input, the parser could
/// land right in the middle of a lookahead decision. Getting
/// all *possible* tokens given a partial input stream is a separate
/// computation. See <https://github.com/antlr/antlr4/issues/1428>
///
/// For this function, we are specifying an ATN state and call stack to compute
/// what token(s) can come next and specifically: outside of a lookahead decision.
/// That is what you want for error reporting and recovery upon parse error.
///
/// # Arguments
///
/// * `state_number` - the ATN state number
/// * `_ctx` - the full parse context
///
/// # Returns
///
/// The set of potentially valid input symbols which could follow the
/// specified state in the specified context.
///
/// # Panics
///
/// Panics if the ATN does not contain a state with number `state_number`.
pub fn get_expected_tokens(&self, state_number: isize, _ctx: &Rc<dyn ParserRuleContext>) -> IntervalSet {
let s = self.states[state_number as usize].as_ref();
let mut following = self.next_tokens(s);
Expand Down
54 changes: 24 additions & 30 deletions src/atn_config.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::fmt::{Debug, Error, Formatter, Write};
use std::fmt::{Debug, Error, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::DerefMut;
use std::sync::Arc;

use murmur3::murmur3_32::MurmurHasher;
Expand All @@ -12,6 +11,22 @@ use crate::lexer_action_executor::LexerActionExecutor;
use crate::prediction_context::PredictionContext;
use crate::semantic_context::SemanticContext;

/// A single ATN configuration: bundles an ATN state reference (`state`),
/// an `alt` number, an optional prediction context and a semantic context.
///
/// Only `Clone` is derived; `Eq`, `PartialEq`, `Hash` and `Debug` have
/// hand-written impls in this file.
#[derive(Clone)]
pub struct ATNConfig {
precedence_filter_suppressed: bool,
//todo since ATNState is immutable when we started working with ATNConfigs
// looks like it is possible to have usual reference here
state: ATNStateRef,
alt: isize,
//todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
//another todo check arena alloc
context: Option<Arc<PredictionContext>>,
// always present: stored as a plain `Box`, not wrapped in an `Option`
pub semantic_context: Box<SemanticContext>,
pub reaches_into_outer_context: isize,
// tells base configs apart from lexer configs (see `ATNConfigType`)
pub(crate) config_type: ATNConfigType,
}

impl Eq for ATNConfig {}

impl PartialEq for ATNConfig {
Expand Down Expand Up @@ -45,22 +60,6 @@ impl Hash for ATNConfig {
}
}

/// A single ATN configuration: bundles an ATN state reference (`state`),
/// an `alt` number, an optional prediction context and an optional
/// semantic context.
#[derive(Clone)]
pub struct ATNConfig {
precedence_filter_suppressed: bool,
//todo since ATNState is immutable when we started working with ATNConfigs
// looks like it is possible to have usual reference here
state: ATNStateRef,
alt: isize,
//todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
//another todo check arena alloc
context: Option<Arc<PredictionContext>>,
//todo looks like here option is also unnecessary
pub semantic_context: Option<Box<SemanticContext>>,
pub reaches_into_outer_context: isize,
// tells base configs apart from lexer configs (see `ATNConfigType`)
pub config_type: ATNConfigType,
}

impl Debug for ATNConfig {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
f.write_fmt(format_args!("({},{},[{}]", self.state, self.alt, self.context.as_deref().unwrap()))?;
Expand All @@ -73,7 +72,7 @@ impl Debug for ATNConfig {
}

#[derive(Eq, PartialEq, Clone, Debug)]
pub enum ATNConfigType {
pub(crate) enum ATNConfigType {
BaseATNConfig,
LexerATNConfig {
lexer_action_executor: Option<Box<LexerActionExecutor>>,
Expand All @@ -82,7 +81,7 @@ pub enum ATNConfigType {
}

impl ATNConfig {
pub fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> {
pub(crate) fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> {
match &self.config_type {
ATNConfigType::BaseATNConfig => None,
ATNConfigType::LexerATNConfig { lexer_action_executor, .. } => lexer_action_executor.as_deref(),
Expand All @@ -106,7 +105,7 @@ impl ATNConfig {
state,
alt,
context,
semantic_context: Some(Box::new(SemanticContext::NONE)),
semantic_context: Box::new(SemanticContext::NONE),
reaches_into_outer_context: 0,
config_type: ATNConfigType::BaseATNConfig,
}
Expand All @@ -116,9 +115,8 @@ impl ATNConfig {
state: ATNStateRef,
alt: isize,
context: Option<Arc<PredictionContext>>,
semantic_context: Option<Box<SemanticContext>>,
semantic_context: Box<SemanticContext>,
) -> ATNConfig {
assert!(semantic_context.is_some());
let mut new = Self::new(state, alt, context);
new.semantic_context = semantic_context;
new
Expand All @@ -137,7 +135,7 @@ impl ATNConfig {
atnconfig
}

pub fn cloned_with_new_semantic(&self, target: &dyn ATNState, ctx: Option<Box<SemanticContext>>) -> ATNConfig {
pub fn cloned_with_new_semantic(&self, target: &dyn ATNState, ctx: Box<SemanticContext>) -> ATNConfig {
let mut new = self.cloned(target);
new.semantic_context = ctx;
new
Expand All @@ -160,7 +158,7 @@ impl ATNConfig {
new
}

pub fn cloned_with_new_exec(&self, target: &dyn ATNState, exec: Option<LexerActionExecutor>) -> ATNConfig {
pub(crate) fn cloned_with_new_exec(&self, target: &dyn ATNState, exec: Option<LexerActionExecutor>) -> ATNConfig {
let mut new = self.cloned(target);
if let ATNConfigType::LexerATNConfig {
lexer_action_executor, passed_through_non_greedy_decision: _
Expand All @@ -179,14 +177,10 @@ impl ATNConfig {
self.alt
}

pub fn get_type(&self) -> &ATNConfigType {
pub(crate) fn get_type(&self) -> &ATNConfigType {
&self.config_type
}

pub fn get_semantic_context(&self) -> Option<&SemanticContext> {
self.semantic_context.as_deref()
}

/// Returns a reference to the stored prediction context, or `None` when
/// this config has no context.
pub fn get_context(&self) -> Option<&Arc<PredictionContext>> {
self.context.as_ref()
}
Expand Down
25 changes: 12 additions & 13 deletions src/atn_config_set.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Error, Formatter, Write};
use std::collections::HashMap;
use std::fmt::{Debug, Error, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;

use backtrace::Backtrace;
use bit_set::BitSet;
use murmur3::murmur3_32::MurmurHasher;

use crate::atn_config::ATNConfig;
use crate::atn_simulator::IATNSimulator;
Expand Down Expand Up @@ -45,7 +44,7 @@ pub struct ATNConfigSet {
#[derive(Hash, Eq, PartialEq)]
enum Key {
Full(ATNConfig),
Partial(ATNStateRef, isize, Option<SemanticContext>),
Partial(ATNStateRef, isize, SemanticContext),
}

impl Debug for ATNConfigSet {
Expand Down Expand Up @@ -94,34 +93,34 @@ impl ATNConfigSet {
has_semantic_context: false,
read_only: false,
unique_alt: 0,
hasher: Self::atn_config_local_hash,
hasher: Self::local_hash_key,
}
}

// for lexerATNConfig
pub fn new_ordered() -> ATNConfigSet {
let mut a = ATNConfigSet::new_base_atnconfig_set(true);

a.hasher = Self::atn_config_full_hash;
a.hasher = Self::full_hash_key;
a
}

fn atn_config_full_hash(config: &ATNConfig) -> Key {
fn full_hash_key(config: &ATNConfig) -> Key {
Key::Full(config.clone())
}

fn atn_config_local_hash(config: &ATNConfig) -> Key {
Key::Partial(config.get_state(), config.get_alt(), config.get_semantic_context().cloned())
fn local_hash_key(config: &ATNConfig) -> Key {
Key::Partial(config.get_state(), config.get_alt(), config.semantic_context.deref().clone())
}

pub fn add_cached(
&mut self,
mut config: Box<ATNConfig>,
config: Box<ATNConfig>,
mut merge_cache: Option<&mut MergeCache>,
) -> bool {
assert!(!self.read_only);

if config.get_semantic_context().is_some() && *config.get_semantic_context().unwrap() != SemanticContext::NONE {
if *config.semantic_context != SemanticContext::NONE {
self.has_semantic_context = true
}

Expand All @@ -136,7 +135,7 @@ impl ATNConfigSet {
let existing = self.configs.get_mut(*existing).unwrap().as_mut();
let root_is_wildcard = !self.full_ctx;

let mut merged = PredictionContext::merge(
let merged = PredictionContext::merge(
existing.get_context().unwrap(),
config.get_context().unwrap(),
root_is_wildcard,
Expand Down
6 changes: 3 additions & 3 deletions src/atn_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,11 @@ impl ATNDeserializer {
.get_mut(atn.rule_to_start_state[rule_index])
.unwrap();
if let ATNStateType::RuleStartState {
stop_state: mut stop,
stop_state: stop,
..
} = start_state.get_state_type()
} = start_state.get_state_type_mut()
{
stop = i
*stop = i
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/atn_simulator.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fmt::{Debug, Error, Formatter};
use std::ops::Deref;
use std::sync::Arc;

Expand All @@ -12,12 +13,19 @@ pub trait IATNSimulator {
fn decision_to_dfa(&self) -> &Vec<DFA>;
}


/// Common data for an ATN simulator: the ATN, a prediction-context cache
/// and a list of DFAs, each held behind an `Arc`.
pub struct BaseATNSimulator {
/// The ATN this simulator refers to.
pub atn: Arc<ATN>,
/// Prediction-context cache (NOTE(review): presumably shared between simulators — confirm).
pub shared_context_cache: Arc<PredictionContextCache>,
/// DFAs, presumably indexed by decision number — TODO confirm.
pub decision_to_dfa: Arc<Vec<DFA>>,
}

/// Opaque `Debug` output: prints a fixed placeholder instead of the
/// simulator's fields.
impl Debug for BaseATNSimulator {
    fn fmt(&self, formatter: &mut Formatter<'_>) -> Result<(), Error> {
        // Fixed text; width, precision and the alternate flag are not applied.
        const PLACEHOLDER: &str = "BaseATNSimulator { .. }";
        formatter.write_str(PLACEHOLDER)
    }
}

impl BaseATNSimulator {
pub fn new_base_atnsimulator(
atn: Arc<ATN>,
Expand Down
Loading

0 comments on commit 307b806

Please sign in to comment.