some cleanup and documentation

rrevenantt · Feb 12, 2020 · 307b806 · 307b806
1 parent 123c306
commit 307b806
Show file tree

Hide file tree

Showing 45 changed files with 1,128 additions and 1,472 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -26,6 +26,7 @@ typed-arena = "2.0.*"
 #name = "my_test"
 #path="tests/my_test.rs"
 
+
 [profile.release]
 opt-level = 3
 #debug = true
diff --git a/README.md b/README.md
@@ -8,8 +8,8 @@ and [tests/my_tests.rs](tests/my_test.rs) for actual usage examples
 
 ### Implementation status
 
-Everything is implemented, but you should still expect bugs/panics, so it is not ready for production yet.
-Also API very likely will have some changes.
+Everything is implemented, business logic is quite stable and well tested, but the user-facing
+API is not very robust yet and will very likely have some changes.
 
 Currently requires nightly version of rust. 
 This very likely will be the case until `specialization`,`try_blocks` and `unsize` features are stabilized. 
@@ -21,6 +21,9 @@ Remaining core things:
 - [ ] API stabilization
   - [ ] Rust api guidelines compliance  
   - [ ] more tests for API because it is quite different from Java
+- [ ] Code quality
+  - [ ] Rustfmt fails to run currently
+  - [ ] Clippy sanitation 
 
 See tracking [issue](https://github.com/antlr/antlr4/issues/1839) for more info
 
@@ -60,7 +63,8 @@ there are quite some differences because Rust is not an OOP language and is much
  - If you are using labeled alternatives, 
  struct generated for rule is a enum with variant for each alternative
  - Parser needs to have ownership for listeners, but it is possible te get listener back via `ListenerId`
- otherwise `ParseTreeWalker` should be used 
+ otherwise `ParseTreeWalker` should be used.
+
 
 ### Unsafe
 Currently unsafe is used only to cast from trait object back to original type 

diff --git a/grammars/Labels.g4 b/grammars/Labels.g4
@@ -1,15 +1,14 @@
 grammar Labels;
-         z : s[0] ;
-		 s[isize v] : q=e {println!("{}",$e.v)};
-		 e returns [isize v]
-		   : a=e op='*' b=e {$v = $a.v * $b.v;}  # mult
-		   | a=e '+' b=e {$v = $a.v + $b.v;}     # add
-		   | INT         {$v = $INT.int;}        # anInt
-		   | '(' x=e ')' {$v = $x.v;}            # parens
-		   | x=e '++'    {$v = $x.v+1;}          # inc
-		   | e '--'                              # dec
-		   | ID          {$v = 3;}               # anID
-		   ;
-		 ID : 'a'..'z'+ ;
-		 INT : '0'..'9'+ ;
-		 WS : (' '|'\n') -> skip ;
+s : q=e ;
+e returns [String v]
+  : a=e op='*' b=e {$v = "* ".to_owned() + $a.v + " " + $b.v;}  # mult
+  | a=e '+' b=e {$v = "+ ".to_owned() + $a.v + " " + $b.v;}     # add
+  | INT         {$v = $INT.text.to_owned();}        # anInt
+  | '(' x=e ')' {$v = $x.v;}            # parens
+  | x=e '++'    {$v = " ++".to_owned() + $x.v;}          # inc
+  | x=e '--'      {$v = " --".to_owned() + $x.v;}              # dec
+  | ID          {$v = $ID.text.to_owned();}               # anID
+  ;
+ID : 'a'..'z'+ ;
+INT : '0'..'9'+ ;
+WS : (' '|'\n') -> skip ;
diff --git a/src/atn.rs b/src/atn.rs
@@ -20,7 +20,7 @@ pub struct ATN {
 
     pub grammar_type: ATNType,
 
-    pub lexer_actions: Vec<LexerAction>,
+    pub(crate) lexer_actions: Vec<LexerAction>,
 
     pub max_token_type: isize,
 
@@ -53,31 +53,29 @@ impl ATN {
         }
     }
 
-//    fn next_tokens_in_context(&self,s: ATNStateRef, _ctx: &RuleContext) -> IntervalSet {
-//        unimplemented!()
-//    }
-//
-//    fn next_tokens_no_context(&self,s: ATNStateRef) -> IntervalSet {
-//        unimplemented!()
-//    }
-
+    ///Compute the set of valid tokens that can occur starting in `s` and
+    ///staying in same rule. `Token::EPSILON` is in set if we reach end of
+    ///rule.
     pub fn next_tokens<'a>(&self, s: &'a dyn ATNState) -> &'a IntervalSet {
         s.get_next_tokens_within_rule().get_or_init(|| {
             self.next_tokens_in_ctx(s, None)
                 .modify_with(|r| {
-//                    println!("expecting {:?}", r);
                     r.read_only = true
                 }
                 )
         })
     }
 
+    /// Compute the set of valid tokens that can occur starting in state `s`.
+    /// If `_ctx` is `None`, the set of tokens will not include what can follow
+    /// the rule surrounding `s`. In other words, the set will be
+    /// restricted to tokens reachable staying within `s`'s rule.
     pub fn next_tokens_in_ctx(&self, s: &dyn ATNState, _ctx: Option<&dyn ParserRuleContext>) -> IntervalSet {
         let analyzer = LL1Analyzer::new(self);
         analyzer.look(s, None, _ctx)
     }
 
-    pub fn add_state(&mut self, state: Box<dyn ATNState>) {
+    pub(crate) fn add_state(&mut self, state: Box<dyn ATNState>) {
         assert_eq!(state.get_state_number(), self.states.len());
         self.states.push(state)
     }
@@ -94,6 +92,38 @@ impl ATN {
         self.decision_to_state[decision]
     }
 
+    /// Computes the set of input symbols which could follow ATN state number
+    /// {@code stateNumber} in the specified full {@code context}. This method
+    /// considers the complete parser context, but does not evaluate semantic
+    /// predicates (i.e. all predicates encountered during the calculation are
+    /// assumed true). If a path in the ATN exists from the starting state to the
+    /// {@link RuleStopState} of the outermost context without matching any
+    /// symbols, {@link Token#EOF} is added to the returned set.
+    ///
+    /// <p>If {@code context} is {@code null}, it is treated as {@link ParserRuleContext#EMPTY}.</p>
+    ///
+    /// Note that this does NOT give you the set of all tokens that could
+    /// appear at a given token position in the input phrase.  In other words,
+    /// it does not answer:
+    ///
+    ///   "Given a specific partial input phrase, return the set of all tokens
+    ///    that can follow the last token in the input phrase."
+    ///
+    /// The big difference is that with just the input, the parser could
+    /// land right in the middle of a lookahead decision. Getting
+    /// all *possible* tokens given a partial input stream is a separate
+    /// computation. See https://github.com/antlr/antlr4/issues/1428
+    ///
+    /// For this function, we are specifying an ATN state and call stack to compute
+    /// what token(s) can come next and specifically: outside of a lookahead decision.
+    /// That is what you want for error reporting and recovery upon parse error.
+    ///
+    /// @param stateNumber the ATN state number
+    /// @param context the full parse context
+    /// @return The set of potentially valid input symbols which could follow the
+    /// specified state in the specified context.
+    /// @throws IllegalArgumentException if the ATN does not contain a state with
+    /// number {@code stateNumber}
     pub fn get_expected_tokens(&self, state_number: isize, _ctx: &Rc<dyn ParserRuleContext>) -> IntervalSet {
         let s = self.states[state_number as usize].as_ref();
         let mut following = self.next_tokens(s);

diff --git a/src/atn_config.rs b/src/atn_config.rs
@@ -1,6 +1,5 @@
-use std::fmt::{Debug, Error, Formatter, Write};
+use std::fmt::{Debug, Error, Formatter};
 use std::hash::{Hash, Hasher};
-use std::ops::DerefMut;
 use std::sync::Arc;
 
 use murmur3::murmur3_32::MurmurHasher;
@@ -12,6 +11,22 @@ use crate::lexer_action_executor::LexerActionExecutor;
 use crate::prediction_context::PredictionContext;
 use crate::semantic_context::SemanticContext;
 
+#[derive(Clone)]
+pub struct ATNConfig {
+    precedence_filter_suppressed: bool,
+    //todo since ATNState is immutable when we started working with ATNConfigs
+    // looks like it is possible to have usual reference here
+    state: ATNStateRef,
+    alt: isize,
+    //todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
+    //another todo check arena alloc
+    context: Option<Arc<PredictionContext>>,
+    //todo looks like here option is also unnecessary
+    pub semantic_context: Box<SemanticContext>,
+    pub reaches_into_outer_context: isize,
+    pub(crate) config_type: ATNConfigType,
+}
+
 impl Eq for ATNConfig {}
 
 impl PartialEq for ATNConfig {
@@ -45,22 +60,6 @@ impl Hash for ATNConfig {
     }
 }
 
-#[derive(Clone)]
-pub struct ATNConfig {
-    precedence_filter_suppressed: bool,
-    //todo since ATNState is immutable when we started working with ATNConfigs
-    // looks like it is possible to have usual reference here
-    state: ATNStateRef,
-    alt: isize,
-    //todo maybe option is unnecessary and PredictionContext::EMPTY would be enough
-    //another todo check check arena alloc
-    context: Option<Arc<PredictionContext>>,
-    //todo looks like here option is also unnesesary
-    pub semantic_context: Option<Box<SemanticContext>>,
-    pub reaches_into_outer_context: isize,
-    pub config_type: ATNConfigType,
-}
-
 impl Debug for ATNConfig {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
         f.write_fmt(format_args!("({},{},[{}]", self.state, self.alt, self.context.as_deref().unwrap()))?;
@@ -73,7 +72,7 @@ impl Debug for ATNConfig {
 }
 
 #[derive(Eq, PartialEq, Clone, Debug)]
-pub enum ATNConfigType {
+pub(crate) enum ATNConfigType {
     BaseATNConfig,
     LexerATNConfig {
         lexer_action_executor: Option<Box<LexerActionExecutor>>,
@@ -82,7 +81,7 @@ pub enum ATNConfigType {
 }
 
 impl ATNConfig {
-    pub fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> {
+    pub(crate) fn get_lexer_executor(&self) -> Option<&LexerActionExecutor> {
         match &self.config_type {
             ATNConfigType::BaseATNConfig => None,
             ATNConfigType::LexerATNConfig { lexer_action_executor, .. } => lexer_action_executor.as_deref(),
@@ -106,7 +105,7 @@ impl ATNConfig {
             state,
             alt,
             context,
-            semantic_context: Some(Box::new(SemanticContext::NONE)),
+            semantic_context: Box::new(SemanticContext::NONE),
             reaches_into_outer_context: 0,
             config_type: ATNConfigType::BaseATNConfig,
         }
@@ -116,9 +115,8 @@ impl ATNConfig {
         state: ATNStateRef,
         alt: isize,
         context: Option<Arc<PredictionContext>>,
-        semantic_context: Option<Box<SemanticContext>>,
+        semantic_context: Box<SemanticContext>,
     ) -> ATNConfig {
-        assert!(semantic_context.is_some());
         let mut new = Self::new(state, alt, context);
         new.semantic_context = semantic_context;
         new
@@ -137,7 +135,7 @@ impl ATNConfig {
         atnconfig
     }
 
-    pub fn cloned_with_new_semantic(&self, target: &dyn ATNState, ctx: Option<Box<SemanticContext>>) -> ATNConfig {
+    pub fn cloned_with_new_semantic(&self, target: &dyn ATNState, ctx: Box<SemanticContext>) -> ATNConfig {
         let mut new = self.cloned(target);
         new.semantic_context = ctx;
         new
@@ -160,7 +158,7 @@ impl ATNConfig {
         new
     }
 
-    pub fn cloned_with_new_exec(&self, target: &dyn ATNState, exec: Option<LexerActionExecutor>) -> ATNConfig {
+    pub(crate) fn cloned_with_new_exec(&self, target: &dyn ATNState, exec: Option<LexerActionExecutor>) -> ATNConfig {
         let mut new = self.cloned(target);
         if let ATNConfigType::LexerATNConfig {
             lexer_action_executor, passed_through_non_greedy_decision: _
@@ -179,14 +177,10 @@ impl ATNConfig {
         self.alt
     }
 
-    pub fn get_type(&self) -> &ATNConfigType {
+    pub(crate) fn get_type(&self) -> &ATNConfigType {
         &self.config_type
     }
 
-    pub fn get_semantic_context(&self) -> Option<&SemanticContext> {
-        self.semantic_context.as_deref()
-    }
-
     pub fn get_context(&self) -> Option<&Arc<PredictionContext>> {
         self.context.as_ref()
     }

diff --git a/src/atn_config_set.rs b/src/atn_config_set.rs
@@ -1,11 +1,10 @@
 use std::cmp::max;
-use std::collections::{HashMap, HashSet};
-use std::fmt::{Debug, Error, Formatter, Write};
+use std::collections::HashMap;
+use std::fmt::{Debug, Error, Formatter};
 use std::hash::{Hash, Hasher};
+use std::ops::Deref;
 
-use backtrace::Backtrace;
 use bit_set::BitSet;
-use murmur3::murmur3_32::MurmurHasher;
 
 use crate::atn_config::ATNConfig;
 use crate::atn_simulator::IATNSimulator;
@@ -45,7 +44,7 @@ pub struct ATNConfigSet {
 #[derive(Hash, Eq, PartialEq)]
 enum Key {
     Full(ATNConfig),
-    Partial(ATNStateRef, isize, Option<SemanticContext>),
+    Partial(ATNStateRef, isize, SemanticContext),
 }
 
 impl Debug for ATNConfigSet {
@@ -94,34 +93,34 @@ impl ATNConfigSet {
             has_semantic_context: false,
             read_only: false,
             unique_alt: 0,
-            hasher: Self::atn_config_local_hash,
+            hasher: Self::local_hash_key,
         }
     }
 
     // for lexerATNConfig
     pub fn new_ordered() -> ATNConfigSet {
         let mut a = ATNConfigSet::new_base_atnconfig_set(true);
 
-        a.hasher = Self::atn_config_full_hash;
+        a.hasher = Self::full_hash_key;
         a
     }
 
-    fn atn_config_full_hash(config: &ATNConfig) -> Key {
+    fn full_hash_key(config: &ATNConfig) -> Key {
         Key::Full(config.clone())
     }
 
-    fn atn_config_local_hash(config: &ATNConfig) -> Key {
-        Key::Partial(config.get_state(), config.get_alt(), config.get_semantic_context().cloned())
+    fn local_hash_key(config: &ATNConfig) -> Key {
+        Key::Partial(config.get_state(), config.get_alt(), config.semantic_context.deref().clone())
     }
 
     pub fn add_cached(
         &mut self,
-        mut config: Box<ATNConfig>,
+        config: Box<ATNConfig>,
         mut merge_cache: Option<&mut MergeCache>,
     ) -> bool {
         assert!(!self.read_only);
 
-        if config.get_semantic_context().is_some() && *config.get_semantic_context().unwrap() != SemanticContext::NONE {
+        if *config.semantic_context != SemanticContext::NONE {
             self.has_semantic_context = true
         }
 
@@ -136,7 +135,7 @@ impl ATNConfigSet {
             let existing = self.configs.get_mut(*existing).unwrap().as_mut();
             let root_is_wildcard = !self.full_ctx;
 
-            let mut merged = PredictionContext::merge(
+            let merged = PredictionContext::merge(
                 existing.get_context().unwrap(),
                 config.get_context().unwrap(),
                 root_is_wildcard,

diff --git a/src/atn_deserializer.rs b/src/atn_deserializer.rs
@@ -244,11 +244,11 @@ impl ATNDeserializer {
                     .get_mut(atn.rule_to_start_state[rule_index])
                     .unwrap();
                 if let ATNStateType::RuleStartState {
-                    stop_state: mut stop,
+                    stop_state: stop,
                     ..
-                } = start_state.get_state_type()
+                } = start_state.get_state_type_mut()
                 {
-                    stop = i
+                    *stop = i
                 }
             }
         }

diff --git a/src/atn_simulator.rs b/src/atn_simulator.rs
@@ -1,3 +1,4 @@
+use std::fmt::{Debug, Error, Formatter};
 use std::ops::Deref;
 use std::sync::Arc;
 
@@ -12,12 +13,19 @@ pub trait IATNSimulator {
     fn decision_to_dfa(&self) -> &Vec<DFA>;
 }
 
+
 pub struct BaseATNSimulator {
     pub atn: Arc<ATN>,
     pub shared_context_cache: Arc<PredictionContextCache>,
     pub decision_to_dfa: Arc<Vec<DFA>>,
 }
 
+impl Debug for BaseATNSimulator {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
+        f.write_str("BaseATNSimulator { .. }")
+    }
+}
+
 impl BaseATNSimulator {
     pub fn new_base_atnsimulator(
         atn: Arc<ATN>,