Skip to content

Commit

Permalink
some cleanup, documentation and performance optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
rrevenantt committed Feb 14, 2020
1 parent 307b806 commit a44046d
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ typed-arena = "2.0.*"


[profile.release]
opt-level = 3
#opt-level = 3
#debug = true
22 changes: 13 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,26 @@ This very likely will be the case until `specialization`,`try_blocks` and `unsiz
Remaining core things:
- [ ] Documentation
- [ ] Quite some things are already documented but still far from perfect
- [ ] More doctests. Currently the only examples are tests
- [ ] API stabilization
- [ ] Rust api guidelines compliance
- [ ] more tests for API because it is quite different from Java
- [ ] Code quality
- [ ] Rustfmt fails to run currently
- [ ] Clippy sanitation
- [ ] Not all warnings are fixed

See tracking [issue](https://github.com/antlr/antlr4/issues/1839) for more info

### Additional improvements:
- make parsing zero-copy (i.e. use &str (or Cow) instead of String in tokens and &Token in tree nodes)
- profiling and performance optimizations
- use & instead of Rc for nodes in parser
- visitor
- build.rs integration example
- run rustfmt on generated parser
- support stable rust
- support no_std(although alloc would still be required)

### Usage

Add to `Cargo.toml`
Expand Down Expand Up @@ -64,20 +74,14 @@ there are quite some differences because Rust is not an OOP language and is much
struct generated for rule is an enum with a variant for each alternative
- Parser needs to have ownership of listeners, but it is possible to get a listener back via `ListenerId`
otherwise `ParseTreeWalker` should be used.
- In embedded actions, use the `recog` variable instead of `self` to access the parser.
This is because predicates have to be inserted into two syntactically different places in the generated parser


### Unsafe
Currently `unsafe` is used only to cast from a trait object back to the original type
and to update data inside `Rc` via `get_mut_unchecked` (the returned mutable reference is used immediately and not stored anywhere)

### Future improvements:
- make parsing zero-copy (i.e. use &str (or Cow) instead of String in tokens and &Token in tree nodes)
- use & instead of Rc for nodes in parser
- support stable rust
- visitor
- run rustfmt on generated parser
- support no_std(although alloc would still be required)

## Licence

BSD 3-clause
29 changes: 22 additions & 7 deletions src/atn_config_set.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
use std::cmp::max;
use std::collections::HashMap;
use std::fmt::{Debug, Error, Formatter};
use std::hash::{Hash, Hasher};
use std::hash::{BuildHasher, Hash, Hasher};
use std::ops::Deref;

use bit_set::BitSet;
use murmur3::murmur3_32::MurmurHasher;

use crate::atn_config::ATNConfig;
use crate::atn_simulator::IATNSimulator;
use crate::atn_state::ATNStateRef;
use crate::parser_atn_simulator::MergeCache;
use crate::prediction_context::PredictionContext;
use crate::prediction_context::{MurmurHasherBuilder, PredictionContext};
use crate::semantic_context::SemanticContext;

pub struct ATNConfigSet {
cached_hash: u64,

//todo looks like we need only iteration for configs
// so i think we can replace configs and lookup with indexhashset
config_lookup: HashMap<Key, usize>,
config_lookup: HashMap<Key, usize, MurmurHasherBuilder>,

//todo remove box?
pub(crate) configs: Vec<Box<ATNConfig>>,
Expand All @@ -41,10 +42,19 @@ pub struct ATNConfigSet {
hasher: fn(&ATNConfig) -> Key,
}

#[derive(Hash, Eq, PartialEq)]
#[derive(Eq, PartialEq)]
enum Key {
Full(ATNConfig),
Partial(ATNStateRef, isize, SemanticContext),
Partial(i32, ATNStateRef, isize, SemanticContext),
}

/// Hashing for `Key`.
///
/// A `Full` key forwards to the `Hash` impl of the wrapped `ATNConfig`.
/// A `Partial` key feeds only its first field (the stored `i32` hash) to the
/// hasher; its remaining fields are not hashed here.
impl Hash for Key {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            // Only the stored hash value is written; other fields are skipped.
            Key::Partial(stored_hash, ..) => state.write_i32(*stored_hash),
            Key::Full(config) => config.hash(state),
        }
    }
}

impl Debug for ATNConfigSet {
Expand Down Expand Up @@ -85,7 +95,7 @@ impl ATNConfigSet {
pub fn new_base_atnconfig_set(full_ctx: bool) -> ATNConfigSet {
ATNConfigSet {
cached_hash: 0,
config_lookup: HashMap::new(),
config_lookup: HashMap::with_hasher(MurmurHasherBuilder {}),
configs: vec![],
conflicting_alts: Default::default(),
dips_into_outer_context: false,
Expand All @@ -110,7 +120,12 @@ impl ATNConfigSet {
}

fn local_hash_key(config: &ATNConfig) -> Key {
Key::Partial(config.get_state(), config.get_alt(), config.semantic_context.deref().clone())
let mut hasher = MurmurHasher::default();
config.get_state().hash(&mut hasher);
config.get_alt().hash(&mut hasher);
config.semantic_context.hash(&mut hasher);

Key::Partial(hasher.finish() as i32, config.get_state(), config.get_alt(), config.semantic_context.deref().clone())
}

pub fn add_cached(
Expand Down
28 changes: 14 additions & 14 deletions src/atn_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ use once_cell::sync::OnceCell;
use crate::interval_set::IntervalSet;
use crate::transition::Transition;

pub const ATNSTATE_INVALID_TYPE: isize = 0;
pub const ATNSTATE_BASIC: isize = 1;
pub const ATNSTATE_RULE_START: isize = 2;
pub const ATNSTATE_BLOCK_START: isize = 3;
pub const ATNSTATE_PLUS_BLOCK_START: isize = 4;
pub const ATNSTATE_STAR_BLOCK_START: isize = 5;
pub const ATNSTATE_TOKEN_START: isize = 6;
pub const ATNSTATE_RULE_STOP: isize = 7;
pub const ATNSTATE_BLOCK_END: isize = 8;
pub const ATNSTATE_STAR_LOOP_BACK: isize = 9;
pub const ATNSTATE_STAR_LOOP_ENTRY: isize = 10;
pub const ATNSTATE_PLUS_LOOP_BACK: isize = 11;
pub const ATNSTATE_LOOP_END: isize = 12;
pub const ATNSTATE_INVALID_STATE_NUMBER: isize = -1;
pub(crate) const ATNSTATE_INVALID_TYPE: isize = 0;
pub(crate) const ATNSTATE_BASIC: isize = 1;
pub(crate) const ATNSTATE_RULE_START: isize = 2;
pub(crate) const ATNSTATE_BLOCK_START: isize = 3;
pub(crate) const ATNSTATE_PLUS_BLOCK_START: isize = 4;
pub(crate) const ATNSTATE_STAR_BLOCK_START: isize = 5;
pub(crate) const ATNSTATE_TOKEN_START: isize = 6;
pub(crate) const ATNSTATE_RULE_STOP: isize = 7;
pub(crate) const ATNSTATE_BLOCK_END: isize = 8;
pub(crate) const ATNSTATE_STAR_LOOP_BACK: isize = 9;
pub(crate) const ATNSTATE_STAR_LOOP_ENTRY: isize = 10;
pub(crate) const ATNSTATE_PLUS_LOOP_BACK: isize = 11;
pub(crate) const ATNSTATE_LOOP_END: isize = 12;
pub(crate) const ATNSTATE_INVALID_STATE_NUMBER: isize = -1;

//might be changed later
#[derive(Debug, Eq, PartialEq)]
Expand Down
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
#![warn(trivial_numeric_casts)]
//! # Antlr4 runtime
//!
//! !! not production ready, but pretty close
//! !! not production ready, but pretty close.
//! The API will very likely change,
//! but it is fully functional, so you can use it for experimentation and proofs of concept.
//!
//! This is a Rust runtime for [ANTLR4] parser generator.
//! It is required to use parsers and lexers generated by [ANTLR4] parser generator
Expand Down
1 change: 1 addition & 0 deletions src/parser_atn_simulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ impl ParserATNSimulator {
Ok(predicted_alt)
}

// ATNConfigSet is pretty big, so it should be boxed to make moving it cheaper
fn compute_reach_set(&self, closure: &ATNConfigSet, t: isize, full_ctx: bool, local: &mut Local) -> Option<ATNConfigSet> {
// println!("in computeReachSet, starting closure: {:?}",closure);
let mut intermediate = ATNConfigSet::new_base_atnconfig_set(full_ctx);
Expand Down
4 changes: 0 additions & 4 deletions tests/my_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ if (x < x && a > 0) then duh
fn parser_test_csv() {
println!("test started");
let mut _lexer = CSVLexer::new(Box::new(InputStream::new("V123, V2\nd1,d2\n".into())));
// _lexer.base.add_error_listener();
// let mut token_source = UnbufferedTokenStream::new_unbuffered(_lexer);
let token_source = CommonTokenStream::new(_lexer);
let mut parser = CSVParser::new(Box::new(token_source));
parser.add_parse_listener(Box::new(Listener {}));
Expand All @@ -135,8 +133,6 @@ if (x < x && a > 0) then duh
#[test]
fn adaptive_predict_test() {
let mut _lexer = ReferenceToATNLexer::new(Box::new(InputStream::new("a 34 b".into())));
// _lexer.base.add_error_listener();
// let mut token_source = UnbufferedTokenStream::new_unbuffered(_lexer);
let token_source = CommonTokenStream::new(_lexer);
let mut parser = ReferenceToATNParser::new(Box::new(token_source));
parser.add_parse_listener(Box::new(Listener2 {}));
Expand Down

0 comments on commit a44046d

Please sign in to comment.