Skip to content

Commit

Permalink
feat(tasks): benchmarks for lexer (#2101)
Browse files Browse the repository at this point in the history
This PR adds benchmarks for the lexer. I'm doing some work on optimizing
the lexer and I thought it'd be useful to see the effects of changes in
isolation, separate from the parser.

These benchmarks may not be ideal to keep long-term, but for now they'd be
useful.

To do so, the `oxc_parser` crate needs to expose the lexer. I have done
that without adding it to the docs, under the alias `__lexer`.
  • Loading branch information
overlookmotel authored Jan 21, 2024
1 parent 16b3261 commit 36c718e
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 1 deletion.
5 changes: 4 additions & 1 deletion crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ pub struct Token {

/// True if the identifier / string / template kinds has escaped strings.
/// The escaped strings are saved in [Lexer::escaped_strings] and [Lexer::escaped_templates] by
/// [Token::start]
/// [Token::start].
///
/// [Lexer::escaped_strings]: [super::Lexer::escaped_strings]
/// [Lexer::escaped_templates]: [super::Lexer::escaped_templates]
pub escaped: bool,
}

Expand Down
6 changes: 6 additions & 0 deletions crates/oxc_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ use crate::{
state::ParserState,
};

// Expose lexer for benchmarks
#[doc(hidden)]
pub mod __lexer {
pub use super::lexer::{Kind, Lexer, Token};
}

/// Maximum length of source in bytes which can be parsed (~4 GiB).
// Span's start and end are u32s, so size limit is u32::MAX bytes.
pub const MAX_LEN: usize = u32::MAX as usize;
Expand Down
4 changes: 4 additions & 0 deletions tasks/benchmark/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ harness = false
name = "minifier"
harness = false

# Lexer-only benchmark (benches/lexer.rs).
# `harness = false`: the bench target supplies its own `main` (via `criterion_main!`)
# instead of using the default libtest bench harness.
[[bench]]
name = "lexer"
harness = false

[dependencies]
oxc_allocator = { workspace = true }
oxc_linter = { workspace = true }
Expand Down
46 changes: 46 additions & 0 deletions tasks/benchmark/benches/lexer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use oxc_allocator::Allocator;
use oxc_benchmark::{criterion_group, criterion_main, BenchmarkId, Criterion};
use oxc_parser::__lexer::{Kind, Lexer};
use oxc_span::SourceType;
use oxc_tasks_common::{TestFile, TestFiles};

fn bench_lexer(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("lexer");

// Lexer lacks awareness of JS grammar, so it gets confused by a few things without the parser
// driving it, notably escapes in regexps and template strings.
// So simplify the input for it, by removing backslashes and converting template strings to
// normal string literals.
let files = TestFiles::complicated()
.files()
.iter()
.map(|file| TestFile {
url: file.url.clone(),
file_name: file.file_name.clone(),
source_text: file.source_text.replace('\\', " ").replace('`', "'"),
})
.collect::<Vec<_>>();

for file in files {
let source_type = SourceType::from_path(&file.file_name).unwrap();
group.bench_with_input(
BenchmarkId::from_parameter(&file.file_name),
&file.source_text,
|b, source_text| {
b.iter_with_large_drop(|| {
// Include the allocator drop time to make time measurement consistent.
// Otherwise the allocator will allocate huge memory chunks (by power of two) from the
// system allocator, which makes time measurement unequal during long runs.
let allocator = Allocator::default();
let mut lexer = Lexer::new(&allocator, source_text, source_type);
while lexer.next_token().kind != Kind::Eof {}
allocator
});
},
);
}
group.finish();
}

// Declare a benchmark group named `lexer` containing `bench_lexer`, then generate the
// entry point that runs it (this bench target is built with `harness = false`).
criterion_group!(lexer, bench_lexer);
criterion_main!(lexer);

0 comments on commit 36c718e

Please sign in to comment.