Skip to content

Commit cecf063

Browse files
committed
Tune tokenizer to real-world token frequencies
1 parent eed8958 commit cecf063

File tree

1 file changed

+148
-147
lines changed

1 file changed

+148
-147
lines changed

lib/std/zig/tokenizer.zig

Lines changed: 148 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -10,55 +10,56 @@ pub const Token = struct {
1010
};
1111

1212
pub const Keywords = std.ComptimeStringMap(Tag, .{
13-
.{ "addrspace", .keyword_addrspace },
14-
.{ "align", .keyword_align },
15-
.{ "allowzero", .keyword_allowzero },
16-
.{ "and", .keyword_and },
17-
.{ "anyframe", .keyword_anyframe },
18-
.{ "anytype", .keyword_anytype },
19-
.{ "asm", .keyword_asm },
20-
.{ "async", .keyword_async },
21-
.{ "await", .keyword_await },
22-
.{ "break", .keyword_break },
23-
.{ "callconv", .keyword_callconv },
24-
.{ "catch", .keyword_catch },
25-
.{ "comptime", .keyword_comptime },
13+
// Sorted in descending order of real-world keyword frequency for faster lookup.
2614
.{ "const", .keyword_const },
27-
.{ "continue", .keyword_continue },
28-
.{ "defer", .keyword_defer },
15+
.{ "pub", .keyword_pub },
16+
.{ "fn", .keyword_fn },
17+
.{ "try", .keyword_try },
18+
.{ "return", .keyword_return },
19+
.{ "callconv", .keyword_callconv },
20+
.{ "if", .keyword_if },
2921
.{ "else", .keyword_else },
22+
.{ "while", .keyword_while },
23+
.{ "extern", .keyword_extern },
24+
.{ "var", .keyword_var },
25+
.{ "volatile", .keyword_volatile },
26+
.{ "switch", .keyword_switch },
27+
.{ "struct", .keyword_struct },
28+
.{ "inline", .keyword_inline },
29+
.{ "error", .keyword_error },
30+
.{ "defer", .keyword_defer },
31+
.{ "comptime", .keyword_comptime },
32+
.{ "test", .keyword_test },
33+
.{ "break", .keyword_break },
34+
.{ "for", .keyword_for },
35+
.{ "usingnamespace", .keyword_usingnamespace },
36+
.{ "and", .keyword_and },
37+
.{ "unreachable", .keyword_unreachable },
3038
.{ "enum", .keyword_enum },
39+
.{ "catch", .keyword_catch },
40+
.{ "anytype", .keyword_anytype },
41+
.{ "orelse", .keyword_orelse },
42+
.{ "align", .keyword_align },
43+
.{ "continue", .keyword_continue },
44+
.{ "packed", .keyword_packed },
45+
.{ "or", .keyword_or },
46+
.{ "opaque", .keyword_opaque },
47+
.{ "union", .keyword_union },
3148
.{ "errdefer", .keyword_errdefer },
32-
.{ "error", .keyword_error },
3349
.{ "export", .keyword_export },
34-
.{ "extern", .keyword_extern },
35-
.{ "fn", .keyword_fn },
36-
.{ "for", .keyword_for },
37-
.{ "if", .keyword_if },
38-
.{ "inline", .keyword_inline },
50+
.{ "asm", .keyword_asm },
3951
.{ "noalias", .keyword_noalias },
4052
.{ "noinline", .keyword_noinline },
41-
.{ "nosuspend", .keyword_nosuspend },
42-
.{ "opaque", .keyword_opaque },
43-
.{ "or", .keyword_or },
44-
.{ "orelse", .keyword_orelse },
45-
.{ "packed", .keyword_packed },
46-
.{ "pub", .keyword_pub },
47-
.{ "resume", .keyword_resume },
48-
.{ "return", .keyword_return },
53+
.{ "addrspace", .keyword_addrspace },
54+
.{ "threadlocal", .keyword_threadlocal },
4955
.{ "linksection", .keyword_linksection },
50-
.{ "struct", .keyword_struct },
56+
.{ "async", .keyword_async },
5157
.{ "suspend", .keyword_suspend },
52-
.{ "switch", .keyword_switch },
53-
.{ "test", .keyword_test },
54-
.{ "threadlocal", .keyword_threadlocal },
55-
.{ "try", .keyword_try },
56-
.{ "union", .keyword_union },
57-
.{ "unreachable", .keyword_unreachable },
58-
.{ "usingnamespace", .keyword_usingnamespace },
59-
.{ "var", .keyword_var },
60-
.{ "volatile", .keyword_volatile },
61-
.{ "while", .keyword_while },
58+
.{ "resume", .keyword_resume },
59+
.{ "allowzero", .keyword_allowzero },
60+
.{ "anyframe", .keyword_anyframe },
61+
.{ "await", .keyword_await },
62+
.{ "nosuspend", .keyword_nosuspend },
6263
});
6364

6465
pub fn getKeyword(bytes: []const u8) ?Tag {
@@ -190,144 +191,144 @@ pub const Token = struct {
190191
keyword_while,
191192

192193
pub fn lexeme(tag: Tag) ?[]const u8 {
194+
// Prongs are sorted in descending order of real-world token frequency for faster lookup.
193195
return switch (tag) {
194-
.invalid,
195-
.identifier,
196-
.string_literal,
197-
.multiline_string_literal_line,
198-
.char_literal,
199-
.eof,
200-
.builtin,
201-
.number_literal,
202-
.doc_comment,
203-
.container_doc_comment,
204-
=> null,
205-
206-
.invalid_periodasterisks => ".**",
207-
.bang => "!",
208-
.pipe => "|",
209-
.pipe_pipe => "||",
210-
.pipe_equal => "|=",
211-
.equal => "=",
212-
.equal_equal => "==",
213-
.equal_angle_bracket_right => "=>",
214-
.bang_equal => "!=",
196+
.identifier => null,
197+
.period => ".",
198+
.comma => ",",
215199
.l_paren => "(",
216200
.r_paren => ")",
201+
.equal => "=",
217202
.semicolon => ";",
218-
.percent => "%",
219-
.percent_equal => "%=",
203+
.number_literal => null,
204+
.colon => ":",
205+
.keyword_const => "const",
220206
.l_brace => "{",
221207
.r_brace => "}",
208+
.builtin => null,
209+
.keyword_pub => "pub",
210+
.asterisk => "*",
211+
.string_literal => null,
222212
.l_bracket => "[",
223213
.r_bracket => "]",
224-
.period => ".",
225-
.period_asterisk => ".*",
214+
.question_mark => "?",
215+
.keyword_fn => "fn",
216+
.doc_comment => null,
217+
.keyword_try => "try",
218+
.equal_angle_bracket_right => "=>",
219+
.keyword_return => "return",
220+
.ampersand => "&",
221+
.keyword_callconv => "callconv",
222+
.pipe => "|",
223+
.keyword_if => "if",
224+
.keyword_else => "else",
225+
.keyword_while => "while",
226+
.keyword_extern => "extern",
227+
.keyword_var => "var",
228+
.keyword_volatile => "volatile",
229+
.equal_equal => "==",
230+
.keyword_switch => "switch",
231+
.keyword_struct => "struct",
232+
.multiline_string_literal_line => null,
233+
.keyword_inline => "inline",
234+
.bang => "!",
235+
.minus => "-",
236+
.keyword_error => "error",
237+
.char_literal => null,
238+
.plus => "+",
226239
.ellipsis2 => "..",
240+
.keyword_defer => "defer",
241+
.keyword_comptime => "comptime",
242+
.bang_equal => "!=",
243+
.keyword_test => "test",
244+
.period_asterisk => ".*",
245+
.keyword_break => "break",
246+
.keyword_for => "for",
247+
.plus_equal => "+=",
248+
.keyword_usingnamespace => "usingnamespace",
249+
.keyword_and => "and",
250+
.keyword_unreachable => "unreachable",
251+
.keyword_enum => "enum",
252+
.keyword_catch => "catch",
253+
.keyword_anytype => "anytype",
254+
.keyword_orelse => "orelse",
255+
.plus_plus => "++",
256+
.eof => null,
257+
.angle_bracket_left => "<",
258+
.keyword_align => "align",
259+
.angle_bracket_right => ">",
260+
.keyword_continue => "continue",
261+
.keyword_packed => "packed",
262+
.keyword_or => "or",
263+
.angle_bracket_angle_bracket_left => "<<",
264+
.container_doc_comment => null,
265+
.keyword_opaque => "opaque",
266+
.angle_bracket_angle_bracket_right => ">>",
267+
.slash => "/",
227268
.ellipsis3 => "...",
269+
.keyword_union => "union",
270+
.angle_bracket_right_equal => ">=",
271+
.angle_bracket_left_equal => "<=",
272+
.keyword_errdefer => "errdefer",
273+
.keyword_export => "export",
274+
.asterisk_asterisk => "**",
275+
.minus_equal => "-=",
276+
.keyword_asm => "asm",
277+
.pipe_pipe => "||",
278+
.percent => "%",
279+
.tilde => "~",
228280
.caret => "^",
229-
.caret_equal => "^=",
230-
.plus => "+",
231-
.plus_plus => "++",
232-
.plus_equal => "+=",
281+
.pipe_equal => "|=",
233282
.plus_percent => "+%",
234-
.plus_percent_equal => "+%=",
235-
.plus_pipe => "+|",
236-
.plus_pipe_equal => "+|=",
237-
.minus => "-",
238-
.minus_equal => "-=",
283+
.keyword_noalias => "noalias",
239284
.minus_percent => "-%",
240-
.minus_percent_equal => "-%=",
241-
.minus_pipe => "-|",
242-
.minus_pipe_equal => "-|=",
243-
.asterisk => "*",
244-
.asterisk_equal => "*=",
245-
.asterisk_asterisk => "**",
285+
.caret_equal => "^=",
286+
.plus_percent_equal => "+%=",
246287
.asterisk_percent => "*%",
247-
.asterisk_percent_equal => "*%=",
248-
.asterisk_pipe => "*|",
249-
.asterisk_pipe_equal => "*|=",
250288
.arrow => "->",
251-
.colon => ":",
252-
.slash => "/",
253-
.slash_equal => "/=",
254-
.comma => ",",
255-
.ampersand => "&",
256289
.ampersand_equal => "&=",
257-
.question_mark => "?",
258-
.angle_bracket_left => "<",
259-
.angle_bracket_left_equal => "<=",
260-
.angle_bracket_angle_bracket_left => "<<",
261-
.angle_bracket_angle_bracket_left_equal => "<<=",
262-
.angle_bracket_angle_bracket_left_pipe => "<<|",
263-
.angle_bracket_angle_bracket_left_pipe_equal => "<<|=",
264-
.angle_bracket_right => ">",
265-
.angle_bracket_right_equal => ">=",
266-
.angle_bracket_angle_bracket_right => ">>",
267290
.angle_bracket_angle_bracket_right_equal => ">>=",
268-
.tilde => "~",
269-
.keyword_addrspace => "addrspace",
270-
.keyword_align => "align",
291+
.keyword_async => "async",
292+
.asterisk_equal => "*=",
293+
.minus_pipe => "-|",
294+
.asterisk_percent_equal => "*%=",
295+
.keyword_suspend => "suspend",
296+
.keyword_resume => "resume",
271297
.keyword_allowzero => "allowzero",
272-
.keyword_and => "and",
298+
.angle_bracket_angle_bracket_left_equal => "<<=",
299+
.slash_equal => "/=",
273300
.keyword_anyframe => "anyframe",
274-
.keyword_anytype => "anytype",
275-
.keyword_asm => "asm",
276-
.keyword_async => "async",
277301
.keyword_await => "await",
278-
.keyword_break => "break",
279-
.keyword_callconv => "callconv",
280-
.keyword_catch => "catch",
281-
.keyword_comptime => "comptime",
282-
.keyword_const => "const",
283-
.keyword_continue => "continue",
284-
.keyword_defer => "defer",
285-
.keyword_else => "else",
286-
.keyword_enum => "enum",
287-
.keyword_errdefer => "errdefer",
288-
.keyword_error => "error",
289-
.keyword_export => "export",
290-
.keyword_extern => "extern",
291-
.keyword_fn => "fn",
292-
.keyword_for => "for",
293-
.keyword_if => "if",
294-
.keyword_inline => "inline",
295-
.keyword_noalias => "noalias",
296-
.keyword_noinline => "noinline",
297302
.keyword_nosuspend => "nosuspend",
298-
.keyword_opaque => "opaque",
299-
.keyword_or => "or",
300-
.keyword_orelse => "orelse",
301-
.keyword_packed => "packed",
302-
.keyword_pub => "pub",
303-
.keyword_resume => "resume",
304-
.keyword_return => "return",
305-
.keyword_linksection => "linksection",
306-
.keyword_struct => "struct",
307-
.keyword_suspend => "suspend",
308-
.keyword_switch => "switch",
309-
.keyword_test => "test",
303+
.minus_percent_equal => "-%=",
304+
.plus_pipe => "+|",
305+
.keyword_noinline => "noinline",
306+
.keyword_addrspace => "addrspace",
310307
.keyword_threadlocal => "threadlocal",
311-
.keyword_try => "try",
312-
.keyword_union => "union",
313-
.keyword_unreachable => "unreachable",
314-
.keyword_usingnamespace => "usingnamespace",
315-
.keyword_var => "var",
316-
.keyword_volatile => "volatile",
317-
.keyword_while => "while",
308+
.plus_pipe_equal => "+|=",
309+
.minus_pipe_equal => "-|=",
310+
.asterisk_pipe => "*|",
311+
.percent_equal => "%=",
312+
.angle_bracket_angle_bracket_left_pipe => "<<|",
313+
.invalid => null,
314+
.keyword_linksection => "linksection",
315+
.asterisk_pipe_equal => "*|=",
316+
.angle_bracket_angle_bracket_left_pipe_equal => "<<|=",
317+
.invalid_periodasterisks => ".**",
318318
};
319319
}
320320

321321
pub fn symbol(tag: Tag) []const u8 {
322+
// Prongs are sorted in descending order of real-world token frequency for faster lookup.
322323
return tag.lexeme() orelse switch (tag) {
323-
.invalid => "invalid token",
324324
.identifier => "an identifier",
325+
.number_literal => "a number literal",
326+
.builtin => "a builtin function",
325327
.string_literal, .multiline_string_literal_line => "a string literal",
328+
.doc_comment, .container_doc_comment => "a document comment",
326329
.char_literal => "a character literal",
327330
.eof => "EOF",
328-
.builtin => "a builtin function",
329-
.number_literal => "a number literal",
330-
.doc_comment, .container_doc_comment => "a document comment",
331+
.invalid => "invalid token",
331332
else => unreachable,
332333
};
333334
}

0 commit comments

Comments
 (0)