@@ -11,13 +11,6 @@ extra_data: []Node.Index,
11
11
12
12
errors : []const Error ,
13
13
14
- const std = @import ("../std.zig" );
15
- const assert = std .debug .assert ;
16
- const testing = std .testing ;
17
- const mem = std .mem ;
18
- const Token = std .zig .Token ;
19
- const Ast = @This ();
20
-
21
14
/// 32-bit unsigned integer alias. In this file it is the type of token-position
/// fields such as `rparen` inside `Node`'s sub-structs.
/// NOTE(review): presumably an index into `tokens` — confirm against usages.
pub const TokenIndex = u32 ;
22
15
/// 32-bit unsigned integer alias.
/// NOTE(review): presumably a byte offset into `source`, the same width as the
/// `u32` token start offsets recorded by `parse` — confirm against usages.
pub const ByteOffset = u32 ;
23
16
@@ -34,7 +27,7 @@ pub const Location = struct {
34
27
line_end : usize ,
35
28
};
36
29
37
- pub fn deinit (tree : * Ast , gpa : mem. Allocator ) void {
30
+ pub fn deinit (tree : * Ast , gpa : Allocator ) void {
38
31
tree .tokens .deinit (gpa );
39
32
tree .nodes .deinit (gpa );
40
33
gpa .free (tree .extra_data );
@@ -48,11 +41,69 @@ pub const RenderError = error{
48
41
OutOfMemory ,
49
42
};
50
43
44
/// Selects which grammar entry point `parse` runs over the token stream.
pub const Mode = enum {
    /// `parse` dispatches to `Parse.parseRoot`.
    zig,
    /// `parse` dispatches to `Parse.parseZon`.
    zon,
};
45
+
46
/// Tokenizes `source` and parses the tokens into an `Ast`.
///
/// `mode` picks the grammar entry point: `.zig` runs `Parse.parseRoot`,
/// `.zon` runs `Parse.parseZon`.
///
/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
/// The returned tree stores `source` as given; no copy is made here, so
/// `source` must stay valid for as long as the tree is used.
///
/// Fails with `Allocator.Error` when an allocation fails. On failure every
/// buffer allocated by this function is released before returning.
pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!Ast {
    var tokens = Ast.TokenList{};
    defer tokens.deinit(gpa);

    // Empirically, the zig std lib has an 8:1 ratio of source bytes to token count.
    const estimated_token_count = source.len / 8;
    try tokens.ensureTotalCapacity(gpa, estimated_token_count);

    var tokenizer = std.zig.Tokenizer.init(source);
    while (true) {
        const token = tokenizer.next();
        try tokens.append(gpa, .{
            .tag = token.tag,
            .start = @intCast(u32, token.loc.start),
        });
        // The eof token itself is recorded before leaving the loop.
        if (token.tag == .eof) break;
    }

    var parser: Parse = .{
        .source = source,
        .gpa = gpa,
        .token_tags = tokens.items(.tag),
        .token_starts = tokens.items(.start),
        .errors = .{},
        .nodes = .{},
        .extra_data = .{},
        .scratch = .{},
        .tok_i = 0,
    };
    defer parser.errors.deinit(gpa);
    defer parser.nodes.deinit(gpa);
    defer parser.extra_data.deinit(gpa);
    defer parser.scratch.deinit(gpa);

    // Empirically, Zig source code has a 2:1 ratio of tokens to AST nodes.
    // Make sure at least 1 so we can use appendAssumeCapacity on the root node below.
    const estimated_node_count = (tokens.len + 2) / 2;
    try parser.nodes.ensureTotalCapacity(gpa, estimated_node_count);

    switch (mode) {
        .zig => try parser.parseRoot(),
        .zon => try parser.parseZon(),
    }

    // Do every fallible ownership transfer BEFORE taking the token and node
    // slices. `tokens.toOwnedSlice()` / `parser.nodes.toOwnedSlice()` empty
    // their lists, so the `defer ... deinit` calls above would no longer free
    // those buffers if a later `try` failed. Until this point the defers still
    // own everything, so an error here leaks nothing.
    const extra_data = try parser.extra_data.toOwnedSlice(gpa);
    errdefer gpa.free(extra_data);
    const errors = try parser.errors.toOwnedSlice(gpa);

    // TODO experiment with compacting the MultiArrayList slices here
    return Ast{
        .source = source,
        .tokens = tokens.toOwnedSlice(),
        .nodes = parser.nodes.toOwnedSlice(),
        .extra_data = extra_data,
        .errors = errors,
    };
}
101
+
51
102
/// `gpa` is used for allocating the resulting formatted source code, as well as
52
103
/// for allocating extra stack memory if needed, because this function utilizes recursion.
53
104
/// Note: that's not actually true yet, see https://github.com/ziglang/zig/issues/1006.
54
105
/// Caller owns the returned slice of bytes, allocated with `gpa`.
55
- pub fn render (tree : Ast , gpa : mem. Allocator ) RenderError ! []u8 {
106
+ pub fn render (tree : Ast , gpa : Allocator ) RenderError ! []u8 {
56
107
var buffer = std .ArrayList (u8 ).init (gpa );
57
108
defer buffer .deinit ();
58
109
@@ -3347,3 +3398,12 @@ pub const Node = struct {
3347
3398
rparen : TokenIndex ,
3348
3399
};
3349
3400
};
3401
+
3402
+ const std = @import ("../std.zig" );
3403
+ const assert = std .debug .assert ;
3404
+ const testing = std .testing ;
3405
+ const mem = std .mem ;
3406
+ const Token = std .zig .Token ;
3407
+ const Ast = @This ();
3408
+ const Allocator = std .mem .Allocator ;
3409
+ const Parse = @import ("Parse.zig" );
0 commit comments