-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathparser.hpp
496 lines (418 loc) · 12.4 KB
/
parser.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
///
/// Tokenizer and Parser
///
/// Tokenizer
/// The tokenizer is responsible for transforming a stream of characters into a stream of tokens, which
/// can then be easily consumed by the parser.
///
/// Parser
/// The parser is responsible for transforming the stream of tokens into an abstract syntax tree. That tree
/// can then be easily processed by the semantic analyzer in the next compilation step.
///
#pragma once
#include <stdinc.h>
struct ParserContext;
/// Kinds of tokens in a token stream; this is the type stored in `TokenStream::TokenData::type`.
///
/// NOTE(review): the enumerator order determines the underlying integer values, so
/// reordering entries is only safe if nothing depends on those values — confirm before changing.
/// The grouping comments below are inferred from the enumerator names.
enum class Token
{
// Commands, labels and scope/script delimiters.
Command,
Label,
ScopeBegin,
ScopeEnd,
MISSION_START,
MISSION_END,
SCRIPT_START,
SCRIPT_END,
NewLine,
// Variable and constant declaration keywords.
VAR_INT,
VAR_FLOAT,
VAR_TEXT_LABEL,
VAR_TEXT_LABEL16,
LVAR_INT,
LVAR_FLOAT,
LVAR_TEXT_LABEL,
LVAR_TEXT_LABEL16,
CONST_INT,
CONST_FLOAT,
// Literals and plain text.
Integer,
Float,
Text,
String,
// Logical and control-flow keywords.
NOT,
AND,
OR,
IF,
ELSE,
ENDIF,
WHILE,
ENDWHILE,
REPEAT,
ENDREPEAT,
SWITCH,
ENDSWITCH,
CASE,
DEFAULT,
BREAK,
CONTINUE, // Extension
// Operators.
Decrement,
Increment,
Equal,
Greater,
GreaterEqual,
Lesser,
LesserEqual,
Plus,
Minus,
Times,
Divide,
TimedPlus,
TimedMinus,
// Compound assignment operators.
EqCast,
EqPlus,
EqMinus,
EqTimes,
EqDivide,
EqTimedPlus,
EqTimedMinus,
// Extensions
Hexadecimal,
DUMP,
ENDDUMP,
};
/// Kinds of nodes in a `SyntaxTree`; this is the type returned by `SyntaxTree::type()`.
///
/// NOTE(review): the enumerator order determines the underlying integer values, so
/// reordering entries is only safe if nothing depends on those values — confirm before changing.
/// The grouping comments below are inferred from the enumerator names.
enum class NodeType
{
// Structural nodes.
Block,
Command,
Label,
Scope,
MISSION_START,
MISSION_END,
SCRIPT_START,
SCRIPT_END,
// Variable and constant declarations.
VAR_INT,
VAR_FLOAT,
VAR_TEXT_LABEL,
VAR_TEXT_LABEL16,
LVAR_INT,
LVAR_FLOAT,
LVAR_TEXT_LABEL,
LVAR_TEXT_LABEL16,
CONST_INT,
CONST_FLOAT,
// Literals and plain text.
Integer,
Float,
Text,
String,
// Logical and control-flow constructs.
NOT,
AND,
OR,
IF,
ELSE,
WHILE,
REPEAT,
SWITCH,
CASE,
DEFAULT,
BREAK,
CONTINUE, // Extension
// Operators.
Decrement,
Increment,
Cast,
Equal,
Greater,
GreaterEqual,
Lesser,
LesserEqual,
Add,
Sub,
Times,
Divide,
TimedAdd,
TimedSub,
// Extensions
DUMP,
};
/// A Miss2 Identifier is anything that starts with A-Z or $.
///
/// An array access is in fact a single identifier (e.g. `array[1]`); this small
/// structure is provided to help separate the two pieces (name and index).
///
/// This is meant to be used by the semantic analyzer; the parser only works with full identifiers.
struct Miss2Identifier
{
/// Reasons why `match` can reject a value (see `to_string(Miss2Identifier::Error)` for the messages).
enum Error
{
InvalidIdentifier,
NestingOfArrays,
NegativeIndex,
OutOfRange,
};
/// The identifier part (a non-owning view).
string_view identifier;
/// The array index, if any: either a numeric index (`size_t`) or a textual one (`string_view`).
optional<variant<size_t, string_view>> index;
/// Matches a miss2 identifier.
///
/// \returns the matched `Miss2Identifier`, or an `Error` describing why `value` was rejected.
///
/// \warning the returned `Miss2Identifier` stores non-owning `string_view`s (presumably
/// \warning pointing into `value`), so the data viewed by `value` must outlive the returned object.
static auto match(const string_view& value, const Options&) -> expected<Miss2Identifier, Error>;
/// Checks whether a string is a miss2 identifier.
static bool is_identifier(const string_view& value, const Options& options);
};
/// Converts a `Miss2Identifier::Error` into its human-readable message.
///
/// Any value that is not one of the known enumerators ends up in `Unreachable()`.
inline const char* to_string(Miss2Identifier::Error e)
{
    if(e == Miss2Identifier::InvalidIdentifier)
        return "invalid identifier";

    if(e == Miss2Identifier::NestingOfArrays)
        return "nesting of arrays not allowed";

    if(e == Miss2Identifier::NegativeIndex)
        return "index cannot be negative";

    if(e == Miss2Identifier::OutOfRange)
        return "index out of range";

    // None of the enumerators matched.
    Unreachable();
}
/// Obtains a 32-bit integer value from the given syntax tree node.
///
/// NOTE(review): presumably returns `nullopt` when the node cannot be interpreted as an
/// integer — the definition is not in this header, confirm there.
optional<int32_t> to_integer(const SyntaxTree&, ProgramContext&);
/// Obtains a floating-point value from the given syntax tree node.
///
/// NOTE(review): presumably returns `nullopt` when the node cannot be interpreted as a
/// float — the definition is not in this header, confirm there.
optional<float> to_float(const SyntaxTree&, ProgramContext&);
///////////////////////////////
/// The result of tokenizing a source text: the text itself plus the list of tokens found in it.
///
/// Instances are obtained through the static `tokenize` functions, which return a `shared_ptr`;
/// the constructors taking the stream contents are private and copying is disabled.
class TokenStream : public std::enable_shared_from_this<TokenStream>
{
public:
/// A single token: its kind and the range it occupies in the source text.
struct TokenData
{
Token type; ///< Type of token
size_t begin; ///< Offset for token in TokenStream::data
size_t end; ///< Offset for token in TokenStream::data (end)
};
/// The source text of a stream together with line-lookup helpers.
struct TextStream
{
const std::string stream_name; ///< Name of this stream (usually name of the source file).
const std::string data; ///< UTF-8 source file.
// NOTE(review): presumably the byte offset at which each line starts — confirm in the implementation.
std::vector<size_t> line_offset;
// NOTE(review): meaning not visible from this header (defaults to 0) — confirm in the implementation.
size_t max_offset = 0;
/// Constructs a text stream from the source `data` and the stream `name`.
explicit TextStream(std::string data, std::string name);
/// Gets the byte offset in this->text() that the specified line number (1-based) is in.
///
/// \throws std::logic_error if lineno does not exist.
size_t offset_for_line(size_t lineno) const;
/// Gets the content of the specified line number (1-based).
///
/// \throws std::logic_error if lineno does not exist.
std::string get_line(size_t lineno) const;
/// Gets the (lineno, colno) (1-based) of the specified offset in the stream data.
///
/// \throws std::logic_error if offset is out of range.
std::pair<size_t, size_t> linecol_from_offset(size_t offset) const;
/// Gets the text in the stream in the specified range.
string_view get_text(size_t begin, size_t end) const;
};
/// Location of a token inside a text stream. Used for error messages.
///
/// Holds a reference to the `TextStream`, so the stream must outlive this object.
struct TokenInfo
{
const TextStream& stream;
size_t begin;
size_t end;
/// Builds the info from an explicit `[begin, end)` range.
explicit TokenInfo(const TextStream& stream, size_t begin, size_t end)
: stream(stream), begin(begin), end(end)
{}
/// Builds the info from the range stored in `token`.
explicit TokenInfo(const TextStream& stream, const TokenData& token)
: TokenInfo(stream, token.begin, token.end)
{}
};
const TextStream text; ///< Source file.
const std::vector<TokenData> tokens; ///< Tokenized source file.
public:
/// Tokenizes the specified file.
static std::shared_ptr<TokenStream> tokenize(ProgramContext&, const fs::path&);
/// Tokenizes the specified data.
static std::shared_ptr<TokenStream> tokenize(ProgramContext&, std::string data, const char* stream_name);
/// Move constructor (defined out of line).
TokenStream(TokenStream&&);
/// Token streams are not copyable.
TokenStream(const TokenStream&) = delete;
/// For debugging purposes.
std::string to_string() const;
private:
ProgramContext& program;
// Private constructors: used to build the stream once the tokens are known.
explicit TokenStream(ProgramContext&, const char* stream_name, std::string data, std::vector<TokenData>);
explicit TokenStream(ProgramContext&, TextStream stream, std::vector<TokenData>);
};
///////////////////////////////
class SyntaxTree : public std::enable_shared_from_this<SyntaxTree>
{
public:
using iterator = std::vector<std::shared_ptr<SyntaxTree>>::iterator;
using const_iterator = std::vector<std::shared_ptr<SyntaxTree>>::const_iterator;
public:
static std::shared_ptr<SyntaxTree> compile(ProgramContext&, const TokenStream& tstream);
SyntaxTree(const SyntaxTree&) = delete;
SyntaxTree(SyntaxTree&&);
/// Gets the type of this node.
NodeType type() const
{
return this->type_;
}
/// Text associated with the token in this node.
string_view text() const
{
Expects(this->instream != nullptr);
auto source_data = this->instream->tstream.lock()->text.data.c_str();
return string_view(source_data + this->token.begin, this->token.end - this->token.begin);
}
/// Checks if `text().empty()`.
bool has_text() const
{
if(this->instream)
return (this->token.begin != this->token.end);
return false;
}
/// Iterator to childs (begin).
iterator begin()
{
return this->childs.begin();
}
/// Iterator to childs (end).
iterator end()
{
return this->childs.end();
}
/// Iterator to childs (begin).
const_iterator begin() const
{
return this->childs.begin();
}
/// Iterator to childs (end).
const_iterator end() const
{
return this->childs.end();
}
/// Number of childs on this node.
size_t child_count() const
{
return this->childs.size();
}
/// Gets the child at the specified index.
const SyntaxTree& child(size_t i) const
{
return *this->childs[i];
}
/// Gets the child at the specified index.
SyntaxTree& child(size_t i)
{
return *this->childs[i];
}
/// Gets the parent node, or `nullptr` if none.
std::shared_ptr<SyntaxTree> parent() const
{
if(this->parent_)
return this->parent_.value().lock();
return nullptr;
}
// Adds a child to this node.
void add_child(shared_ptr<SyntaxTree> child)
{
Expects(child->parent_ == nullopt);
child->parent_ = std::weak_ptr<SyntaxTree>(this->shared_from_this());
this->childs.emplace_back(std::move(child));
}
// Steals the childs from the other tree.
void take_childs(shared_ptr<SyntaxTree>& other)
{
this->childs.reserve(this->childs.size() + other->childs.size());
for(auto& child : other->childs)
{
child->parent_ = std::weak_ptr<SyntaxTree>(this->shared_from_this());
this->childs.emplace_back(std::move(child));
}
other->childs.clear();
}
/// Performs a pre-ordered depth-first traversal on this tree.
///
/// Does not go any deeper in a node that `fun()` returns false.
template<typename Functor> // Functor = bool(SyntaxTree)
void depth_first(Functor fun) //const
{
if(fun(*this))
{
for(auto& child : *this)
child->depth_first(std::ref(fun));
}
}
/// Sets the annotation for this node.
template<typename ValueType>
void set_annotation(ValueType&& v)
{
this->udata = std::forward<ValueType>(v);
}
/// Gets the annotation of this node, previosly set with `set_annotation`.
///
/// Note: You can get a ref by using e.g. `<int&>` instead of `<int>`.
///
/// \throws bad_any_cast if there's no annotation on this node.
template<typename T>
T annotation() const
{
return any_cast<T>(this->udata);
}
/// Gets the annotation of this node, previosly set with `set_annotation`, or `nullopt` if not set.
///
/// Note: You can get a ref by using e.g. `<int&>` instead of `<int>`.
template<typename T>
optional<T> maybe_annotation() const
{
using TNoRef = std::remove_reference_t<T>;
if(const TNoRef* p = any_cast<TNoRef>(&this->udata))
return *p;
return nullopt;
}
/// Checks if this node has been annotated.
bool is_annotated() const
{
return !this->udata.empty();
}
/// Returns an `any` object associated with the annotation.
const any& annotation_any() const
{
return this->udata;
}
/// Filename of the input stream associated with this SyntaxTree, or empty if none.
std::string filename() const
{
return this->instream? *this->instream->filename : "";
}
/// Input stream associated with this SyntaxTree, or `nullptr` if none.
weak_ptr<const TokenStream> token_stream() const
{
return this->instream? this->instream->tstream : weak_ptr<const TokenStream>();
}
///
const TokenStream::TokenData get_token() const
{
Expects(this->instream != nullptr);
return this->token;
}
/// For debugging purposes.
std::string to_string(size_t level = 0) const;
protected:
friend class TokenStream;
friend struct ParserContext;
struct InputStream
{
shared_ptr<std::string> filename; //< Name of the input file. Stored also here because tstream may get deallocated.
weak_ptr<const TokenStream> tstream; //< Input token stream, if still allocated.
};
private:
NodeType type_; // const NodeType
TokenStream::TokenData token; // invalid if (instream == nullptr)
shared_ptr<InputStream> instream; // may be nullptr
std::vector<std::shared_ptr<SyntaxTree>> childs;
optional<std::weak_ptr<SyntaxTree>> parent_;
any udata;
public:
explicit SyntaxTree(NodeType type, any udata)
: type_(type), instream(nullptr), udata(std::move(udata))
{
}
explicit SyntaxTree(NodeType type, shared_ptr<InputStream>& instream, const TokenStream::TokenData& token)
: type_(type), instream(instream), token(token)
{
}
explicit SyntaxTree(NodeType type)
: type_(type), instream(nullptr)
{
}
shared_ptr<SyntaxTree> clone() const;
};