-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathuparser.pas
608 lines (498 loc) · 18.5 KB
/
uparser.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
unit uparser;
{$mode objfpc}{$H+}
interface
uses Classes, utypes, ulist, uerror;
type TLPI_Parser = class(TLPI_ErrorMessage)
private
function create_node(operation: Byte; s: AnsiString; line: Integer): TASTTreeNode;
function create_binary_node(operation: Byte; s: AnsiString; line: Integer; child1, child2: TASTTreeNode): TASTTreeNode;
function expectVariable(node: TASTTreeNode; parent_operation: Byte): Boolean;
function get_variable_id(s: AnsiString): Integer;
public
symbols: TStringList;
Constructor Create; Override;
function execute(tokenlist: TLightList): TASTTreeNode; // returns the rootNode of the AST
Destructor Destroy; Override;
end;
implementation
uses SysUtils, uconstants, uutils;
Constructor TLPI_Parser.Create;
begin
inherited;
symbols := TStringList.Create; // stores original variable names
end;
function TLPI_Parser.create_node(operation: Byte; s: AnsiString; line: Integer): TASTTreeNode;
begin
Result := TASTTreeNode.Create(operation, s, line);
end;
function TLPI_Parser.create_binary_node(operation: Byte; s: AnsiString; line: Integer; child1, child2: TASTTreeNode): TASTTreeNode;
begin
Result := TASTTreeNode.Create(operation, s, line);
if child1 <> nil then Result.children.Add(child1);
if child2 <> nil then Result.children.Add(child2);
end;
// operation type helper
function TLPI_Parser.expectVariable(node: TASTTreeNode; parent_operation: Byte): Boolean;
begin
Result := false;
if node = nil then Exit;
Result := (node.operation = CopVariable);
if not Result then LogError('Variable expected, found ' + operationtostr(node.operation) + ' ' + node.s, node.line, parent_operation);
end;
function TLPI_Parser.get_variable_id(s: AnsiString): Integer;
var i, index: Integer;
found: Boolean;
begin
s := LowerCase(s);
index := Cunknown_var_id;
found := false;
for i := 0 to symbols.Count - 1 do
if symbols.Strings[i] = s then
begin
found := true;
index := i;
break;
end;
if not found then
if symbols.Count < ClpiInterpreterMaxVariables then
index := symbols.Add(s)
else
LogError('Internal Error. Maximum number of allowed variables reached (' + IntToStr(ClpiInterpreterMaxVariables) + ')', Cunknown_line, Cunknown_operation);
Result := index;
end;
function TLPI_Parser.execute(tokenlist: TLightList): TASTTreeNode;
var rootNode: TASTTreeNode;
error_token: TToken;
current_line: Integer; // cache last line if current token is invalid or empty...
// reads a token from the input
function preview_token: TToken;
begin
Result := nil;
if tokenlist.Count < 1 then
begin
LogError('Unexpected End of Program', current_line, Cunknown_operation);
Result := error_token;
end
else Result := TToken(tokenlist.GetFirst);
end;
// reads a token from the input AND deletes it (pop)
procedure read_token;
var token: TToken;
begin
if isError then Exit;
token := preview_token;
tokenlist.RemoveFirst;
FreeAndNil(token);
end;
// compares the string with the token, peek() does NOT read the token!
function peek(s: AnsiString): Boolean; inline;
begin
if tokenlist.Count < 1 then
Result := false
else
Result := (UpperCase(s) = UpperCase(preview_token.s));
end;
// compares the string and reads (accepts) the token on a match
function accept(s: AnsiString): Boolean; inline;
begin
Result := false;
if isError then Exit;
if peek(s) then
begin
read_token;
Result := true;
end;
end;
// the token is read and the string must match
procedure expect(s: AnsiString); inline;
begin
if not accept(s) then
LogError(s + ' expected, but ' + preview_token.s + ' found', current_line, Cunknown_operation);
end;
// the token is read and checked for a matching type
function expect_and_read_number(operation: Byte): Extended;
var token: TToken;
begin
Result := 0;
token := preview_token;
if token.id <> CtokenNumber then
begin
LogError('Number expected, found ' + token.s, token.line, operation);
Exit;
end;
Result := StrToFloatDef(token.s, 0, FormatSettings);
read_token; // also consume the token from the input stream
end;
// returns the name of the identifier
function expect_and_read_identifier(operation: Byte): AnsiString;
var token: TToken;
begin
Result := '';
token := preview_token;
if token.id <> CtokenIdentifier then
begin
LogError('Identifier expected, found ' + token.s, token.line, operation);
Exit;
end;
Result := token.s;
read_token; // also consume the token from the input stream
end;
// needed for primary expressions to read complex, recursive constructs -> go back to the beginning of parsing
function expr: TASTTreeNode; forward;
// needed for primary expressions to read complex, recursive constructs -> go back to mathematical expression parsing
function add_expr: TASTTreeNode; forward;
{
# How does the Parser work?
Every pascal statement (like conditions and loops) consists of expressions. The following code tries to
match the expressions, starting with primary expressions (such as numbers and variables) down to
more complex ones (like math operations, logic).
Every expression procedure first calls the procedure one level above itself and tries to find a match for the following token.
After the procedure returns, the next match is attempted. Anything matching inside the levels above the current procedure
was already parsed and is out of the way in the token list. For example a for statement parses "<for> ... <:=> ... <to> ... <do>"
but it doesn't parse the expressions like variables or math equations. It is calling the according procedures above it instead,
like prim_expr() (for variables) and add_expr() (for math expressions). The whole process starts at the bottom with statement().
# A more detailed breakdown of "for i := 1 to 2+3 do foobar;"
First, <for> is recognized as statement. The statement procedure calls the procedure to parse primary expressions to parse
the variable. The check if it really was a variable is done later, when building the syntax tree. It matches CtokenIdentifier <i>
and the code goes back to the statement. The next token is expected to be <:=>, no exceptions. Luckily it is there and the parser
is fetching the beginning of the loop range, which is the primary expression CtokenNumber <1>.
The code goes back to the statement and expects <to>, which is there. The final expression is the end of the loop range,
which is at first a simple CtokenNumber <2>, but it matches add_expr() on the way back and recognizes the <+>.
add_expr() keeps looking for additional + or - and calls the next higher function to check for expressions
like 4*5 or (4/2) etc. However the next expression is again a simple CtokenNumber <3>.
The code returns and expects <do> as token, which is there.
Finally the next statement is read within the for statement, which could be a begin / end block, a procedure call <foobar> or another loop.
If the statement was read successfully, the code returns to the main loop and tries to read the next statement.
}
// primary expressions
function prim_expr: TASTTreeNode;
var token: TToken;
temp_s: AnsiString;
i: Integer;
begin
Result := nil;
if isError then Exit;
token := preview_token;
current_line := token.line; // cache the current line number for possible error messages after the token was consumed
if token.id = CtokenNumber then
begin
Result := create_node(CopNumber, token.s, current_line); // accept a Number 0123.4
read_token;
end
else
if token.id = CtokenString then
begin
Result := create_node(CopString, token.s, current_line); // accept a String ''
read_token;
end
else
if peek('#') then
begin
temp_s := '';
while (accept('#') and not isError) do
begin
i := round(expect_and_read_number(CopString));
if (i < 0) or (i > 255) then
LogError('Constant out of range. Expected 0 - 255, found ' + IntToStr(i), current_line, CopString)
else
temp_s := temp_s + chr(i); // accept a Char #13
end;
Result := create_node(CopString, temp_s, current_line); // pascal allows chaining without a +, like #13#10
end
else
if accept('not') then
begin
Result := create_node(CopLogicNot, '', current_line); // accept not
Result.children.Add(expr());
end
else
if accept('true') then
begin
Result := create_node(CopLogicTrue, '', current_line); // accept True
end
else
if accept('false') then
begin
Result := create_node(CopLogicFalse, '', current_line); // accept False
end
else
if accept('round') then // accept round(x);
begin
Result := create_node(CopRound, '', current_line); // round
if not peek('(') then expect('('); // force ()
Result.children.Add(add_expr());
end
else
if token.id = CtokenIdentifier then
begin
// add it as variable, save the name in case of an error message
temp_s := token.s;
// s is cleared, so it won't be printed twice on debug - the only reference is the var_id now
Result := create_node(CopVariable, '', current_line);
// read and consume token, so we can use peek()
read_token;
// cache symbols
Result.var_id := get_variable_id(temp_s);
// variables are not allowed to have a following ()
if peek('(') then
LogError('Unexpected ( after variable ' + temp_s, current_line, CopVariable);
end
else
// normal brackets e.g. for mathematical expressions
if accept('(') then
begin
Result := expr(); // accepts anything between the brackets
if isError then Exit;
expect(')');
// we don't need to add any actual bracket nodes to the ast,
// the structure already takes care of the right execution order :-)
end
else
// special case for sign e.g. a := +a;
// it just gets ignored, this is different from 2 + 3 which is accepted as an operator in add_expr
if (accept('+')) then
Result := prim_expr()
else
if (accept('-')) then
begin
token := preview_token;
// is it a number? just add the sign...
if token.id = CtokenNumber then
begin
Result := create_node(CopNumber, '-' + token.s, current_line); // accept negative Number -0123.4
read_token;
end
else
// special case for variables e.g. a := -x;
// the parser adds a multiplication node so cases like -x become -1 * x
Result := create_binary_node(CopMathMul, '', current_line, create_node(CopNumber, '-1', current_line), prim_expr()); // accept -a
end
else
LogError('Unexpected ''' + token.s + '''', token.line, Cunknown_operation);
end;
// order of math operations is set by the order of procedure calls
// calling the next higher one as first line to check if the expression is known (accepted)
function mul_expr: TASTTreeNode;
begin
Result := nil;
if isError then Exit;
Result := prim_expr();
if isError then Exit;
current_line := preview_token.line;
while (peek('*') or peek('/') or peek('mod') or peek ('div')) and (not isError) do
begin
if accept('*') then
Result := create_binary_node(CopMathMul, '', current_line, Result, prim_expr()) // accept *
else
if accept('/') then
Result := create_binary_node(CopMathDiv, '', current_line, Result, prim_expr()) // accept /
else
if accept('mod') then
Result := create_binary_node(CopMathMod, '', current_line, Result, prim_expr()) // accept mod
else
if accept('div') then
Result := create_binary_node(CopMathDivInt, '', current_line, Result, prim_expr()); // accept div
end;
end;
// lower math expressions
function add_expr: TASTTreeNode;
begin
Result := nil;
if isError then Exit;
Result := mul_expr();
if isError then Exit;
current_line := preview_token.line;
while (peek('+') or peek('-')) and (not isError) do
begin
if accept('+') then
Result := create_binary_node(CopMathAdd, '', current_line, Result, mul_expr()) // accept +
else
if accept('-') then
Result := create_binary_node(CopMathSub, '', current_line, Result, mul_expr()); // accept -
end;
end;
// compare operators
function eq_expr: TASTTreeNode;
begin
Result := nil;
if isError then Exit;
Result := add_expr();
if isError then Exit;
current_line := preview_token.line;
if accept('=') then
Result := create_binary_node(CopCompareEqual, '', current_line, Result, add_expr()) // accept =
else
if accept('<>') then
Result := create_binary_node(CopCompareUnequal, '', current_line, Result, add_expr()) // accept <>
else
if accept('>') then
Result := create_binary_node(CopCompareGreater, '', current_line, Result, add_expr()) // accept >
else
if accept('<') then
Result := create_binary_node(CopCompareSmaller, '', current_line, Result, add_expr()) // accept <
else
if accept('>=') then
Result := create_binary_node(CopCompareGreaterOrEqual, '', current_line, Result, add_expr()) // accept >=
else
if accept('<=') then
Result := create_binary_node(CopCompareSmallerOrEqual, '', current_line, Result, add_expr()); // accept <=
end;
// logic
function logic_expr: TASTTreeNode;
begin
Result := nil;
if isError then Exit;
Result := eq_expr();
if isError then Exit;
current_line := preview_token.line;
while (peek('and') or peek('or')) and (not isError) do
begin
if accept('and') then
Result := create_binary_node(CopLogicAnd, '', current_line, Result, eq_expr()) // accept and
else
if accept('or') then
Result := create_binary_node(CopLogicOr, '', current_line, Result, eq_expr()); // accept or
end;
end;
// assignment
function expr: TASTTreeNode;
begin
Result := nil;
if isError then Exit;
Result := logic_expr();
if isError then Exit;
current_line := preview_token.line;
// is it an assignment? check the left and get the right side of the assignment...
if accept(':=') then
begin
if not expectVariable(Result, CopAssign) then Exit; // left side has to be a variable
Result := create_binary_node(CopAssign, '', current_line, Result, logic_expr()); // accept :=
end;
end;
// recognizes a statement like a "begin end" block or a "for to do" loop
function statement: TASTTreeNode;
var tempNode: TASTTreeNode;
temp_s: AnsiString;
begin
Result := nil;
if isError then Exit;
current_line := preview_token.line;
if accept('begin') then
begin
Result := create_node(CopCommandBlock, '', current_line); // accept begin-end
while not accept('end') and (not isError) do Result.children.Add(statement());
if not peek('else') then expect(';'); // exception for else, no ; before it! otherwise expect end;
end
else
if accept('if') then
begin
Result := create_binary_node(CopCondition, '', current_line, logic_expr(), nil); // accept condition
expect('then');
Result.children.Add(statement()); // statement
if accept('else') then
Result.children.Add(statement()); // statement for else
end
else
if accept('while') then
begin
Result := create_binary_node(CopLoopWhile, '', current_line, logic_expr(), nil); // accept while (condition)
expect('do');
Result.children.Add(statement()); // statement
end
else
if accept('repeat') then
begin
Result := create_node(CopLoopRepeatUntil, '', current_line); // accept repeat until
while not accept('until') and (not isError) do Result.children.Add(statement());
if isError then Exit;
Result.children.Add(logic_expr()); // read the condition
if not peek('else') then expect(';'); // exception for else, no ; before it! otherwise expect end;
end
else
if accept('for') then // accept for i := 3 to 5 do
begin
tempNode := prim_expr();
if not expectVariable(tempNode, CopLoopForTo) then Exit;
Result := create_binary_node(CopLoopForTo, '', current_line, tempNode, nil); // add the loop variable i
expect(':=');
Result.children.Add(add_expr()); // add the loop start 3
expect('to');
Result.children.Add(add_expr()); // add the loop end 5
expect('do');
Result.children.Add(statement()); //looped statement
end
else
if accept('writeln') then // accept messages.Add(a);
begin
expect('(');
// at least one argument is expected, so don't use a while loop to check
Result := create_node(CopWriteln, '', current_line); // messages.Add
repeat
tempNode := logic_expr(); // logic_expr because no lower levels are allowed (e.g. no statements like messages.Add(repeat ... until...);)
Result.children.Add(tempNode);
until (not accept(',')) or (isError);
if isError then Exit;
expect(')');
if not peek('else') then expect(';'); // exception for else, no ; before it! otherwise expect end;
end
else
begin
temp_s := preview_token.s; // cache last token for error messages
// parses variables, however result is not allowed to be a single variable or expression
// like add, mul etc at statement level e.g. a := 5; is allowed a; / a + 1; a or b; etc. is not!
Result := expr();
if isError then Exit;
// only assign and internal functions are allowed...
if not ((Result.operation = CopAssign) or (Result.operation = CopRound)) then
LogError('Unexpected "' + temp_s + '"', current_line, Cunknown_operation);
// exception for else, to accept code like this: if true then x := 5 else x := 7;
if not (peek('else')) then expect(';');
end;
end;
var token: TToken;
i: Integer;
begin
Result := nil;
if tokenlist = nil then Exit;
if ClpiDebugMode then
begin
messages.Add('');
messages.Add('*** Parser ***');
end;
current_line := Cunknown_line;
// create dummy token for easier error handling...
error_token := TToken.Create;
error_token.id := CopInvalid; // invalid token
error_token.s := '<EOF>'; // invalid string
error_token.line := Cunknown_line; // unknown line
rootNode := create_node(CopCommandBlock, '', 0); // encase the whole code in a begin-end block...
// accept our language....
while (tokenlist.Count > 0) and (not isError) do rootNode.children.Add(statement());
// delete the token list...
for i := 0 to tokenlist.Count - 1 do
begin
token := TToken(tokenlist.Items(i));
FreeAndNil(token);
end;
// it could be nil if an error occured before
if error_token <> nil then
FreeAndNil(error_token);
if isError then
FreeAndNil(rootNode) // delete the whole (likely incomplete) abstract syntax tree
else
begin
if ClpiDebugMode and not isError then
begin
messages.Add('# Resulting Tree:');
rootNode.debug(symbols, messages); // contains the root of the abstract syntax tree, parses the other nodes recursively
end;
Result := rootNode;
end;
FreeAndNil(tokenlist);
end;
Destructor TLPI_Parser.Destroy;
begin
FreeAndNil(symbols);
inherited;
end;
end.