-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[gmrreader] Lexer generator | [lexer.gmr] full lexer grammar | [parser.gmr] first part of parser grammar
- Loading branch information
Showing
3 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/usr/bin/python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
import re | ||
|
||
class Token:
    """A named lexical token carrying arbitrary keyword attributes.

    In this file tokens are created by ``Tokenizer.__call__``, which passes
    the named groups of the matching regular expression as attributes.
    """

    def __init__(self, name, **attributes):
        """Store the token kind and its attributes.

        name: the kind of token (for instance the name of the rule).
        **attributes: any keyword values; kept as the ``attributes`` dict.

        NOTE: an attribute literally called ``name`` cannot be given as a
        keyword, because it collides with the first parameter (Python raises
        TypeError). Set it through ``token.attributes`` instead.
        """
        self.name = name
        self.attributes = attributes

    def __repr__(self):
        """Return a debugging representation, like Tokenizer provides."""
        return "<Token %s %r>" % (self.name, self.attributes)
|
||
class Tokenizer:
    """Recognise one kind of token with a regular expression.

    name: name given to the tokens this tokenizer produces.
    rule: regular expression source; compiled once at construction.
    ignore: when true, a match consumes input but yields no token.
    """

    def __init__(self, name, rule, ignore=False):
        self.name = name
        self.rule = re.compile(rule)
        self.ignore = ignore

    def __call__(self, flux, pos):
        """Try to match the rule at index ``pos`` of the string ``flux``.

        Returns a ``(matched, new_pos, token)`` tuple:
        - no match: ``(False, pos, None)``;
        - match on an ignored rule: ``(True, new_pos, None)``;
        - match otherwise: ``(True, new_pos, Token)`` where the token's
          attributes are the named groups of the match.

        ``new_pos`` is an absolute index into ``flux``, like ``pos``.
        """
        # The rule is applied to the remaining input only, so anchors in the
        # rule are evaluated against that tail.
        result = self.rule.match(flux[pos:])
        if not result:
            return False, pos, None

        # Match.end is a method, and its value is relative to the slice that
        # was matched: call it and shift it back to an index into ``flux``.
        end = pos + result.end()
        if self.ignore:
            return True, end, None

        # Fill the attributes after construction rather than with
        # Token(self.name, **groups): a group called "name" would otherwise
        # collide with Token's first parameter and raise TypeError.
        token = Token(self.name)
        token.attributes.update(result.groupdict())
        return True, end, token

    def __repr__(self):
        return "<Tokenizer named %s with rule %s %s>" % (self.name, self.rule, "- ignored" * int(self.ignore))
|
||
class LexerReader:
    """Read a lexer grammar and build one Tokenizer per rule.

    Each rule occupies one line of the input string::

        [ignore ]NAME ::= regular expression up to the end of the line

    Blank lines and leading whitespace between rules are skipped.
    All ``pos`` arguments and results are indices into ``self.inp``.
    """

    def __init__(self, inp):
        self.inp = inp
        self.pos = 0

    def read(self):
        """Parse the whole input.

        Returns a dict mapping each rule name to its Tokenizer; a name that
        appears twice keeps the last rule read.
        Raises SyntaxError when a name is not followed by ``::=``.
        """
        tokenizers = {}
        while self.pos < len(self.inp):
            self.pos = self.ignore_lines(self.pos)
            if self.pos >= len(self.inp):
                # Only blank lines / whitespace were left: nothing to read.
                break
            self.pos, ignore = self.read_ignore(self.pos)
            self.pos, name = self.read_name(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos = self.ignore_assignment(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos, rule = self.read_rule(self.pos)
            tokenizers[name] = Tokenizer(name, rule, ignore)
        return tokenizers

    def read_ignore(self, pos):
        """Consume an optional ``"ignore "`` prefix.

        Returns ``(new_pos, True)`` when the prefix is present at ``pos``,
        ``(pos, False)`` otherwise.
        """
        prefix = "ignore "
        # startswith with a start index avoids copying the rest of the input.
        if self.inp.startswith(prefix, pos):
            return pos + len(prefix), True
        return pos, False

    def read_name(self, pos):
        """Read a rule name starting at ``pos``.

        The name ends at the first space, tab or newline, or at the end of
        the input. Returns ``(new_pos, name)``.
        """
        size = len(self.inp)
        end = pos
        # Stop at the end of the input instead of indexing past it, and at a
        # newline so that a name can never span several lines.
        while end < size and self.inp[end] not in {" ", "\t", "\n"}:
            end += 1
        return end, self.inp[pos:end]

    def ignore_assignment(self, pos):
        """Consume the ``::=`` sign at ``pos`` and return the index after it.

        Raises SyntaxError, with the position of the offending character,
        when the sign is not there.
        """
        if self.inp.startswith("::=", pos):
            return pos + len("::=")
        raise SyntaxError("syntax is wrong at character %s of line %s" % pos2coords(pos, self.inp))

    def read_rule(self, pos):
        """Read the rest of the line as the text of a rule.

        Returns ``(new_pos, rule)`` where ``new_pos`` is one past the newline
        ending the line. When the input does not end with a newline,
        ``new_pos`` is one past the end of the input.
        """
        end = self.inp.find('\n', pos)
        if end == -1:
            # in case it doesn't end with \n
            end = max(pos, len(self.inp))
        return end + 1, self.inp[pos:end]

    def ignore_spaces(self, pos):
        """Return the index of the first character at or after ``pos`` that
        is neither a space nor a tab (or the end of the input)."""
        maxsize = len(self.inp)
        while pos < maxsize and self.inp[pos] in {" ", "\t"}:
            pos += 1
        return pos

    def ignore_lines(self, pos):
        """Same as ignore_spaces, but newlines are skipped as well."""
        maxsize = len(self.inp)
        while pos < maxsize and self.inp[pos] in {" ", "\t", "\n"}:
            pos += 1
        return pos
|
||
def pos2coords(pos, flux):
    """Convert an index into ``flux`` to a human-readable position.

    pos: index of a character in the string ``flux``.
    flux: the whole text.

    Returns ``(x, y)``: the column and the line of that character, both
    counted from 1. Note the order: column first, line second.
    """
    before = flux[:pos]
    # One line per newline seen before ``pos``, starting at line 1.
    y = before.count('\n') + 1
    # Column = characters since the last newline, plus one. rfind returns -1
    # when there is no newline, which makes this len(before) + 1.
    x = len(before) - before.rfind('\n')
    return x, y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
IF ::= if | ||
ELSE ::= else | ||
ELIF ::= elif | ||
WHILE ::= while | ||
FOR ::= for | ||
FUNCTION ::= function | ||
CLASS ::= class | ||
METHOD ::= method | ||
MODULE ::= module | ||
FROM ::= from | ||
IMPORT ::= import | ||
DATA ::= data | ||
SPACENAME ::= spacename | ||
PACKAGE ::= package | ||
PUBLIC ::= public | ||
PRIVATE ::= private | ||
|
||
EOF ::= \Z | ||
STRING ::= "(?P<value>((\\")|[^"])*)" | ||
CHAR ::= '(?P<value>((\\')|[^'])?)' | ||
INT ::= (?P<value>\d+\.?) | ||
FLOAT ::= (?P<value>\d*\.\d+) | ||
ignore SPACE ::= [\t ] | ||
ignore EOL ::= $ | ||
ID ::= (?P<value>\w+) | ||
|
||
DOT ::= [.] | ||
|
||
LPAR ::= \( | ||
RPAR ::= \) | ||
LBRACE ::= { | ||
RBRACE ::= } | ||
LBRACKET ::= \[ | ||
RBRACKET ::= \] | ||
|
||
EQ ::= == | ||
GT ::= > | ||
LT ::= < | ||
GE ::= >= | ||
LE ::= <= | ||
NE ::= != | ||
|
||
IN ::= in | ||
|
||
AND ::= and | ||
OR ::= or | ||
NOT ::= not | ||
|
||
EQUALS ::= = | ||
|
||
PLUS ::= \+ | ||
MINUS ::= - | ||
ASTERISK ::= \* | ||
DASTERISK ::= \*\* | ||
DSLASH ::= // | ||
SLASH ::= / | ||
PERCENTAGE ::= % | ||
|
||
COLON ::= : | ||
GRAVE ::= ` | ||
DGRAVE ::= `` | ||
SEMICOLON ::= ; | ||
COMMA ::= , | ||
TILDE ::= ~ | ||
EXCLAMATION ::= ! | ||
QUESTION ::= \? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
Expression ::= | ||
INT@value <.> | ||
: ID@value <.> | ||
: STRING@value <.> | ||
: FLOAT@value <.> | ||
: LPAR Expression@value RPAR <.> | ||
: Expression@var LPAR ExpressionList?@arguments RPAR <. + {"op": "call"}> | ||
: Expression@var LBRACKET Expression@argument RBRACKET <. + {"op": "subscription"}> | ||
: Expression@father DOT@op Expression@child <.> | ||
: (PLUS|MINUS)@op Expression@value <.> | ||
: Expression@left DASTERISK@op Expression@right <.> | ||
: Expression@left (ASTERISK|SLASH|DSLASH|PERCENTAGE)@op Expression@right <.> | ||
: Expression@left (PLUS|MINUS)@op Expression@right <.> | ||
: Expression@left (IN|EQ|GT|LT|GE|LE|NE)@op Expression@right <.> | ||
: NOT@op Expression@value <.> | ||
: Expression@left AND@op Expression@right <.> | ||
: Expression@left OR@op Expression@right <.>; | ||
|
||
ExpressionList ::= Expression@value <{"value": [value]}> | ||
: ExpressionList@values COMMA Expression@value <{"value": values["value"] + [value]}>; | ||
|
||
Assignment ::= ID@key EQUALS Expression@value <.>; | ||
FunctionCall ::= Expression@var LPAR ExpressionList?@arguments RPAR <.>; | ||
|
||
Statement ::= | ||
Assignment | ||
:FunctionCall; |