#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Provenance (mail header of the patch this module was extracted from):
#   From d2dcdcf6572a48149666e8ed1b7acad90426e268 Mon Sep 17 00:00:00 2001
#   From: BlackBeans
#   Date: Thu, 5 Jul 2018 19:58:30 -0400
#   Subject: [PATCH] [gmrreader] Lexer generator | [lexer.gmr] full lexer
#            grammar | [parser.gmr] first part of parser grammar
#   File: beansast/gmrreader.py (new file)
"""Build regex tokenizers from a lexer grammar description.

Each non-blank line of a lexer grammar has the form::

    [ignore ]NAME ::= REGEX

``LexerReader.read`` turns such a text into a ``{NAME: Tokenizer}`` mapping.
"""

import re


class Token:
    """A lexeme: a token kind plus the named groups captured by its rule."""

    def __init__(self, name, **attributes):
        self.name = name
        self.attributes = attributes

    def __repr__(self):
        return "<Token %s %r>" % (self.name, self.attributes)


class Tokenizer:
    """Callable that tries to match one compiled rule at a given position.

    name   -- token kind given to the Tokens this tokenizer produces
    rule   -- regular expression source; compiled once at construction
    ignore -- when true, a successful match consumes input but yields no Token
    """

    def __init__(self, name, rule, ignore=False):
        self.name = name
        self.rule = re.compile(rule)
        self.ignore = ignore

    def __call__(self, flux, pos):
        """Try to match the rule against ``flux`` starting at ``pos``.

        Returns a ``(matched, new_pos, token)`` triple. ``new_pos`` is an
        absolute index into ``flux``: the end of the match on success, or
        ``pos`` unchanged on failure. ``token`` is ``None`` on failure and
        for ignored rules.
        """
        # Match against a slice so that anchors behave as if the input
        # started at ``pos``; the match end is therefore relative to ``pos``.
        result = self.rule.match(flux[pos:])
        if result is None:
            return False, pos, None
        end = pos + result.end()
        if self.ignore:
            return True, end, None
        return True, end, Token(self.name, **result.groupdict())

    def __repr__(self):
        suffix = " - ignored" if self.ignore else ""
        return "<Tokenizer %s: %r%s>" % (self.name, self.rule.pattern, suffix)


class LexerReader:
    """Parser for lexer grammar text (see the module docstring for the format)."""

    def __init__(self, inp):
        self.inp = inp  # full grammar text
        self.pos = 0    # index of the next unread character

    def read(self):
        """Parse the remaining input and return a ``{name: Tokenizer}`` dict.

        Entries keep the order in which they appear in the grammar.
        Raises ``SyntaxError`` when a line lacks a token name or the ``::=``
        separator.
        """
        tokenizers = {}
        while True:
            self.pos = self.ignore_lines(self.pos)
            if self.pos >= len(self.inp):
                # Only blank lines / trailing whitespace were left.
                break
            self.pos, ignore = self.read_ignore(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos, name = self.read_name(self.pos)
            if not name:
                raise SyntaxError(
                    "token name expected at character %s of line %s"
                    % pos2coords(self.pos, self.inp)
                )
            self.pos = self.ignore_spaces(self.pos)
            self.pos = self.ignore_assignment(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos, rule = self.read_rule(self.pos)
            tokenizers[name] = Tokenizer(name, rule, ignore)
        return tokenizers

    def read_ignore(self, pos):
        """Consume an optional ``"ignore "`` prefix; return ``(pos, found)``."""
        keyword = "ignore "
        if self.inp.startswith(keyword, pos):
            return pos + len(keyword), True
        return pos, False

    def read_name(self, pos):
        """Read characters up to the next space, tab, newline or end of input.

        Returns ``(new_pos, name)``; ``name`` may be empty.
        """
        start = pos
        size = len(self.inp)
        while pos < size and self.inp[pos] not in {" ", "\t", "\n"}:
            pos += 1
        return pos, self.inp[start:pos]

    def ignore_assignment(self, pos):
        """Skip the ``::=`` separator or raise ``SyntaxError`` with coordinates."""
        if self.inp.startswith("::=", pos):
            return pos + len("::=")
        raise SyntaxError("syntax is wrong at character %s of line %s" % pos2coords(pos, self.inp))

    def read_rule(self, pos):
        """Read the rest of the current line as the rule's regex source.

        Returns ``(new_pos, rule)`` where ``new_pos`` is just past the line's
        newline. The rule text is taken verbatim (no stripping), since
        whitespace can be significant inside a regular expression.
        """
        end = self.inp.find("\n", pos)
        if end == -1:
            # Last line of an input that doesn't end with \n.
            end = len(self.inp)
        return end + 1, self.inp[pos:end]

    def ignore_spaces(self, pos):
        """Return the index of the first character at/after ``pos`` that is not a space or tab."""
        maxsize = len(self.inp)
        while pos < maxsize and self.inp[pos] in {" ", "\t"}:
            pos += 1
        return pos

    def ignore_lines(self, pos):
        """Like ``ignore_spaces`` but also skips newlines (i.e. blank lines)."""
        maxsize = len(self.inp)
        while pos < maxsize and self.inp[pos] in {" ", "\t", "\n"}:
            pos += 1
        return pos


def pos2coords(pos, flux):
    """Convert an index into ``flux`` to 1-based ``(column, line)`` coordinates."""
    prefix = flux[:pos]
    line = prefix.count("\n") + 1
    # rfind gives -1 when there is no newline, which yields len(prefix) + 1.
    column = len(prefix) - prefix.rfind("\n")
    return column, line


# ---------------------------------------------------------------------------
# Next file in the same patch: beansast/lexer.gmr (new file). Its first lines
# are reproduced verbatim below; the rest of the grammar follows this block.
#
# IF ::= if
# ELSE ::= else
# ELIF ::= elif
# WHILE ::= while
# FOR ::= for
# FUNCTION ::= function
# CLASS ::= class
# METHOD ::= method
# MODULE ::= module
# FROM ::= from
# IMPORT ::= import
# DATA ::= data
# SPACENAME ::= spacename
# PACKAGE ::= package
# PUBLIC ::= public
# PRIVATE ::= private
#
# EOF ::= \Z
# STRING ::= "(?P((\\")|[^"])*)"
# CHAR ::= '(?P((\\')|[^'])?)'
# INT ::= (?P\d+\.?)
# FLOAT ::= (?P\d*\.\d+)
# ignore SPACE ::= [\t ]
# ignore EOL ::= $
# ID ::= (?P\w+)
#
# DOT ::= [.]
+ +LPAR ::= \( +RPAR ::= \) +LBRACE ::= { +RBRACE ::= } +LBRACKET ::= \[ +RBRACKET ::= \] + +EQ ::= == +GT ::= > +LT ::= < +GE ::= >= +LE ::= <= +NE ::= != + +IN ::= in + +AND ::= and +OR ::= or +NOT ::= not + +EQUALS ::= = + +PLUS ::= \+ +MINUS ::= - +ASTERISK ::= \* +DASTERISK ::= \*\* +DSLASH ::= // +SLASH ::= / +PERCENTAGE ::= % + +COLON ::= : +GRAVE ::= ` +DGRAVE ::= `` +SEMICOLON ::= ; +COMMA ::= , +TILDE ::= ~ +EXCLAMATION ::= ! +QUESTION ::= \? \ No newline at end of file diff --git a/beansast/parser.gmr b/beansast/parser.gmr new file mode 100644 index 0000000..af38ffc --- /dev/null +++ b/beansast/parser.gmr @@ -0,0 +1,27 @@ +Expression ::= + NUMBER@value <.> + : ID@value <.> + : STRING@value <.> + : FLOAT@value <.> + : LPAR Expression@value RPAR <.> + : Expression@var LPAR ExpressionList?@arguments RPAR <. + {"op": "call"}> + : Expression@var LBRACKET Expression@argument LBRACKET <. + {"op": "subscription"}> + : Expression@father DOT@op Expression@child <.> + : (PLUS|MINUS)@op Expression@value <.> + : Expression@left DASTERISK@op Expression@right <.> + : Expression@left (ASTERISK|SLASH|DSLASH|PERCENTAGE)@op Expression@right <.> + : Expression@left (PLUS|MINUS)@op Expression@right <.> + : Expression@left (IN|EQ|GT|LT|GE|LE|NE)@op Expression@right <.> + : NOT@op Expression@value <.> + : AND@op Expression@value <.> + : OR@op Expression@value <.>; + +ExpressionList ::= Expression@value <{"value": [value]}> + : ExpressionList@values COMMA Expression@value <{"value": values["value"] + [value]}>; + +Assignment ::= ID@key EQUALS Expression@value <.>; +FunctionCall ::= Expression@var LPAR ExpressionList?@arguments RPAR <.>; + +Statement ::= + Assignment + :FunctionCall;