Skip to content

Commit

Permalink
[gmrreader] Lexer generator | [lexer.gmr] full lexer grammar | [parse…
Browse files Browse the repository at this point in the history
…r.gmr] first part of parser grammar
  • Loading branch information
jthulhu committed Jul 5, 2018
1 parent 4f9e77a commit d2dcdcf
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
85 changes: 85 additions & 0 deletions beansast/gmrreader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

import re

class Token:
    """A lexical token: a kind name plus the named groups captured for it."""

    def __init__(self, name, **attributes):
        # Arbitrary keyword arguments are kept as-is in a plain dict.
        self.name, self.attributes = name, attributes

class Tokenizer:
    """Regex-backed recognizer for a single kind of token.

    Attributes:
        name: name given to the tokens this tokenizer produces.
        rule: compiled regular expression built from the ``rule`` string.
        ignore: when true, matched text is consumed but no token is built.
    """

    def __init__(self, name, rule, ignore=False):
        self.name = name
        self.rule = re.compile(rule)
        self.ignore = ignore

    def __call__(self, flux, pos):
        """Try to recognize this token in ``flux`` starting at ``pos``.

        Returns a ``(matched, new_pos, token)`` triple:

        * no match: ``(False, pos, None)``;
        * match on an ignored tokenizer: ``(True, new_pos, None)``;
        * match otherwise: ``(True, new_pos, Token)``, the token carrying
          the named groups of the match as attributes.

        ``new_pos`` is an absolute index into ``flux``, like ``pos``.
        """
        # The rule is matched against the remaining text, so anchors such as
        # ``^`` are relative to ``pos``.
        result = self.rule.match(flux[pos:])
        if result is None:
            return False, pos, None
        # ``end()`` must be *called*, and it is relative to the slice: shift
        # it back by ``pos`` to get an absolute position in ``flux``.
        new_pos = pos + result.end()
        if self.ignore:
            return True, new_pos, None
        return True, new_pos, Token(self.name, **result.groupdict())

    def __repr__(self):
        return "<Tokenizer named %s with rule %s %s>" % (
            self.name,
            self.rule,
            "- ignored" if self.ignore else "",
        )

class LexerReader:
    """Reader for a lexer grammar made of one rule per line.

    Each non-blank line has the form ``[ignore ]NAME ::= RULE`` where
    ``RULE`` (everything up to the end of the line) is handed to
    ``Tokenizer`` as a regular expression.

    All helper methods take an absolute position in the input and return
    the position reached (plus the value read, when there is one); only
    ``read`` advances ``self.pos``.
    """

    _BLANKS = (" ", "\t")
    _BLANKS_AND_EOL = (" ", "\t", "\n")

    def __init__(self, inp):
        self.inp = inp
        self.pos = 0

    def read(self):
        """Parse the rest of the input and return ``{name: Tokenizer}``.

        Raises:
            SyntaxError: when a rule name is not followed by ``::=``.
        """
        tokenizers = {}
        size = len(self.inp)
        while self.pos < size:
            self.pos = self.ignore_lines(self.pos)
            if self.pos >= size:
                # Only blank lines/spaces were left: there is no rule to
                # read, and reading a name here would run off the input.
                break
            self.pos, ignore = self.read_ignore(self.pos)
            self.pos, name = self.read_name(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos = self.ignore_assignment(self.pos)
            self.pos = self.ignore_spaces(self.pos)
            self.pos, rule = self.read_rule(self.pos)
            tokenizers[name] = Tokenizer(name, rule, ignore)
        return tokenizers

    def read_ignore(self, pos):
        """Consume an optional ``"ignore "`` prefix; return ``(pos, found)``."""
        keyword = "ignore "
        if self.inp.startswith(keyword, pos):
            return pos + len(keyword), True
        return pos, False

    def read_name(self, pos):
        """Read a rule name; return ``(pos_after_name, name)``.

        The name stops at the first space or tab. It also stops at a
        newline or at the end of the input, so that a truncated line is
        reported by ``ignore_assignment`` instead of raising IndexError.
        """
        size = len(self.inp)
        end = pos
        while end < size and self.inp[end] not in self._BLANKS_AND_EOL:
            end += 1
        return end, self.inp[pos:end]

    def ignore_assignment(self, pos):
        """Consume the ``::=`` operator and return the position after it.

        Raises:
            SyntaxError: when the input at ``pos`` is not ``::=``.
        """
        if self.inp.startswith("::=", pos):
            return pos + len("::=")
        raise SyntaxError("syntax is wrong at character %s of line %s" % pos2coords(pos, self.inp))

    def read_rule(self, pos):
        """Read up to the end of the line; return ``(next_line_pos, rule)``.

        The returned position skips the newline; when the input does not
        end with a newline it is one past the end of the input.
        """
        size = len(self.inp)
        end = pos
        while end < size and self.inp[end] != "\n":
            end += 1
        return end + 1, self.inp[pos:end]

    def ignore_spaces(self, pos):
        """Return the position of the first character that is not a space or tab."""
        size = len(self.inp)
        while pos < size and self.inp[pos] in self._BLANKS:
            pos += 1
        return pos

    def ignore_lines(self, pos):
        """Return the position of the first character that is not a space, tab or newline."""
        size = len(self.inp)
        while pos < size and self.inp[pos] in self._BLANKS_AND_EOL:
            pos += 1
        return pos

def pos2coords(pos, flux):
    """Convert an index in ``flux`` into 1-based ``(column, line)`` coordinates.

    Only the text before ``pos`` is examined: the line number is one more
    than the count of newlines in it, and the column is one more than the
    number of characters following the last of those newlines.
    """
    before = flux[:pos]
    line = before.count("\n") + 1
    # rfind() yields -1 when there is no newline, which makes the column
    # len(before) + 1, i.e. a plain 1-based offset on the first line.
    column = len(before) - before.rfind("\n")
    return column, line
66 changes: 66 additions & 0 deletions beansast/lexer.gmr
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
IF ::= if
ELSE ::= else
ELIF ::= elif
WHILE ::= while
FOR ::= for
FUNCTION ::= function
CLASS ::= class
METHOD ::= method
MODULE ::= module
FROM ::= from
IMPORT ::= import
DATA ::= data
SPACENAME ::= spacename
PACKAGE ::= package
PUBLIC ::= public
PRIVATE ::= private

EOF ::= \Z
STRING ::= "(?P<value>((\\")|[^"])*)"
CHAR ::= '(?P<value>((\\')|[^'])?)'
INT ::= (?P<value>\d+\.?)
FLOAT ::= (?P<value>\d*\.\d+)
ignore SPACE ::= [\t ]
ignore EOL ::= $
ID ::= (?P<name>\w+)

DOT ::= [.]

LPAR ::= \(
RPAR ::= \)
LBRACE ::= {
RBRACE ::= }
LBRACKET ::= \[
RBRACKET ::= \]

EQ ::= ==
GT ::= >
LT ::= <
GE ::= >=
LE ::= <=
NE ::= !=

IN ::= in

AND ::= and
OR ::= or
NOT ::= not

EQUALS ::= =

PLUS ::= \+
MINUS ::= -
ASTERISK ::= \*
DASTERISK ::= \*\*
DSLASH ::= //
SLASH ::= /
PERCENTAGE ::= %

COLON ::= :
GRAVE ::= `
DGRAVE ::= ``
SEMICOLON ::= ;
COMMA ::= ,
TILDE ::= ~
EXCLAMATION ::= !
QUESTION ::= \?
27 changes: 27 additions & 0 deletions beansast/parser.gmr
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Expression ::=
INT@value <.>
: ID@value <.>
: STRING@value <.>
: FLOAT@value <.>
: LPAR Expression@value RPAR <.>
: Expression@var LPAR ExpressionList?@arguments RPAR <. + {"op": "call"}>
: Expression@var LBRACKET Expression@argument RBRACKET <. + {"op": "subscription"}>
: Expression@father DOT@op Expression@child <.>
: (PLUS|MINUS)@op Expression@value <.>
: Expression@left DASTERISK@op Expression@right <.>
: Expression@left (ASTERISK|SLASH|DSLASH|PERCENTAGE)@op Expression@right <.>
: Expression@left (PLUS|MINUS)@op Expression@right <.>
: Expression@left (IN|EQ|GT|LT|GE|LE|NE)@op Expression@right <.>
: NOT@op Expression@value <.>
: AND@op Expression@value <.>
: OR@op Expression@value <.>;

ExpressionList ::= Expression@value <{"value": [value]}>
: ExpressionList@values COMMA Expression@value <{"value": values["value"] + [value]}>;

Assignment ::= ID@key EQUALS Expression@value <.>;
FunctionCall ::= Expression@var LPAR ExpressionList?@arguments RPAR <.>;

Statement ::=
Assignment
:FunctionCall;

0 comments on commit d2dcdcf

Please sign in to comment.