-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpsparser.py
142 lines (129 loc) · 5.04 KB
/
psparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""Parts of the lexer and parser code was adopted from https://composingprograms.com/.
The code has been modified to follow PostScript syntax.
https://creativecommons.org/licenses/by-sa/3.0/
"""
import string
from buffer import Buffer
from psexpressions import Literal, PSString, PSName, PSCodeArray
from colors import *
# Constants: character classes and keyword tables used by the lexer.
# A name may begin with a letter, '_', '/' (name constant) or ','.
SYMBOL_STARTS = set(string.ascii_letters) | {'_', '/', ','}
# After the first character a name may also contain digits and '-'.
SYMBOL_INNERS = SYMBOL_STARTS | set(string.digits) | {',', '-'}
# Characters that may appear in a numeric literal.
NUMERAL = set(string.digits) | {'-', '.'}
# Characters skipped between tokens.
WHITESPACE = {' ', '\t', '\n', '\r'}
# Characters that are returned as one-character tokens.
DELIMITERS = {'(', ')', '{', '}', '[', ']'}
# Keywords the lexer turns into Python booleans.
BOOLEANS = {'true': True, 'false': False}
#---------------------------------------------------
# Lexer #
#---------------------------------------------------
"""Splits the string s into tokens and returns a list of them.
>>> tokenize('/addsq { /sq {dup mul} def sq exch sq add exch sq add } def 2 3 4 addsq') """
def tokenize(s):
    """Split the source string `s` into a list of tokens.

    A Buffer is created over `s` and next_token() is called on it
    repeatedly; every token it yields is collected, in order, until it
    returns None to signal that the input is exhausted.

    Returns:
        list: the tokens produced by next_token() (ints, floats, bools and
        strings).

    Raises:
        SyntaxError: propagated from next_token() on malformed input.
    """
    chars = Buffer(s)
    collected = []
    tok = next_token(chars)
    while tok is not None:
        collected.append(tok)
        tok = next_token(chars)
    return collected
""" Takes allowed characters only. Filters out everything else. """
def take(src, allowed_characters):
    """Consume the leading run of allowed characters from `src`.

    Items are popped from `src` for as long as its current item is a member
    of `allowed_characters`. Consumption stops at the first item that is not
    allowed (that item stays in `src`).

    Returns:
        str: the consumed characters concatenated in order; '' if the
        current item is not allowed.
    """
    pieces = []
    while src.current() in allowed_characters:
        pieces.append(src.pop_first())
    return ''.join(pieces)
"""Returns the next token from the given Buffer object. """
def next_token(src):
    """Read and return the next token from the character buffer `src`.

    Leading whitespace is skipped first. The kind of token is then decided
    by the first remaining character:

    - a NUMERAL character starts a number: the whole run of NUMERAL
      characters is converted with int(), falling back to float();
    - a SYMBOL_STARTS character starts a name: the run of SYMBOL_INNERS
      characters is returned as a string, except that 'true' / 'false'
      are returned as Python booleans;
    - a DELIMITERS character is consumed and returned by itself.

    Returns:
        The token, or None when `src` has no current character left.

    Raises:
        SyntaxError: if a numeric run is neither an int nor a float, or if
        the character cannot start any token.
    """
    take(src, WHITESPACE)  # discard whitespace in front of the token
    head = src.current()
    if head is None:
        return None

    if head in NUMERAL:
        digits = take(src, NUMERAL)
        try:
            return int(digits)
        except ValueError:
            try:
                return float(digits)
            except ValueError:
                raise SyntaxError("'{}' is not a numeral".format(digits))

    if head in SYMBOL_STARTS:
        word = take(src, SYMBOL_INNERS)
        # Booleans are recognised here in the lexer; the original author
        # flagged this as something to revisit ("FIX this for next year").
        return BOOLEANS.get(word, word)

    if head in DELIMITERS:
        src.pop_first()
        return head

    raise SyntaxError("'{}' is not a token".format(head))
#---------------------------------------------------
# Parser #
#---------------------------------------------------
# Helper functions for the parser.
""" Checks if the given token is a literal - primitive constant value. """
def is_literal(s):
    """Return True if the token `s` is a primitive constant.

    A literal is any int, float or bool instance; everything else
    (including strings) is not.
    """
    # bool is already covered by int (it is a subclass); it is listed
    # explicitly to mirror the three kinds of literal the lexer produces.
    return isinstance(s, (int, float, bool))
""" Checks if the given token is a variable or function name.
The name can either be:
- a name constant (where the first character is /) or
- a variable, function, or built-in operator """
def is_name(s):
    """Return True if the token `s` is a name.

    A name is any string token that is not one of the DELIMITERS; this
    covers both name constants (leading '/') and plain variable, function
    or operator names. Non-string tokens are never names.
    """
    if not isinstance(s, str):
        return False
    return s not in DELIMITERS
""" Returns the constant string enclosed within matching () paranthesis. """
def read_str_constant(src):
    """Read a string constant from the token buffer `src`.

    Called by read_expr() after the opening '(' has been consumed. Tokens
    are popped up to the first ')' token, each converted with str(), and
    joined with single spaces. Because reading stops at the first ')',
    parentheses nested inside the string are not supported.

    Returns:
        PSString: built from the joined text wrapped in '(' and ')'.

    Raises:
        SyntaxError: if the buffer runs out before a ')' is found.
    """
    parts = []
    while src.current() != ')':
        if src.current() is None:
            raise SyntaxError("String doesn't have a matching `)`!")
        parts.append(str(src.pop_first()))
    # Discard the closing ')'.
    src.pop_first()
    # The original spacing is gone after tokenizing, so a single space is
    # inserted between consecutive tokens.
    body = ' '.join(parts)
    return PSString('(' + body + ')')
""" Returns the constant code array enclosed within matching {} paranthesis. """
def read_block_expr(src):
    """Read a code array from the token buffer `src`.

    Called by read_expr() after the opening '{' has been consumed.
    Expressions are parsed with read_expr() one after another until the
    current token is '}'; since read_expr() calls back into this function
    on '{', nested code arrays are parsed recursively.

    Returns:
        PSCodeArray: built from the list of parsed expressions.

    Raises:
        SyntaxError: if the buffer runs out before a '}' is found.
    """
    body = []
    while src.current() != '}':
        if src.current() is None:
            raise SyntaxError("Doesn't have a matching '{}'!".format('}'))
        body.append(read_expr(src))
    # Discard the closing '}'.
    src.pop_first()
    return PSCodeArray(body)
""" Converts the next token in the given Buffer to an expression. """
def read_expr(src):
    """Pop the next token from `src` and turn it into an expression.

    - a literal token (int, float, bool) becomes a Literal;
    - '(' starts a string constant, read by read_str_constant();
    - '{' starts a code array, read by read_block_expr();
    - any other non-delimiter string becomes a PSName.

    Raises:
        SyntaxError: if `src` is exhausted, or if the token cannot start an
        expression (for example a stray ')' or '}').
    """
    token = src.pop_first()
    if token is None:
        raise SyntaxError('Incomplete expression')

    # Primitive constant: wrap the value as-is.
    if is_literal(token):
        return Literal(token)

    if isinstance(token, str):
        # String delimiter: gather tokens up to the matching ')'.
        if token == '(':
            return read_str_constant(src)
        # Code-array delimiter: gather expressions up to the matching '}'.
        if token == '{':
            return read_block_expr(src)
        # Any remaining non-delimiter string is a name.
        if is_name(token):
            return PSName(token)

    raise SyntaxError("'{}' is not the start of an expression".format(token))
"""Parse an expression from a string. If the string does not contain an
expression, an empty list is returned. If the string cannot be parsed, a SyntaxError
is raised.
"""
def read(s):
    """Parse every expression found in the source string `s`.

    The string is tokenized, the token list is wrapped in a Buffer, and
    read_expr() is applied until the buffer has no current token.

    Returns:
        list: the parsed expressions in source order; the list stays empty
        if the buffer has no current token to begin with.

    Raises:
        SyntaxError: propagated from tokenize() or read_expr().
    """
    # The parser consumes one token at a time from this buffer.
    tokens = Buffer(tokenize(s))
    parsed = []
    while tokens.current() is not None:
        parsed.append(read_expr(tokens))
    return parsed