-
Notifications
You must be signed in to change notification settings - Fork 0
/
shepp_lexer.py
112 lines (73 loc) · 2.42 KB
/
shepp_lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""A lexer for POSIX shells to be used with the SHEll PreProcessor."""
from ply.lex import TOKEN
import lexer
class SheppLexer(lexer.Lexer):
"""A lexer for POSIX shells to be used with the SHEll PreProcessor."""
states = (('pp', 'exclusive'), ('escaped', 'exclusive'))
PP_RESERVED = {
'include': 'INCLUDE',
'define': 'DEFINE',
}
tokens = ('WS', 'WORD', 'CHAR', 'COMMENT', 'PP_WORD') + tuple(
PP_RESERVED.values())
literals = ['\\']
t_WORD = r'[\w-]+'
t_ignore_COMMENT = r'[#].*\n'
def __init__(self, **kwargs):
"""Initialize the lexer.
All arguments are passed as is to lex.lex.
"""
super().__init__(**kwargs)
self._pp_escape = False
@TOKEN(r'\s+')
def t_WS(self, t):
"""A whitespace token.
Can include any number of whitespace charachters.
"""
self._lexer.lineno += t.value.count('\n')
return t
def t_error(self, t):
"""Handling of unmatched tokens."""
t.type = t.value[0]
t.value = t.value[0]
self._lexer.skip(1)
return t
@TOKEN(r'\\')
def t_ANY_begin_escaped(self, t):
"""Entering the escaped state."""
self._lexer.push_state('escaped')
@TOKEN(r'["\'\n]')
def t_escaped_CHAR(self, t):
"""An escaped charachter."""
self._lexer.pop_state()
return t
@TOKEN(r'[#]\$')
def t_begin_pp(self, t):
"""Entering the PreProcessor state."""
self._lexer.push_state('pp')
@TOKEN(r'\s')
def t_pp_WS(self, t):
"""A whitespace token in PreProcessor state.
If an unescaped newline is encountered, ends PreProcessor state.
"""
pp_escape = self._pp_escape
self._pp_escape = False
if t.value == '\n':
self._lexer.lineno += 1
if not pp_escape:
self._lexer.pop_state()
return t
@TOKEN(r'(\w+)|("[^\n]*")')
def t_pp_PP_WORD(self, t):
"""PrePrecessor identifiers and strings."""
t.type = type(self).PP_RESERVED.get(t.value, 'PP_WORD')
self._pp_escape = False
if t.value[0] == t.value[-1] and t.value[0] in ("'", '"'):
t.value = t.value[1:-1]
return t
@TOKEN(r'\\')
def t_pp_fslash(self, t):
"""Escape charachter in preprocessor block."""
t.type = '\\'
self._pp_escape = True
return t