-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathparser.py
262 lines (218 loc) · 9.62 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# (C) William W. Cohen and Carnegie Mellon University, 2016
import sys
import collections
import logging
#
# Parse prolog rules in one of these sample formats
#
# p(X,Y) :- q(X,Z), r(Z,X). # normal prolog clause
# p(X,Y,Z) :- . # unit clause
# p(X,Y) :- q(X,Z) {f(Y,X)}. # normal prolog clause plus a 'feature'
# p(X,Y) :- q(X,Z) {f(Y,X),g(Y)}. # multiple 'features'
# p(X,Y) :- q(X,Z) {f(W) : g(Y,W)}. # features generated by a 'findall'
# # ie for all solutions of g(Y,W),
# # produce a feature f(W)
#
# TODO: remove the stuff that's not supported in TensorLog
from tensorlog import config
# Module-wide configuration object.  conf.syntax is the default rule syntax
# used by Rule.asString, RuleCollection and Parser when no syntax is given.
conf = config.Config()
conf.syntax = 'proppr'
conf.help.syntax = "Should be 'pythonic' or 'proppr'"
##############################################################################
## data structures to encode rules
##############################################################################
def isProcessedConstant(a):
    """Return True when `a` is not a processed variable.

    Anything that isProcessedVariable rejects counts as a constant.
    """
    is_variable = isProcessedVariable(a)
    return not is_variable
def isProcessedVariable(a):
    """Return True when `a` is exactly an int.

    Rule.variabilize replaces variable names by integer indices, so an
    int argument denotes a processed variable.  The check is on the exact
    type, so subclasses of int such as bool do not qualify.
    """
    return type(a) is int
def isVariableAtom(a):
    """Return True when the atom `a` names a variable.

    An atom is a variable when its first character is an upper-case
    letter or an underscore.
    """
    first = a[0]
    if first.isupper():
        return True
    return first == '_'
class Goal(object):
    """A prolog goal, e.g. brotherOf(X,Y).

    The constructor stores the functor, the argument list, and the arity
    (the number of arguments).
    """

    def __init__(self, functor, args):
        self.functor = functor
        self._setArgs(args)

    def _setArgs(self, args):
        """Store the arguments and record how many there are."""
        self.args = args
        self.arity = len(args)

    def __str__(self):
        # A goal without arguments prints as the bare functor.
        if not self.arity:
            return self.functor
        rendered = ",".join(str(arg) for arg in self.args)
        return "{}({})".format(self.functor, rendered)

    def __repr__(self):
        return 'Goal({!r},{!r})'.format(self.functor, self.args)
class Rule(object):
    """A prolog rule.

    The lhs is a goal and the rhs a list of goals, so the rule's format
    is "lhs :- rhs."  The features for a rule are, in general, of the
    form "features : findall", where 'findall' and 'features' are lists
    of goals.  Features are produced as follows: after binding the head
    of the rule, you find all solutions to the 'findall' part (the
    "generator"), and for each solution, create a feature corresponding
    to a bound version of each goal g in 'features'.
    """

    def __init__(self, lhs, rhs, features=None, findall=None):
        self.lhs = lhs
        self.rhs = rhs
        self.features = features
        self.findall = findall
        # Filled in by variabilize(); nvars < 0 means "not variabilized yet".
        self.variableList = None
        self.nvars = -1

    def variabilize(self):
        """Replace variable names by negative integer indices, in place.

        To simplify compilation, each distinct variable is numbered in
        order of first appearance and replaced by -1, -2, ...  The
        original names are saved in 'variableList' and the number of
        distinct variables in 'nvars'.  Calling this a second time is a
        no-op.
        """
        if self.nvars >= 0:
            return  # already done

        # A minimal local symbol table: names in first-seen order plus a
        # name -> 1-based id map.  Keeping it local means this method does
        # not depend on any module that this file does not import.
        names = []
        ids = {}

        def convertArgs(args):
            converted = []
            for a in args:
                if isVariableAtom(a):
                    if a not in ids:
                        names.append(a)
                        ids[a] = len(names)
                    converted.append(-ids[a])
                else:
                    converted.append(a)
            return converted

        def convertGoal(g):
            return Goal(g.functor, convertArgs(g.args))

        # The lhs may be None (e.g. for a headless query rule).
        if self.lhs:
            self.lhs = convertGoal(self.lhs)
        self.rhs = [convertGoal(g) for g in self.rhs]
        if self.features:
            self.features = [convertGoal(g) for g in self.features]
        if self.findall:
            self.findall = [convertGoal(g) for g in self.findall]
        self.variableList = names
        self.nvars = len(names)

    def __str__(self):
        return self.asString()

    def asString(self, syntax=None):
        """Render the rule as text.

        syntax -- 'proppr' for the "lhs :- rhs {features : findall}."
                  form; any other value gives the
                  "lhs <= rhs // features | findall" form.  Defaults to
                  conf.syntax when None.

        When the rule has been variabilized with at least one variable,
        a " #v:[...]" suffix listing the original variable names is
        included.
        """
        if syntax is None:
            syntax = conf.syntax
        varSuffix = " #v:" + str(self.variableList) if self.variableList else ''
        if syntax == 'proppr':
            findalls = ' : ' + ",".join(map(str, self.findall)) if self.findall else ''
            features = ' {' + ",".join(map(str, self.features)) + findalls + '}' if self.features else ''
            return str(self.lhs) + " :- " + ", ".join(map(str, self.rhs)) + features + varSuffix + '.'
        else:
            findalls = ' | ' + " & ".join(map(str, self.findall)) if self.findall else ''
            features = ' // ' + " & ".join(map(str, self.features)) + findalls if self.features else ''
            return str(self.lhs) + " <= " + " & ".join(map(str, self.rhs)) + features + varSuffix
class RuleCollection(object):
    """A set of prolog rules, indexed by functor and arity."""

    def __init__(self, syntax=None):
        # Maps "functor/arity" keys to the list of rules with that head.
        self.index = collections.defaultdict(list)
        # Syntax used by listing(); falls back to the module default.
        self.syntax = syntax or conf.syntax

    def _key(self, g):
        """Return the "functor/arity" index key for goal g."""
        return '%s/%d' % (g.functor, g.arity)

    def add(self, r):
        """Add rule r, indexed by the functor and arity of its lhs."""
        self.index[self._key(r.lhs)].append(r)

    def size(self):
        """Return the total number of rules in the collection."""
        return sum(len(rules) for rules in self.index.values())

    def rulesFor(self, g):
        """Return the list of rules whose head matches goal g, or None.

        Uses .get so that a lookup never inserts an empty entry into the
        underlying defaultdict.
        """
        return self.index.get(self._key(g))

    def mapRules(self, mapfun):
        """Replace every rule r by mapfun(r), key by key.

        If mapfun raises, the offending key is printed and the exception
        is re-raised.
        """
        for key in self.index:
            try:
                self.index[key] = list(map(mapfun, self.index[key]))
            except Exception:
                print(("Trouble mapping rule %s:" % key))
                raise

    def listing(self):
        """Print every rule, grouped by key, in this collection's syntax."""
        for key in self.index:
            # NOTE(review): this prints a 2-tuple, not a formatted line;
            # kept as-is so the output is unchanged.
            print(('% rules for', key))
            for r in self.index[key]:
                print((r.asString(syntax=self.syntax)))

    def __iter__(self):
        """Iterate over all rules, one index key at a time."""
        for key in self.index:
            for r in self.index[key]:
                yield r

    def equals(self, other):
        """Return True if both collections yield the same rules in order.

        Rules are compared by their 'pythonic' string form.  The lengths
        are compared first because zip() alone stops at the shorter
        sequence and would treat a strict prefix as equal.
        """
        mine = list(self)
        theirs = list(other)
        if len(mine) != len(theirs):
            return False
        return all(
            r1.asString(syntax='pythonic') == r2.asString(syntax='pythonic')
            for r1, r2 in zip(mine, theirs)
        )
##############################################################################
## the parser
##############################################################################
from pyparsing import Word, CharsNotIn, alphas, alphanums, delimitedList, nestedExpr, Optional, Group, QuotedString
class Parser(object):
    """Parses goals, goal lists and rules written in 'proppr' or
    'pythonic' syntax into Goal, Rule and RuleCollection objects."""

    def __init__(self, syntax=None):
        self.setSyntax(syntax or conf.syntax)

    def setSyntax(self, syntax):
        """Select the rule syntax and (re)build the pyparsing grammar.

        'proppr' gives "lhs :- g1, g2 {f1, f2 : h1, h2}." rules; any other
        value gives "lhs <= g1 & g2 // f1 & f2 | h1 & h2" rules.
        """
        self.syntax = syntax
        self.atomNT = Word(alphanums + "_$") | QuotedString(quoteChar="'", escChar="\\")
        self.goalNT = self.atomNT + Optional("(" + delimitedList(self.atomNT) + ")")
        if self.syntax == 'proppr':
            self.goalListNT = Optional(delimitedList(Group(self.goalNT)))
            self.featureFindAllNT = Optional(":" + delimitedList(Group(self.goalNT)))
            self.featureTemplateNT = delimitedList(Group(self.goalNT))
            self.featureBlockNT = Optional("{" + self.featureTemplateNT('ftemplate') + self.featureFindAllNT('ffindall') + "}")
            self.ruleNT = self.goalNT("lhs") + ":-" + self.goalListNT("rhs") + self.featureBlockNT("features") + "."
        else:
            self.goalListNT = Optional(delimitedList(Group(self.goalNT), delim="&"))
            self.featureFindAllNT = Optional("|" + delimitedList(Group(self.goalNT), delim="&"))
            self.featureTemplateNT = delimitedList(Group(self.goalNT), delim="&")
            self.featureBlockNT = Optional("//" + self.featureTemplateNT('ftemplate') + self.featureFindAllNT('ffindall'))
            self.ruleNT = self.goalNT("lhs") + "<=" + self.goalListNT("rhs") + self.featureBlockNT("features")

    def _convertGoal(self, ptree):
        """Build a Goal from goal tokens.

        The first token is the functor; when arguments are present the
        tokens at index 1 and -1 are the parentheses, so the arguments
        are ptree[2:-1] (empty for a goal without arguments).
        """
        return Goal(ptree[0], ptree[2:-1])

    def _convertRule(self, ptree):
        """Build a Rule from the parse results of ruleNT."""
        lhs = self._convertGoal(ptree['lhs'])
        if 'rhs' in ptree:
            rhs = [self._convertGoal(g) for g in ptree['rhs'].asList()]
        else:
            rhs = []
        if 'features' not in ptree:
            return Rule(lhs, rhs, None, None)
        features = [self._convertGoal(g) for g in ptree['ftemplate'].asList()]
        if 'ffindall' not in ptree:
            return Rule(lhs, rhs, features, None)
        # The first 'ffindall' token is the separator literal; skip it.
        findall = [self._convertGoal(g) for g in ptree['ffindall'].asList()[1:]]
        return Rule(lhs, rhs, features, findall)

    def parseGoal(self, s):
        """Convert a string to a goal."""
        return self._convertGoal(self.goalNT.parseString(s))

    def parseGoalList(self, s):
        """Convert a string to a goal list."""
        return [self._convertGoal(g) for g in self.goalListNT.parseString(s).asList()]

    def parseRule(self, s):
        """Convert a string to a rule."""
        return self._convertRule(self.ruleNT.parseString(s))

    def parseQuery(self, s):
        """Convert a string to a headless rule (no lhs)."""
        # NOTE(review): this uses a fresh default-syntax Parser and the
        # ':-' operator regardless of self.syntax, and does not append a
        # terminator to s -- confirm that this is intended.
        result = Parser().parseRule('dummy :- %s\n' % s)
        result.lhs = None
        return result

    def parseFile(self, filename, rules=None):
        """Extract a series of rules from a file.

        A filename ending in "tlog" switches this parser to 'pythonic'
        syntax (the switch persists after the call).
        """
        if filename.endswith("tlog"):
            self.setSyntax('pythonic')
        with open(filename) as fp:
            return self.parseStream(fp, rules=rules)

    def parseStream(self, fileLike, rules=None):
        """Extract a series of rules from a stream.

        fileLike -- an iterable of text lines; lines starting with '#'
                    are dropped before parsing.
        rules    -- collection to add the rules to; a new RuleCollection
                    is created when None.

        Text before the first rule or after the last one that the
        grammar could not parse is reported with logging.error.  A
        KeyError raised while converting a rule is reported on stdout and
        the rules collected so far are returned.
        """
        if rules is None:
            rules = RuleCollection(syntax=self.syntax)
        buf = "".join(line for line in fileLike if not line.startswith('#'))
        lo = None       # start offset of the rule currently being converted
        lastEnd = None  # end offset of the last rule added; None if none yet
        try:
            for (ptree, lo, hi) in self.ruleNT.scanString(buf):
                rules.add(self._convertRule(ptree))
                if lastEnd is None:
                    unread_text = buf[:lo].strip()
                    if unread_text:
                        logging.error('unparsed text at start of %s: "%s..."' % (fileLike, unread_text))
                lastEnd = hi
            # If nothing parsed at all, the whole buffer is unread.
            unread_text = buf.strip() if lastEnd is None else buf[lastEnd:].strip()
            if unread_text:
                logging.error('unparsed text at end of %s: "...%s"' % (fileLike, unread_text))
            return rules
        except KeyError:
            print(('error near ', lo, 'in', fileLike))
            return rules
if __name__ == "__main__":
    # Script entry point: parse each file named on the command line and
    # print a listing of its rules.
    # NOTE(review): `p` is constructed but never used below; every file is
    # parsed by a fresh default-syntax Parser instead.
    p = Parser(syntax='pythonic')
    for f in sys.argv[1:]:
        header = '\nparsed from file %r:' % f
        print((header))
        parsed_rules = Parser().parseFile(f)
        parsed_rules.listing()