-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.js
226 lines (191 loc) · 9.69 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
(function(global, undef){
if(typeof require === "function"){ _ = require("lodash");}
var parser = {
EOF:"$end",
reset:function (){
var self = this;
self.lexer.reset();
},
lexer: (function(){
return {
CONST:{"INITIAL":"INITIAL","EOF":"$end"},
states:{"exclusive":{}},
rules: [{regex:/^\/\/[^\n]*/,action:' /* skip singleline comment */'},{regex:/^\/\*(.|\n|\r)*?\*\//,action:' /* skip multiline comment */ '},{regex:/^"(?:\\[bfnrt\/"]|\\u[a-fA-F0-9]{4}|[^"\\])*"/,action:' this.yytext=this.yytext.slice(1, this.yyleng-1);return "STRING";'},{regex:/^\s+/,action:''},{regex:/^\r\n/,action:''},{regex:/^((0|[1-9][0-9]*)(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?|[0][xX][0-9a-fA-F]+/,action:' return "NUMBER"'},{regex:/^null/,action:' return "NULL";'},{regex:/^true/,action:' return "TRUE";'},{regex:/^false/,action:' return "FALSE";'},{regex:/^:/,action:' return ":";'},{regex:/^,/,action:' return ",";'},{regex:/^\[/,action:' return "[";'},{regex:/^\]/,action:' return "]";'},{regex:/^\{/,action:' return "{";'},{regex:/^\}/,action:' return "}";'},{regex:/^\s+/,action:' /* skip whitespace */'},{regex:/^\n/,action:' /* skip lineterminal */'},{regex:/^./,action:' return "INVALID";'}],
yymore:function (){
this._more = true;
},
stateStack:["INITIAL"],
pushState:function (state){
this.stateStack.push(state);
},
popState:function (){
return this.stateStack.pop();
},
getCurrentRules:function (){
var self = this,
rules = self.rules,
curState = self.stateStack[self.stateStack.length-1],
activeRules = [],
isInclusiveState = true; //是否为包容状态
if(self.states.exclusive[curState]){
isInclusiveState = false;
}
for(var i=0, len=rules.length; i<len; i++){
//处于包容状态时,没有声明状态的规则被激活
//否则,只有开始条件中包含当前状态的规则被激活
if((isInclusiveState && (!rules[i].conditions)) || (rules[i].conditions && rules[i].conditions.indexOf(curState) > -1)){
activeRules.push(rules[i]);
}
}
return activeRules;
},
setInput:function (input){
_.merge(this, {
input: input,
position: 0,
matched: '',
text: '',
yytext: '',
lineno: 1,
firstline: 1,
lastline: 1,
firstcolumn: 1,
lastcolumn: 1,
_more: false
});
},
getToken:function (isDebug){
var self = this,
token = self.getToken_(isDebug);
if(!token){
token = self.getToken(isDebug);
}
return token;
},
unToken:function (charsNum){
this.position -= charsNum;
},
getToken_:function (isDebug){
var self = this,
input = self.input.slice(self.position),
regex,
activeRules = self.getCurrentRules(),
matches;
if(!input){
return self.CONST.EOF;
}
if(!activeRules.length && isDebug){
debugger
//这个断点的原因是,这是编写lex文法时常见的错误,就是自动机陷入一个没有任何规则激活的状态中了
}
var possibleInputs = [],
maxLength = 0;
for(var i=0,len=activeRules.length; i<len; i++){
regex = activeRules[i].regex;
if(matches = input.match(activeRules[i].regex)){
possibleInputs.push({rule:activeRules[i], match: matches[0]});
maxLength = maxLength > matches[0].length ? maxLength : matches[0].length;
}
}
if(possibleInputs.length){
possibleInputs = _.filter(possibleInputs, function(possible){
return possible.match.length === maxLength;
});
if(self._more){
self.yytext += possibleInputs[0].match;
}else{
self.yytext = possibleInputs[0].match;
}
self.position += possibleInputs[0].match.length;
self.yyleng = self.yytext.length;
self._more = false;
return (new Function(possibleInputs[0].rule.action)).call(self);
}
if(isDebug){
debugger
//这个断点的原因是,没有在循环体中return 说明当前输入已经无法命中任何规则,自动机将陷入死循环
}
throw('invalid input: ' + input);
},
reset:function (){
this.setInput(this.input);
}
};
})(),
lrtable: {"actions":{"0":{"$end":["accept",0]}},"gotos":{"0":{}}},
productions: [{"symbol":"$accept","nullable":false,"firsts":[],"rhs":[null,"$end"],"srhs":" $end","id":0,"actionCode":"this.$$ = $1;"}],
parse:function (input, isDebug){
var self = this,
stateStack = [0], //状态栈 初始状态0
symbolStack = [], //符号栈
valueStack = [], //值栈
lexer = self.lexer,
token,
state;
lexer.setInput(input);
token = self.lexer.getToken(isDebug);
while(true){
state = stateStack[stateStack.length - 1];
var action = self.lrtable.actions[state] && self.lrtable.actions[state][token];
if(!action && isDebug){
//这是编写bnf时容易出错的,通过当前输入和当前状态(状态隐含了当前入栈的符号)
//无法找到右端句柄,也无法通过当前输入决定应进行移进动作
debugger
}
if(isDebug){
console.log('当前状态:'+state, '输入符号:'+token, '动作:'+action);
}
if(action){
if(action[0] === 'shift'){
stateStack.push(action[1]);
symbolStack.push(token);
valueStack.push(lexer.yytext);
token = lexer.getToken(isDebug);
}else if(action[0] === 'reduce'){
var production = self.productions[action[1]];
var reduceCode = ('/*' + production.symbol + ' -> ' + production.srhs + ';*/ this.$$ = $1;'+production.actionCode)
.replace(/\$(\d+)/g, function(_, n){
return 'valueStack[' + (valueStack.length - production.rhs.length + parseInt(n, 10) - 1) + ']'
});
eval(reduceCode);
if(isDebug){
console.log(' 当前右端句柄为:' + production.rhs);
console.log(' 右端句柄对应值栈内容为:' + JSON.stringify(valueStack.slice(-production.rhs.length)));
console.log(' 归约后的值为:' + JSON.stringify(this.$$));
}
//如果是当前归约用的产生式不是epsilon:
// 符号栈才需要对右端句柄包含的各个symbol出栈,归约为产生式的非终结符(lhs)再入栈
// 值栈才需要对右端句柄对应的各个值出栈,进行归约计算为某个lhs值,再把lhs值入栈
// 状态栈也才需要对代表右端句柄的各个状态进行出栈,查goto表找到代表lhs符号的新状态入栈
//否则,应用epsilon,各栈保持原地不动
if(production.rhs.length){
symbolStack = symbolStack.slice(0, -production.rhs.length);
valueStack = valueStack.slice(0, -production.rhs.length);
stateStack = stateStack.slice(0, -production.rhs.length);
}
var curstate = stateStack[stateStack.length-1];
//查goto表,找到代表归约后的lhs符号的新状态
var newstate = self.lrtable.gotos[curstate] && self.lrtable.gotos[curstate][production.symbol];
if(isDebug){
console.log(' 右端句柄归约后的符号:'+production.symbol+',应转移到:'+newstate);
}
symbolStack.push(production.symbol); //归约后的lhs符号,压入符号栈
valueStack.push(this.$$); //语义动作中归约后的值(rhs各项计算出的lhs值),压入值栈
stateStack.push(newstate); //goto表查到的新状态,压入状态栈
}else if(action[0] === 'accept'){
if(isDebug){
console.log('accept');
}
return true;
}else{
return false;
}
}else{
return false;
}
}
}
};
if(typeof module == "object"){module.exports = parser}
return parser;
})(this);