-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
272 lines (255 loc) · 8.03 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
package comb
import (
"math"
"sync"
)
// ============================================================================
// Leaf Parser
//
type prsr[Output any] struct {
id int32
expected string
parser func(State) (State, Output, *ParserError)
recoverer func(State) int
saveSpot bool
}
// NewParser is THE way to create leaf parsers.
// recover can be nil to signal that there is no optimized recoverer available.
// In case of an error the parser will be called again and again moving forward
// one byte/rune at a time instead.
func NewParser[Output any](
expected string,
parse func(State) (State, Output, *ParserError),
recover Recoverer,
) Parser[Output] {
p := &prsr[Output]{
id: -1,
expected: expected,
parser: parse,
recoverer: recover,
}
return p
}
func (p *prsr[Output]) ID() int32 {
return p.id
}
func (p *prsr[Output]) Expected() string {
return p.expected
}
func (p *prsr[Output]) Parse(state State) (State, Output, *ParserError) {
nState, out, err := p.parser(state)
if err != nil && err.parserID < 0 {
err.parserID = p.id
}
return nState, out, err
}
func (p *prsr[Output]) parse(state State) ParseResult {
nState, output, err := p.Parse(state)
return ParseResult{StartState: state, EndState: nState, Output: output, Error: err}
}
func (p *prsr[Output]) IsSaveSpot() bool {
return p.saveSpot
}
func (p *prsr[Output]) setSaveSpot() {
p.saveSpot = true
}
func (p *prsr[Output]) Recover(state State) int {
return p.recoverer(state)
}
func (p *prsr[Output]) IsStepRecoverer() bool {
return p.recoverer == nil
}
func (p *prsr[Output]) SwapRecoverer(newRecoverer Recoverer) {
p.recoverer = newRecoverer // this isn't concurrency safe, but it only happens in the initialization phase
}
func (p *prsr[Output]) setID(id int32) {
p.id = id
}
// ============================================================================
// Branch Parser
//
type brnchprsr[Output any] struct {
id int32
expected string
childs func() []AnyParser
prsAfterChild func(childID int32, childResult ParseResult) ParseResult
}
// NewBranchParser is THE way to create branch parsers.
// parseAfterChild will be called with a childID < 0 if it should parse from
// the beginning.
func NewBranchParser[Output any](
expected string,
children func() []AnyParser,
parseAfterChild func(childID int32, childResult ParseResult) ParseResult,
) Parser[Output] {
return &brnchprsr[Output]{
id: -1,
expected: expected,
childs: children,
prsAfterChild: parseAfterChild,
}
}
func (bp *brnchprsr[Output]) ID() int32 {
return bp.id
}
func (bp *brnchprsr[Output]) Expected() string {
return bp.expected
}
func (bp *brnchprsr[Output]) Parse(state State) (State, Output, *ParserError) {
result := bp.parseAfterChild(-1, ParseResult{EndState: state})
if out, ok := result.Output.(Output); ok {
return result.EndState, out, result.Error
}
return result.EndState, ZeroOf[Output](), result.Error
}
func (bp *brnchprsr[Output]) parse(state State) ParseResult {
return bp.parseAfterChild(-1, ParseResult{EndState: state})
}
func (bp *brnchprsr[Output]) IsSaveSpot() bool {
return false
}
func (bp *brnchprsr[Output]) setSaveSpot() {
panic("a branch parser can never be a save spot")
}
func (bp *brnchprsr[Output]) Recover(_ State) int {
return RecoverWasteNever // never recover with a branch parser
}
func (bp *brnchprsr[Output]) IsStepRecoverer() bool {
return false
}
func (bp *brnchprsr[Output]) SwapRecoverer(_ Recoverer) {
panic("a branch parser can never have a special recoverer")
}
func (bp *brnchprsr[Output]) children() []AnyParser {
return bp.childs()
}
func (bp *brnchprsr[Output]) parseAfterChild(childID int32, childResult ParseResult) ParseResult {
bp.ensureIDs()
childResult = childResult.PrepareOutputFor(bp.id)
result := bp.prsAfterChild(childID, childResult)
result.SetID(bp.id)
if result.Error != nil && result.Error.parserID < 0 {
result.Error.parserID = bp.id
}
return result
}
func (bp *brnchprsr[Output]) ensureIDs() { // only needed if Parse was called directly
if bp.id < 0 { // ensure sane IDs
bp.id = 0
for i, child := range bp.childs() {
child.setID(int32(i + 1))
}
}
}
func (bp *brnchprsr[Output]) setID(id int32) {
bp.id = id
}
// ============================================================================
// Lazy Branch Parser
//
type lazyprsr[Output any] struct {
cachedPrsr Parser[Output]
once sync.Once
makePrsr func() Parser[Output]
newRecoverer Recoverer
}
// LazyBranchParser just stores a function that creates the parser and evaluates the function later.
// This allows to defer the call to NewParser() and thus to define recursive grammars.
// Only branch parsers need this ability. A leaf parser can't be recursive by definition.
func LazyBranchParser[Output any](makeParser func() Parser[Output]) Parser[Output] {
return &lazyprsr[Output]{makePrsr: makeParser}
}
func (lp *lazyprsr[Output]) ensurePrsr() {
lp.cachedPrsr = lp.makePrsr()
if lp.newRecoverer != nil {
lp.cachedPrsr.SwapRecoverer(lp.newRecoverer)
}
}
func (lp *lazyprsr[Output]) ID() int32 {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.ID()
}
func (lp *lazyprsr[Output]) Expected() string {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Expected()
}
func (lp *lazyprsr[Output]) Parse(state State) (State, Output, *ParserError) {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Parse(state)
}
func (lp *lazyprsr[Output]) parse(state State) ParseResult {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.parse(state)
}
func (lp *lazyprsr[Output]) IsSaveSpot() bool {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.IsSaveSpot()
}
func (lp *lazyprsr[Output]) setSaveSpot() {
lp.once.Do(lp.ensurePrsr)
lp.cachedPrsr.setSaveSpot()
}
func (lp *lazyprsr[Output]) Recover(state State) int {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.Recover(state)
}
func (lp *lazyprsr[Output]) IsStepRecoverer() bool {
lp.once.Do(lp.ensurePrsr)
return lp.cachedPrsr.IsStepRecoverer()
}
func (lp *lazyprsr[Output]) SwapRecoverer(newRecoverer Recoverer) {
if lp.cachedPrsr == nil {
lp.newRecoverer = newRecoverer
return
}
lp.cachedPrsr.SwapRecoverer(newRecoverer)
}
func (lp *lazyprsr[Output]) setID(id int32) {
lp.once.Do(lp.ensurePrsr)
lp.cachedPrsr.setID(id)
}
// ============================================================================
// Save Spot Parser
//
// SafeSpot applies a sub-parser and marks the new state as a
// point of no return if successful.
// It really serves 3 slightly different purposes:
//
// 1. Prevent a `FirstSuccessful` parser from trying later sub-parsers even
// in case of an error.
// 2. Prevent other unnecessary backtracking in case of an error.
// 3. Mark a parser as a potential safe place to recover to
// when recovering from an error.
//
// So you don't need this parser at all if your input is always correct.
// SafeSpot is THE cornerstone of good and performant parsing otherwise.
//
// NOTE:
// - Parsers that accept the empty input or only perform look ahead are
// NOT allowed as sub-parsers.
// SafeSpot tests the optional recoverer of the parser during the
// construction phase to do a timely panic.
// This way we won't have to panic at the runtime of the parser.
// - Only leaf parsers MUST be given to SafeSpot as sub-parsers.
// SafeSpot will treat the sub-parser as a leaf parser.
// Any error will look as if coming from SafeSpot itself.
func SafeSpot[Output any](p Parser[Output]) Parser[Output] {
// call Recoverer to find a Forbidden recoverer during the construction phase and panic
recoverer := p.Recover
if recoverer != nil && recoverer(NewFromBytes([]byte{}, true)) == math.MinInt {
panic("can't make parser with Forbidden recoverer a safe spot")
}
if _, ok := p.(BranchParser); ok {
panic("a branch parser can never be a save spot")
}
nParse := func(state State) (State, Output, *ParserError) {
nState, output, err := p.Parse(state)
if err == nil {
nState.saveSpot = nState.input.pos // move the mark!
}
return nState, output, ClaimError(err)
}
sp := NewParser[Output](p.Expected(), nParse, p.Recover)
sp.setSaveSpot()
return sp
}