forked from npaton/gogtfs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
188 lines (161 loc) · 3.92 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
package gtfs
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
"log"
)
// bom0, bom1 and bom2 are the three bytes of the UTF-8 byte order mark
// (0xEF 0xBB 0xBF). cleanBOM compares the start of a line against them.
const (
bom0 = 0xef
bom1 = 0xbb
bom2 = 0xbf
)
// Parser is a string-based type whose value is used as the file name in the
// errors reported by its parse method.
type Parser string
// ParseError describes a problem met while parsing a file, together with the
// place where it was detected.
type ParseError struct {
	// Message is the human-readable description of the problem.
	Message string
	// LineNumber is the line of the file the problem was found on.
	LineNumber int
	// FileName is the name of the file being parsed.
	FileName string
}

// Error implements the error interface. It renders the file name, the line
// number and the message on a single line.
func (e *ParseError) Error() string {
	file, line, msg := e.FileName, e.LineNumber, e.Message
	return fmt.Sprintf("ParseError in file %v at line %d: %v", file, line, msg)
}
// settableThroughField is implemented by models whose fields can be assigned
// by name from a raw string value.
type settableThroughField interface {
	setField(fieldName string, value string)
}

// fieldsSetter calls model.setField once per entry of fieldKeys, in order,
// pairing each key with the value at the same index in fieldValues.
//
// When fieldValues is shorter than fieldKeys the missing values are passed as
// the empty string instead of indexing past the end of the slice (which would
// panic). Values beyond len(fieldKeys) are ignored.
func fieldsSetter(model settableThroughField, fieldKeys, fieldValues []string) {
	for i, key := range fieldKeys {
		value := ""
		if i < len(fieldValues) {
			value = fieldValues[i]
		}
		model.setField(key, value)
	}
}
// cleanBOM returns b without its first three bytes when those bytes are
// bom0, bom1, bom2 in that order; otherwise it returns b unchanged. The result
// shares its backing array with b.
func cleanBOM(b []byte) []byte {
	// Too short to hold the three-byte mark.
	if len(b) < 3 {
		return b
	}
	if b[0] != bom0 || b[1] != bom1 || b[2] != bom2 {
		return b
	}
	return b[3:]
}
// parse reads r line by line. The first line (after removing a leading UTF-8
// byte order mark) is split into field keys; every following line is split
// into field values and handed to recordHandler together with those keys.
//
// Rows with fewer values than keys are padded with empty strings so that
// every key has a value; rows with more values than keys are passed through
// unchanged. A data row that parseLine rejects is logged and skipped rather
// than aborting the whole file.
//
// It returns:
//   - a *ParseError if the first line cannot be read or cannot be split,
//   - a plain error if a line does not fit in the bufio.Reader buffer
//     (over-long lines are not supported),
//   - the underlying read error if reading fails with anything but io.EOF,
//   - nil once the input is exhausted.
func (p *Parser) parse(r io.Reader, recordHandler func(k, v []string)) error {
	fileName := string(*p)
	lineNumber := 1
	reader := bufio.NewReader(r)

	firstline, isPrefix, err := reader.ReadLine()
	if err != nil {
		return &ParseError{Message: err.Error(), FileName: fileName, LineNumber: lineNumber}
	}
	if isPrefix {
		// Format the file name, not the *Parser pointer, which would print
		// as a memory address. errors.New is kept (rather than fmt.Errorf)
		// so the file's "errors" import remains in use.
		return errors.New(fmt.Sprintf("First line too long (not handled yet, oups): \"%v\"", fileName))
	}

	fieldKeys, perr := p.parseLine(cleanBOM(firstline))
	if perr != nil {
		perr.FileName = fileName
		perr.LineNumber = lineNumber
		return perr
	}

	for {
		line, tooLong, readErr := reader.ReadLine()
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return readErr
		}
		lineNumber++
		if tooLong {
			return errors.New(fmt.Sprintf("Line %d too long (not handled yet, oups): \"%v\"", lineNumber, fileName))
		}

		fieldValues, lineErr := p.parseLine(line)
		if lineErr != nil {
			// Best effort: report the bad row and carry on with the next one.
			lineErr.FileName = fileName
			lineErr.LineNumber = lineNumber
			log.Println(lineErr)
			continue
		}

		// Pad short rows so every key has a (possibly empty) value.
		for len(fieldValues) < len(fieldKeys) {
			fieldValues = append(fieldValues, "")
		}
		recordHandler(fieldKeys, fieldValues)
	}
}
// parseLine splits one comma-separated line into its fields.
//
// Rules applied while scanning, rune by rune:
//   - a comma ends the current field, unless it appears inside a quoted field
//     that has not been closed yet;
//   - a field whose first retained character is a double quote is a quoted
//     field: the surrounding quotes are dropped and a doubled quote ("")
//     inside it stands for one literal quote;
//   - a quote in the middle of an unquoted field is logged and kept as is;
//   - spaces are dropped while the field is still empty (this also applies
//     right after an opening quote) and kept afterwards;
//   - a tab, line feed or carriage return anywhere in the line is an error.
//
// The result always holds at least one token: an empty line yields a single
// empty field. On error tokens is nil and the returned *ParseError carries
// only Message; the caller is expected to fill in file name and line number.
// The receiver is not used.
func (p *Parser) parseLine(line []byte) (tokens []string, err *ParseError) {
	tokens = make([]string, 0, 10)

	var (
		field     strings.Builder
		previous  rune // rune handled in the previous iteration
		quoted    bool // the current field was opened with a double quote
		closed    bool // an odd number of quotes was seen since the field started
		charIndex int  // rune offset within the line, used in messages
	)

	for _, r := range string(line) {
		switch r {
		case '\t', '\n', '\r':
			var char string
			switch r {
			case '\t':
				char = "\\t"
			case '\n':
				char = "\\n"
			default:
				char = "\\r"
			}
			return nil, &ParseError{Message: fmt.Sprintf("Found illegal character '%v' at char %d", char, charIndex)}

		case '"':
			if field.Len() == 0 && !quoted {
				// Opening quote of a quoted field; it is not part of the value.
				quoted = true
				break
			}
			if !quoted {
				// Stray quote inside an unquoted (and, at this point,
				// non-empty) field: tolerate it and keep the character.
				log.Printf("Unexpected quote (\") found at char %d", charIndex)
				field.WriteRune(r)
			}
			closed = !closed
			// Only the second quote of an adjacent pair is an escaped quote.
			// Checking !closed keeps the opening quote, and the closing quote
			// that follows an escaped pair, from being counted as one.
			if quoted && !closed && previous == '"' {
				field.WriteRune(r)
			}

		case ',':
			if quoted && !closed {
				// Comma inside a still-open quoted field is data.
				field.WriteRune(r)
				break
			}
			tokens = append(tokens, field.String())
			field.Reset()
			quoted, closed = false, false

		case ' ':
			if field.Len() > 0 {
				field.WriteRune(r)
			}

		default:
			field.WriteRune(r)
		}

		previous = r
		charIndex++
	}

	return append(tokens, field.String()), nil
}