This repository has been archived by the owner on May 21, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 69
/
machine.go.rl
400 lines (320 loc) · 11.3 KB
/
machine.go.rl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
package rfc5424
import (
"time"
"fmt"
"github.com/influxdata/go-syslog/v3"
"github.com/influxdata/go-syslog/v3/common"
)
// ColumnPositionTemplate is the template used to communicate the column where errors occur.
//
// The error actions of the machine append it to the error message and format it
// with the parser position (m.p) at which the error was detected.
var ColumnPositionTemplate = " [col %d]"
// Error messages reported by the parser.
// The error actions of the machine append ColumnPositionTemplate to these messages.
const (
// ErrPrival represents an error in the priority value (PRIVAL) inside the PRI part of the RFC5424 syslog message.
ErrPrival = "expecting a priority value in the range 1-191 or equal to 0"
// ErrPri represents an error in the PRI part of the RFC5424 syslog message.
ErrPri = "expecting a priority value within angle brackets"
// ErrVersion represents an error in the VERSION part of the RFC5424 syslog message.
ErrVersion = "expecting a version value in the range 1-999"
// ErrTimestamp represents an error in the TIMESTAMP part of the RFC5424 syslog message.
ErrTimestamp = "expecting a RFC3339MICRO timestamp or a nil value"
// ErrHostname represents an error in the HOSTNAME part of the RFC5424 syslog message.
ErrHostname = "expecting an hostname (from 1 to max 255 US-ASCII characters) or a nil value"
// ErrAppname represents an error in the APP-NAME part of the RFC5424 syslog message.
ErrAppname = "expecting an app-name (from 1 to max 48 US-ASCII characters) or a nil value"
// ErrProcID represents an error in the PROCID part of the RFC5424 syslog message.
ErrProcID = "expecting a procid (from 1 to max 128 US-ASCII characters) or a nil value"
// ErrMsgID represents an error in the MSGID part of the RFC5424 syslog message.
ErrMsgID = "expecting a msgid (from 1 to max 32 US-ASCII characters) or a nil value"
// ErrStructuredData represents an error in the STRUCTURED DATA part of the RFC5424 syslog message.
ErrStructuredData = "expecting a structured data section containing one or more elements (`[id( key=\"value\")*]+`) or a nil value"
// ErrSdID represents an error regarding the ID of a STRUCTURED DATA element of the RFC5424 syslog message.
ErrSdID = "expecting a structured data element id (from 1 to max 32 US-ASCII characters; except `=`, ` `, `]`, and `\"`"
// ErrSdIDDuplicated represents an error occurring when two STRUCTURED DATA elements have the same ID in a RFC5424 syslog message.
ErrSdIDDuplicated = "duplicate structured data element id"
// ErrSdParam represents an error regarding a STRUCTURED DATA PARAM of the RFC5424 syslog message.
ErrSdParam = "expecting a structured data parameter (`key=\"value\"`, both part from 1 to max 32 US-ASCII characters; key cannot contain `=`, ` `, `]`, and `\"`, while value cannot contain `]`, backslash, and `\"` unless escaped)"
// ErrMsg represents an error in the MESSAGE part of the RFC5424 syslog message.
ErrMsg = "expecting a free-form optional message in UTF-8 (starting with or without BOM)"
// ErrMsgNotCompliant represents an error in the MESSAGE part of the RFC5424 syslog message if the WithCompliantMsg option is on.
ErrMsgNotCompliant = ErrMsg + " or a free-form optional message in any encoding (starting without BOM)"
// ErrEscape represents the error for a RFC5424 syslog message occurring when a STRUCTURED DATA PARAM value contains '"', '\', or ']' not escaped.
ErrEscape = "expecting chars `]`, `\"`, and `\\` to be escaped within param value"
// ErrParse represents a general parsing error for a RFC5424 syslog message.
ErrParse = "parsing error"
)
// RFC3339MICRO represents the timestamp format that RFC5424 mandates.
//
// It is the layout handed to time.Parse by the set_timestamp action.
const RFC3339MICRO = "2006-01-02T15:04:05.999999Z07:00"
%%{
# Name of the state machine defined by this specification.
machine rfc5424;
# Pull in the definitions of the `common` machine from common.rl.
# NOTE(review): names used below but not defined here (prival, sp, sdname, bom, ...) presumably come from it - confirm.
include common "common.rl";
# unsigned alphabet
alphtype uint8;
# mark remembers the current position in m.pb.
# m.text() later returns the input between that mark and the current position.
action mark {
m.pb = m.p
}
# markmsg remembers the current position in m.msgat.
# err_msg uses it to keep the part of the message read before an error.
action markmsg {
m.msgat = m.p
}
# select_msg_mode holds the current character (fhold) and hands the parsing over
# to the msg_compliant machine when m.compliantMsg is set, to msg_any otherwise.
action select_msg_mode {
fhold;
if m.compliantMsg {
fgoto msg_compliant;
}
fgoto msg_any;
}
# set_prival converts the marked text with common.UnsafeUTF8DecimalCodePointsToInt,
# stores the result (narrowed to uint8) as the priority, and flags the priority as set.
action set_prival {
output.priority = uint8(common.UnsafeUTF8DecimalCodePointsToInt(m.text()))
output.prioritySet = true
}
# set_version converts the marked text with common.UnsafeUTF8DecimalCodePointsToInt
# and stores the result (narrowed to uint16) as the version.
action set_version {
output.version = uint16(common.UnsafeUTF8DecimalCodePointsToInt(m.text()))
}
# set_timestamp parses the marked text with time.Parse using the RFC3339MICRO layout.
# On success it stores the value and flags the timestamp as set.
# On failure it records the time.Parse error followed by the column position,
# holds the current character, and jumps to the fail machine.
action set_timestamp {
	// NOTE(review): t and e are deliberately scoped to the if statement; presumably the
	// generated goto-based code does not tolerate action-level declarations - confirm before restructuring.
	if t, e := time.Parse(RFC3339MICRO, string(m.text())); e != nil {
		// Append ColumnPositionTemplate like every other error action does,
		// so that a customized template also applies to timestamp errors.
		m.err = fmt.Errorf("%s" + ColumnPositionTemplate, e, m.p)
		fhold;
		fgoto fail;
	} else {
		output.timestamp = t
		output.timestampSet = true
	}
}
# set_hostname stores a copy of the marked text as the hostname.
action set_hostname {
output.hostname = string(m.text())
}
# set_appname stores a copy of the marked text as the application name.
action set_appname {
output.appname = string(m.text())
}
# set_procid stores a copy of the marked text as the process ID.
action set_procid {
output.procID = string(m.text())
}
# set_msgid stores a copy of the marked text as the message ID.
action set_msgid {
output.msgID = string(m.text())
}
# ini_elements gives the output message an empty structured data map.
action ini_elements {
output.structuredData = map[string]map[string]string{}
}
# set_id registers the marked text as the ID of a new structured data element.
# A duplicated ID records ErrSdIDDuplicated with the column position and jumps to the fail machine.
# Otherwise an empty parameter map is created for the ID, which becomes the current element.
action set_id {
if _, ok := output.structuredData[string(m.text())]; ok {
// As per RFC5424 section 6.3.2 SD-ID MUST NOT exist more than once in a message
m.err = fmt.Errorf(ErrSdIDDuplicated + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
} else {
id := string(m.text())
output.structuredData[id] = map[string]string{}
output.hasElements = true
// Parameters parsed from now on are stored under this element.
m.currentelem = id
}
}
# ini_sdparam forgets the backslash positions recorded for the previous parameter.
action ini_sdparam {
m.backslashat = []int{}
}
# add_slash records the current position as the position of a backslash.
action add_slash {
m.backslashat = append(m.backslashat, m.p)
}
# set_paramname stores a copy of the marked text as the name of the current parameter.
action set_paramname {
m.currentparam = string(m.text())
}
# set_paramvalue stores the marked text as the value of the current parameter
# (m.currentparam) of the current element (m.currentelem).
# Nothing is stored when the output has no elements.
action set_paramvalue {
if output.hasElements {
// (fixme) > what if SD-PARAM-NAME already exist for the current element (ie., current SD-ID)?
// Store text
text := m.text()
// Strip backslashes only when there are ...
// NOTE(review): common.RemoveBytes presumably drops the bytes at the recorded positions,
// taking m.pb as the offset of text within the input - confirm against its definition.
if len(m.backslashat) > 0 {
text = common.RemoveBytes(text, m.backslashat, m.pb)
}
output.structuredData[m.currentelem][m.currentparam] = string(text)
}
}
# set_msg stores a copy of the marked text as the message.
action set_msg {
output.message = string(m.text())
}
# The error actions below share one shape: record the matching Err* message followed by
# the column position (ColumnPositionTemplate formatted with m.p), hold the current
# character (fhold), and jump to the fail machine.

# err_prival reports ErrPrival.
action err_prival {
m.err = fmt.Errorf(ErrPrival + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_pri reports ErrPri.
action err_pri {
m.err = fmt.Errorf(ErrPri + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_version reports ErrVersion.
action err_version {
m.err = fmt.Errorf(ErrVersion + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_timestamp reports ErrTimestamp.
action err_timestamp {
m.err = fmt.Errorf(ErrTimestamp + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_hostname reports ErrHostname.
action err_hostname {
m.err = fmt.Errorf(ErrHostname + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_appname reports ErrAppname.
action err_appname {
m.err = fmt.Errorf(ErrAppname + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_procid reports ErrProcID.
action err_procid {
m.err = fmt.Errorf(ErrProcID + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_msgid reports ErrMsgID.
action err_msgid {
m.err = fmt.Errorf(ErrMsgID + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_structureddata reports ErrStructuredData.
action err_structureddata {
m.err = fmt.Errorf(ErrStructuredData + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_sdid removes the element named by m.currentelem from the structured data,
# clears the hasElements flag when no element is left, then reports ErrSdID with
# the column position and jumps to the fail machine.
action err_sdid {
delete(output.structuredData, m.currentelem)
if len(output.structuredData) == 0 {
output.hasElements = false
}
m.err = fmt.Errorf(ErrSdID + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_sdparam removes the parameter named by m.currentparam from the current element
# (only when there is at least one element), then reports ErrSdParam with the
# column position and jumps to the fail machine.
action err_sdparam {
if len(output.structuredData) > 0 {
delete(output.structuredData[m.currentelem], m.currentparam)
}
m.err = fmt.Errorf(ErrSdParam + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_msg keeps the part of the message read so far and reports a message error:
# ErrMsgNotCompliant when m.compliantMsg is set, ErrMsg otherwise.
# It then holds the current character and jumps to the fail machine.
action err_msg {
// If error encountered within the message rule ...
// (m.msgat is set by markmsg and reset to 0 by Parse)
if m.msgat > 0 {
// Save the text until valid (m.p is where the parser has stopped)
output.message = string(m.data[m.msgat:m.p])
}
if m.compliantMsg {
m.err = fmt.Errorf(ErrMsgNotCompliant + ColumnPositionTemplate, m.p)
} else {
m.err = fmt.Errorf(ErrMsg + ColumnPositionTemplate, m.p)
}
fhold;
fgoto fail;
}
# err_escape reports ErrEscape with the column position, holds the current character,
# and jumps to the fail machine.
action err_escape {
m.err = fmt.Errorf(ErrEscape + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# err_parse reports the generic ErrParse with the column position, holds the current
# character, and jumps to the fail machine.
action err_parse {
m.err = fmt.Errorf(ErrParse + ColumnPositionTemplate, m.p)
fhold;
fgoto fail;
}
# The nil value is a single dash.
nilvalue = '-';
# PRI: a priority value within angle brackets; the value is stored by set_prival.
pri = ('<' prival >mark %from(set_prival) $err(err_prival) '>') @err(err_pri);
# VERSION: a non-zero digit followed by up to two more digits; stored by set_version.
version = (nonzerodigit digit{0,2} <err(err_version)) >mark %from(set_version) %eof(set_version) @err(err_version);
# TIMESTAMP: the nil value, or a full date and a full time separated by 'T'; parsed by set_timestamp.
timestamp = (nilvalue | (fulldate >mark 'T' fulltime %set_timestamp %err(set_timestamp))) @err(err_timestamp);
# HOSTNAME, APP-NAME, PROCID, MSGID: the marked text is stored by the matching set_* action.
hostname = hostnamerange >mark %set_hostname $err(err_hostname);
appname = appnamerange >mark %set_appname $err(err_appname);
procid = procidrange >mark %set_procid $err(err_procid);
msgid = msgidrange >mark %set_msgid $err(err_msgid);
# HEADER: the fields above, in this order, separated by sp.
header = (pri version sp timestamp sp hostname sp appname sp procid sp msgid) <>err(err_parse);
# \", \], \\
# (the position of the backslash is recorded by add_slash)
escapes = (bs >add_slash toescape) $err(err_escape);
# As per section 6.3.3 param value MUST NOT contain '"', '\' and ']', unless they are escaped.
# A backslash '\' followed by none of these three characters is an invalid escape sequence.
# In this case, treat it as a regular backslash and the following character as a regular character (not altering the invalid sequence).
paramvalue = (utf8charwodelims* escapes*)+ >mark %set_paramvalue;
paramname = sdname >mark %set_paramname;
# SD-PARAM: name, equal sign, and the value within double quotes (dq).
sdparam = (paramname '=' dq paramvalue dq) >ini_sdparam $err(err_sdparam);
# (note) > finegrained semantics of section 6.3.2 not represented here since not so useful for parsing goal
sdid = sdname >mark %set_id %err(set_id) $err(err_sdid);
# SD-ELEMENT: an ID followed by zero or more sp-separated parameters, within square brackets.
sdelement = ('[' sdid (sp sdparam)* ']');
# STRUCTURED-DATA: the nil value, or one or more elements.
structureddata = nilvalue | sdelement+ >ini_elements $err(err_structureddata);
# Message made of any characters, entered by select_msg_mode when m.compliantMsg is off.
msg_any := any* >mark >markmsg %set_msg $err(err_msg);
# MSG-ANY = *OCTET ; not starting with BOM
# MSG-UTF8 = BOM *OCTET ; UTF-8 string as specified in RFC 3629
# MSG = MSG-ANY | MSG-UTF8
msg_compliant := ((bom utf8octets) | (any* - (bom any*))) >mark >markmsg %set_msg $err(err_msg);
# The first character of the message (if any) triggers the choice between msg_compliant and msg_any.
msg = any? @select_msg_mode;
# fail is the machine the error actions jump to; it accepts any characters other than line feed and carriage return.
fail := (any - [\n\r])* @err{ fgoto main; };
# A message is a header, sp, the structured data, and optionally sp followed by the message.
main := header sp structureddata (sp msg)? $err(err_parse);
}%%
// Emit the static data of the state machine.
// NOTE(review): the noprefix option is presumably what lets Parse refer to first_final
// and en_fail without a machine-name prefix - confirm against the Ragel guide.
%% write data noerror noprefix;
// machine holds the state of the RFC5424 parser: the variables driven by the
// generated state machine, the bookkeeping shared by its actions, and the options.
type machine struct {
// data is the input being parsed (set by Parse).
data []byte
// cs is the current state; Parse compares it against first_final and en_fail.
cs int
// p is the current position within data; Parse sets pe and eof to len(data).
p, pe, eof int
// pb is the position saved by the mark action; text() starts from it.
pb int
// err is the error of the last call to Parse (nil when none occurred).
err error
// currentelem is the ID of the structured data element being filled (set by set_id).
currentelem string
// currentparam is the name of the parameter being parsed (set by set_paramname).
currentparam string
// msgat is the position saved by the markmsg action.
msgat int
// backslashat collects the positions recorded by the add_slash action.
backslashat []int
// bestEffort makes Parse return partially parsed messages along with the error.
bestEffort bool
// compliantMsg makes select_msg_mode choose msg_compliant rather than msg_any.
compliantMsg bool
}
// NewMachine builds a finite-state machine that parses RFC5424 syslog messages.
//
// Every given option is applied, in order, to the new machine before it is returned.
func NewMachine(options ...syslog.MachineOption) syslog.Machine {
	m := new(machine)

	for i := range options {
		options[i](m)
	}

	%% access m.;
	%% variable p m.p;
	%% variable pe m.pe;
	%% variable eof m.eof;
	%% variable data m.data;

	return m
}
// WithBestEffort enables best effort mode.
//
// In best effort mode Parse returns the partially parsed message together with
// the error, provided that the partial message is minimally valid.
func (m *machine) WithBestEffort() {
m.bestEffort = true
}
// HasBestEffort tells whether the receiving machine has best effort mode on or off.
//
// It reports true once WithBestEffort has been called on the machine.
func (m *machine) HasBestEffort() bool {
return m.bestEffort
}
// Err returns the error that occurred on the last call to Parse.
//
// If the result is nil, then the line was parsed successfully.
// Parse clears the stored error every time it starts.
func (m *machine) Err() error {
return m.err
}
// text returns the slice of the input that goes from the last marked position (m.pb)
// up to, but not including, the current position (m.p).
//
// The result shares its memory with the input; no copy is made.
func (m *machine) text() []byte {
	from, to := m.pb, m.p
	return m.data[from:to]
}
// Parse runs the RFC5424 state machine over input and returns the syslog message it describes.
//
// A valid RFC5424 syslog message yields its structured representation and a nil error.
// When the parsing fails, the returned error tells the position where it stopped.
//
// With best effort mode on, a failure still yields the part of the message parsed
// so far (together with the error) as long as that part is minimally valid;
// in every other failure case the returned message is nil.
func (m *machine) Parse(input []byte) (syslog.Message, error) {
	// Start from a clean state on every call.
	m.err = nil
	m.data = input
	m.p, m.pb, m.msgat = 0, 0, 0
	m.pe, m.eof = len(input), len(input)
	m.backslashat = []int{}

	output := &syslogMessage{}

	%% write init;
	%% write exec;

	// The machine ended in a final state that is not the fail one: success.
	if m.cs >= first_final && m.cs != en_fail {
		return output.export(), nil
	}

	// An error occurred but partial parsing is on and the partial message is minimally valid.
	if m.bestEffort && output.minimal() {
		return output.export(), m.err
	}

	return nil, m.err
}