Skip to content

Commit

Permalink
eskip: separate parsing of predicates and filters
Browse files Browse the repository at this point in the history
Introduce start tokens to allow separate parsing of
eskip document, predicates and filters, see
https://www.gnu.org/software/bison/manual/html_node/Multiple-start_002dsymbols.html

Also remove the `stringval` and `regexpval` rules.
These rules are aliases for `stringliteral`, which can be used directly.
This reduces parser stack depth and item size.

The change fixes the parse error position value and speeds up predicate and filter parsing.

```
goos: linux
goarch: amd64
pkg: github.com/zalando/skipper/eskip
cpu: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
                  │   master    │                HEAD                 │
                  │   sec/op    │   sec/op     vs base                │
ParsePredicates-8   7.626µ ± 2%   6.491µ ± 2%  -14.88% (p=0.000 n=10)
Parse-8             237.7m ± 1%   229.7m ± 2%   -3.36% (p=0.000 n=10)
geomean             1.346m        1.221m        -9.30%

                  │    master    │                 HEAD                 │
                  │     B/op     │     B/op      vs base                │
ParsePredicates-8   1.961Ki ± 0%   1.359Ki ± 0%  -30.68% (p=0.000 n=10)
Parse-8             49.02Mi ± 0%   49.02Mi ± 0%        ~ (p=0.529 n=10)
geomean             313.7Ki        261.2Ki       -16.74%

                  │   master    │                HEAD                │
                  │  allocs/op  │  allocs/op   vs base               │
ParsePredicates-8    32.00 ± 0%    29.00 ± 0%  -9.38% (p=0.000 n=10)
Parse-8             1.080M ± 0%   1.080M ± 0%       ~ (p=0.777 n=10)
geomean             5.879k        5.596k       -4.80%

        │    master    │               HEAD               │
        │   bytes/op   │   bytes/op    vs base            │
Parse-8   15.99Mi ± 0%   15.99Mi ± 0%  ~ (p=1.000 n=10) ¹
¹ all samples are equal
```

Signed-off-by: Alexander Yastrebov <[email protected]>
  • Loading branch information
AlexanderYastrebov committed Jan 19, 2024
1 parent 5c1a94f commit 82fb4b7
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 149 deletions.
10 changes: 5 additions & 5 deletions cmd/webhook/admission/admission_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func TestRouteGroupAdmitter(t *testing.T) {
{
name: "invalid eskip filters",
inputFile: "rg-with-invalid-eskip-filters.json",
message: "parse failed after token status, position 11: syntax error",
message: "parse failed after token status, position 6: syntax error",
},
{
name: "valid eskip predicates",
Expand All @@ -121,7 +121,7 @@ func TestRouteGroupAdmitter(t *testing.T) {
{
name: "invalid eskip filters and predicates",
inputFile: "rg-with-invalid-eskip-filters-and-predicates.json",
message: "parse failed after token status, position 11: syntax error\\nparse failed after token Method, position 6: syntax error",
message: "parse failed after token status, position 6: syntax error\\nparse failed after token Method, position 6: syntax error",
},
{
name: "invalid routgroup multiple filters per json/yaml array item",
Expand Down Expand Up @@ -185,12 +185,12 @@ func TestIngressAdmitter(t *testing.T) {
{
name: "invalid eskip filters",
inputFile: "invalid-filters.json",
message: `invalid \"zalando.org/skipper-filter\" annotation: parse failed after token this, position 9: syntax error`,
message: `invalid \"zalando.org/skipper-filter\" annotation: parse failed after token this, position 4: syntax error`,
},
{
name: "invalid eskip predicates",
inputFile: "invalid-predicates.json",
message: `invalid \"zalando.org/skipper-predicate\" annotation: parse failed after token ), position 15: syntax error`,
message: `invalid \"zalando.org/skipper-predicate\" annotation: parse failed after token ), position 15: unexpected token`,
},
{
name: "invalid eskip routes",
Expand All @@ -200,7 +200,7 @@ func TestIngressAdmitter(t *testing.T) {
{
name: "invalid eskip filters and predicates",
inputFile: "invalid-filters-and-predicates.json",
message: `invalid \"zalando.org/skipper-filter\" annotation: parse failed after token this, position 9: syntax error\ninvalid \"zalando.org/skipper-predicate\" annotation: parse failed after token ), position 15: syntax error`,
message: `invalid \"zalando.org/skipper-filter\" annotation: parse failed after token this, position 4: syntax error\ninvalid \"zalando.org/skipper-predicate\" annotation: parse failed after token ), position 15: unexpected token`,
},
} {
t.Run(tc.name, func(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
\[routegroup\] parse failed after token foo, position 8: syntax error
\[routegroup\] parse failed after token foo, position 3: syntax error
Original file line number Diff line number Diff line change
@@ -1 +1 @@
\[routegroup\] parse failed after token foo, position 8: syntax error
\[routegroup\] parse failed after token foo, position 3: syntax error
43 changes: 27 additions & 16 deletions eskip/eskip.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,22 @@ type eskipLexParser struct {

var parserPool sync.Pool

// executes the parser.
func parse(code string) ([]*parsedRoute, error) {
// parseDocument parses a complete eskip routing document and returns the
// parsed routes. The predicate and filter results of the underlying parse
// call are not used for this start symbol.
func parseDocument(code string) (routes []*parsedRoute, err error) {
	routes, _, _, err = parse(start_document, code)
	return
}

// parsePredicates parses a standalone predicate expression and returns the
// parsed predicates. The route and filter results of the underlying parse
// call are not used for this start symbol.
func parsePredicates(code string) (predicates []*Predicate, err error) {
	_, predicates, _, err = parse(start_predicates, code)
	return
}

// parseFilters parses a standalone filter expression and returns the
// parsed filters. The route and predicate results of the underlying parse
// call are not used for this start symbol.
func parseFilters(code string) (filters []*Filter, err error) {
	_, _, filters, err = parse(start_filters, code)
	return
}

func parse(start int, code string) ([]*parsedRoute, []*Predicate, []*Filter, error) {
lp, ok := parserPool.Get().(*eskipLexParser)
if !ok {
lp = &eskipLexParser{}
Expand All @@ -598,16 +612,19 @@ func parse(code string) ([]*parsedRoute, error) {
}
defer parserPool.Put(lp)

lp.lexer.init(code)
lexer := &lp.lexer
lexer.init(start, code)

lp.parser.Parse(&lp.lexer)
lp.parser.Parse(lexer)

return lp.lexer.routes, lp.lexer.err
// Do not return lexer to avoid reading lexer fields after returning eskipLexParser to the pool.
// Let the caller decide which of return values to use based on the start token.
return lexer.routes, lexer.predicates, lexer.filters, lexer.err
}

// Parses a route expression or a routing document to a set of route definitions.
func Parse(code string) ([]*Route, error) {
parsedRoutes, err := parse(code)
parsedRoutes, err := parseDocument(code)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -662,12 +679,7 @@ func ParseFilters(f string) ([]*Filter, error) {
return nil, nil
}

rs, err := parse("* -> " + f + " -> <shunt>")
if err != nil {
return nil, err
}

return rs[0].filters, nil
return parseFilters(f)
}

// ParsePredicates parses a set of predicates (combined by '&&') into
Expand All @@ -678,7 +690,7 @@ func ParsePredicates(p string) ([]*Predicate, error) {
return nil, nil
}

rs, err := parse(p + " -> <shunt>")
rs, err := parsePredicates(p)
if err != nil {
return nil, err
}
Expand All @@ -687,9 +699,8 @@ func ParsePredicates(p string) ([]*Predicate, error) {
return nil, nil
}

r := rs[0]
ps := make([]*Predicate, 0, len(r.predicates))
for _, p := range r.predicates {
ps := make([]*Predicate, 0, len(rs))
for _, p := range rs {
if p.Name != "*" {
ps = append(ps, p)
}
Expand Down
51 changes: 47 additions & 4 deletions eskip/eskip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,19 +239,39 @@ func TestParseFilters(t *testing.T) {
"error",
"trallala",
nil,
// TODO: fix position
"parse failed after token ->, position 16: syntax error",
"parse failed after token trallala, position 8: syntax error",
}, {
"error 2",
"foo-bar",
nil,
// TODO: fix position
"parse failed after token foo, position 8: syntax error",
"parse failed after token foo, position 3: syntax error",
}, {
"success",
`filter1(3.14) -> filter2("key", 42)`,
[]*Filter{{Name: "filter1", Args: []interface{}{3.14}}, {Name: "filter2", Args: []interface{}{"key", float64(42)}}},
"",
}, {
"a comment produces empty filters without error",
"// a comment",
nil,
"",
}, {
"a trailing comment is ignored",
`filter1(3.14) -> filter2("key", 42) // a trailing comment`,
[]*Filter{{Name: "filter1", Args: []interface{}{3.14}}, {Name: "filter2", Args: []interface{}{"key", float64(42)}}},
"",
}, {
"a comment before is ignored",
`// a comment on a separate line
filter1(3.14) -> filter2("key", 42)`,
[]*Filter{{Name: "filter1", Args: []interface{}{3.14}}, {Name: "filter2", Args: []interface{}{"key", float64(42)}}},
"",
}, {
"a comment after is ignored",
`filter1(3.14) -> filter2("key", 42)
// a comment on a separate line`,
[]*Filter{{Name: "filter1", Args: []interface{}{3.14}}, {Name: "filter2", Args: []interface{}{"key", float64(42)}}},
"",
}} {
t.Run(ti.msg, func(t *testing.T) {
fs, err := ParseFilters(ti.expression)
Expand Down Expand Up @@ -306,6 +326,29 @@ func TestParsePredicates(t *testing.T) {
title: "comment fuzz 2", // "\x2f\x2f..." == "//..."
input: "\x2f\x2f\x00\x00\x00\xe6\xfe\x00\x00\x2f\x00\x00\x00\x00\x00\x00\x00\xe6\xfe\x00\x00\x2f\x00\x00\x00\x00",
expected: nil,
}, {
title: "a trailing comment is ignored",
input: `Foo("bar") && Baz("qux") // a trailing comment`,
expected: []*Predicate{
{Name: "Foo", Args: []interface{}{"bar"}},
{Name: "Baz", Args: []interface{}{"qux"}},
},
}, {
title: "a comment before is ignored",
input: `// a comment on a separate line
Foo("bar") && Baz("qux")`,
expected: []*Predicate{
{Name: "Foo", Args: []interface{}{"bar"}},
{Name: "Baz", Args: []interface{}{"qux"}},
},
}, {
title: "a comment after is ignored",
input: `Foo("bar") && Baz("qux")
// a comment on a separate line`,
expected: []*Predicate{
{Name: "Foo", Args: []interface{}{"bar"}},
{Name: "Baz", Args: []interface{}{"qux"}},
},
}} {
t.Run(test.title, func(t *testing.T) {
ps, err := ParsePredicates(test.input)
Expand Down
13 changes: 12 additions & 1 deletion eskip/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@ type token struct {
type charPredicate func(byte) bool

// eskipLex is the lexer state handed to the generated yacc parser.
type eskipLex struct {
	// start is the pseudo start token that Lex emits before any real
	// token to select the grammar start symbol (document, predicates
	// or filters); zero once it has been emitted.
	start int
	// code is the not-yet-scanned remainder of the input.
	code string
	// lastToken is the most recently scanned token text; appears in
	// "parse failed after token ..." error messages.
	lastToken string
	// lastRouteID is the id of the route most recently seen —
	// presumably for error reporting; confirm against error handling.
	lastRouteID string
	// err holds the first parse/lex error, returned by parse().
	err error
	// initialLength is len(code) at init time; presumably used with the
	// shrinking code to derive the position reported in errors — confirm.
	initialLength int
	// Exactly one of the following result slices is populated,
	// depending on the start token used for this parse.
	routes     []*parsedRoute
	predicates []*Predicate
	filters    []*Filter
}

type fixedScanner token
Expand Down Expand Up @@ -62,7 +65,8 @@ func (fs *fixedScanner) scan(code string) (t token, rest string, err error) {
return token(*fs), code[len(fs.val):], nil
}

func (l *eskipLex) init(code string) {
// init prepares the lexer for a new parse. start is the pseudo start
// token Lex emits first to select the grammar start symbol; code is the
// input to scan.
func (l *eskipLex) init(start int, code string) {
	l.start = start
	l.code = code
	// Remember the original input length; presumably used together with
	// the shrinking l.code to compute error positions — TODO confirm.
	l.initialLength = len(code)
}
Expand Down Expand Up @@ -352,6 +356,13 @@ func (l *eskipLex) next() (token, error) {
}

func (l *eskipLex) Lex(lval *eskipSymType) int {
// first emit the start token
if l.start != 0 {
start := l.start
l.start = 0
return start
}

t, err := l.next()
if err == eof {
return -1
Expand Down
Loading

0 comments on commit 82fb4b7

Please sign in to comment.