Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eskip: improve lexer performance 2 #2870

Merged
merged 2 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ opentracingplugin/build
build/
skptesting/lorem.html
.vscode/*
*.test

34 changes: 34 additions & 0 deletions eskip/eskip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package eskip
import (
"reflect"
"regexp"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -849,6 +850,39 @@ func BenchmarkParsePredicates(b *testing.B) {
}
}

// BenchmarkParse measures end-to-end parsing of a large eskip routing
// document. The fixture is one realistic anonymized route definition
// (predicates, filters, and an LB backend) repeated 10,000 times, which
// exercises the lexer hot paths (symbols, strings, regexps, numbers).
func BenchmarkParse(b *testing.B) {
doc := strings.Repeat(`xxxx_xx__xxxxx__xxx_xxxxxxxx_xxxxxxxxxx_xxxxxxx_xxxxxxx_xxxxxxx_xxxxx__xxx__40_0:
Path("/xxxxxxxxx/:xxxxxxxx_xx/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx")
&& Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-18[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-19[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-20[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxxxxxxxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx[.]xxxxxxxxxxx[.]xxx[.]?(:[0-9]+)?)$")
&& Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?)$")
&& Weight(4)
&& Method("GET")
&& JWTPayloadAllKV("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxx", "xxxxx")
&& Header("X-Xxxxxxxxx-Xxxxx", "xxxxx")
-> disableAccessLog(2, 3, 40, 500)
-> fifo(1000, 100, "10s")
-> apiUsageMonitoring("{\"xxx_xx\":\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\",\"xxxxxxxxxxx_xx\":\"xxx-xxxxxxxx-xxxxxxxxxx\",\"xxxx_xxxxxxxxx\":[\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx\"]}")
-> oauthTokeninfoAnyKV("xxxxx", "/xxxxxxxxx")
-> unverifiedAuditLog("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxxxx-xx")
-> oauthTokeninfoAllScope("xxx")
-> flowId("reuse")
-> forwardToken("X-XxxxxXxxx-Xxxxxxx", "xxx", "xxxxx", "xxxxx")
-> stateBagToTag("xxxx-xxxx", "xxxxxx.xxx")
-> <powerOfRandomNChoices, "http://1.2.1.1:8080", "http://1.2.1.2:8080", "http://1.2.1.3:8080", "http://1.2.1.4:8080", "http://1.2.1.5:8080">;
`, 10_000)

// Parse once up front so an invalid fixture fails the benchmark
// immediately instead of silently timing error paths.
_, err := Parse(doc)
if err != nil {
b.Fatal(err)
}

b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = Parse(doc)
}
}

var stringSink string

func BenchmarkRouteString(b *testing.B) {
Expand Down
77 changes: 36 additions & 41 deletions eskip/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"strings"
"unicode"
)

type token struct {
Expand All @@ -14,14 +13,6 @@ type token struct {

type charPredicate func(byte) bool

type scanner interface {
scan(string) (token, string, error)
}

type scannerFunc func(string) (token, string, error)

func (sf scannerFunc) scan(code string) (token, string, error) { return sf(code) }

type eskipLex struct {
code string
lastToken string
Expand Down Expand Up @@ -78,11 +69,11 @@ func (l *eskipLex) init(code string) {

// isNewline reports whether c is the newline character.
func isNewline(c byte) bool { return c == newlineChar }

// isUnderscore reports whether c is the underscore character.
func isUnderscore(c byte) bool { return c == underscore }
Comment on lines -81 to -82
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent but inlines better, since c is a byte and the unicode.* functions perform an internal ASCII fast-path check.

func isSymbolChar(c byte) bool { return isUnderscore(c) || isAlpha(c) || isDigit(c) }
// isAlpha reports whether c is an ASCII letter (a-z or A-Z).
func isAlpha(c byte) bool { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') }
// isDigit reports whether c is an ASCII decimal digit (0-9).
func isDigit(c byte) bool { return '0' <= c && c <= '9' }
// isSymbolChar reports whether c may appear in an unquoted symbol:
// an ASCII letter, digit, or underscore. The more frequent character
// classes are tested first so the common case short-circuits early.
func isSymbolChar(c byte) bool { return isAlpha(c) || isDigit(c) || isUnderscore(c) }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check the more frequent character class first.

func isDecimalChar(c byte) bool { return c == decimalChar }
func isNumberChar(c byte) bool { return isDecimalChar(c) || isDigit(c) }
func isNumberChar(c byte) bool { return isDigit(c) || isDecimalChar(c) }

func scanWhile(code string, p charPredicate) (string, string) {
for i := 0; i < len(code); i++ {
Expand Down Expand Up @@ -277,74 +268,78 @@ func scanNumber(code string) (t token, rest string, err error) {

// scanSymbol scans a symbol token from the start of code: the longest
// prefix consisting of symbol characters (letters, digits, underscore).
// It never fails; when the whole input is symbol characters, rest is
// empty. The stale pre-change scanWhile call (diff residue) is removed:
// the inline loop below already computes val and rest.
func scanSymbol(code string) (t token, rest string, err error) {
	t.id = symbol
	// Inline the scan loop instead of calling scanWhile so the byte
	// predicate inlines; symbols dominate typical route documents.
	for i := 0; i < len(code); i++ {
		if !isSymbolChar(code[i]) {
			t.val, rest = code[0:i], code[i:]
			return
		}
	}
	t.val, rest = code, ""
	return
}
Comment on lines 269 to 279
Copy link
Member Author

@AlexanderYastrebov AlexanderYastrebov Jan 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before this change, scanSymbol had poor performance — slower than scanning an escaped string:

Screenshot from 2024-01-16 22-37-12

After
Screenshot from 2024-01-16 22-38-19


func selectScanner(code string) scanner {
func scan(code string) (token, string, error) {
switch code[0] {
case ',':
return commaToken
return commaToken.scan(code)
case ')':
return closeparenToken
return closeparenToken.scan(code)
case '(':
return openparenToken
return openparenToken.scan(code)
case ':':
return colonToken
return colonToken.scan(code)
case ';':
return semicolonToken
return semicolonToken.scan(code)
case '>':
return closearrowToken
return closearrowToken.scan(code)
case '*':
return anyToken
return anyToken.scan(code)
case '&':
if len(code) >= 2 && code[1] == '&' {
return andToken
return andToken.scan(code)
}
case '-':
if len(code) >= 2 && code[1] == '>' {
return arrowToken
return arrowToken.scan(code)
}
case '/':
return scannerFunc(scanRegexpOrComment)
return scanRegexpOrComment(code)
case '"':
return scannerFunc(scanDoubleQuote)
return scanDoubleQuote(code)
case '`':
return scannerFunc(scanBacktick)
return scanBacktick(code)
case '<':
for _, tok := range openarrowPrefixedTokens {
if strings.HasPrefix(code, tok.val) {
return tok
return tok.scan(code)
}
}
return openarrowToken
return openarrowToken.scan(code)
}

if isNumberChar(code[0]) {
return scannerFunc(scanNumber)
return scanNumber(code)
}

if isAlpha(code[0]) || isUnderscore(code[0]) {
return scannerFunc(scanSymbol)
return scanSymbol(code)
}

return nil
return token{}, "", unexpectedToken
}

func (l *eskipLex) next() (t token, err error) {
func (l *eskipLex) next() (token, error) {
l.code = scanWhitespace(l.code)
if len(l.code) == 0 {
err = eof
return
return token{}, eof
}

s := selectScanner(l.code)
if s == nil {
err = unexpectedToken
return
t, rest, err := scan(l.code)
if err == unexpectedToken {
return token{}, err
}
l.code = rest

t, l.code, err = s.scan(l.code)
if err == void {
return l.next()
}
Expand All @@ -353,7 +348,7 @@ func (l *eskipLex) next() (t token, err error) {
l.lastToken = t.val
}

return
return t, err
}

func (l *eskipLex) Lex(lval *eskipSymType) int {
Expand Down