eskip: improve lexer performance 2
* use plain ASCII range checks instead of the unicode package (sketched below)
* use a loop in scanSymbol instead of scanWhile
* call scan functions directly instead of going through selectScanner, to aid inlining
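
For context on the first bullet: the change replaces `unicode.IsLetter`/`unicode.IsDigit` calls on converted runes with plain byte-range comparisons. A minimal standalone micro-benchmark of the two character-class checks (a hypothetical `_test.go` sketch, not part of this commit) could look like:

```go
package lexerbench

import (
	"testing"
	"unicode"
)

// Old shape: convert the byte to a rune and consult the unicode tables.
func isAlphaUnicode(c byte) bool { return unicode.IsLetter(rune(c)) }

// New shape: plain ASCII range comparison, no conversion and no table lookup.
func isAlphaASCII(c byte) bool {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
}

// sink prevents the compiler from optimizing the benchmarked calls away.
var sink bool

func BenchmarkIsAlphaUnicode(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = isAlphaUnicode(byte(i))
	}
}

func BenchmarkIsAlphaASCII(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = isAlphaASCII(byte(i))
	}
}
```

The benchstat comparison below measures the end-to-end effect on the eskip parser benchmarks.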

```
goos: linux
goarch: amd64
pkg: github.com/zalando/skipper/eskip
                  │    HEAD~1    │                HEAD                 │
                  │    sec/op    │   sec/op     vs base                │
ParsePredicates-8   9.637µ ± 11%   8.894µ ± 4%   -7.71% (p=0.001 n=10)
Parse-8             329.1m ±  4%   272.7m ± 2%  -17.15% (p=0.000 n=10)
geomean             1.781m         1.557m       -12.56%

                  │    HEAD~1    │                HEAD                 │
                  │     B/op     │     B/op      vs base               │
ParsePredicates-8   2.008Ki ± 0%   2.008Ki ± 0%       ~ (p=1.000 n=10)
Parse-8             49.94Mi ± 0%   49.94Mi ± 0%       ~ (p=0.926 n=10)
geomean             320.4Ki        320.4Ki       -0.00%

                  │   HEAD~1    │                 HEAD                 │
                  │  allocs/op  │  allocs/op   vs base                 │
ParsePredicates-8    33.00 ± 0%    33.00 ± 0%       ~ (p=1.000 n=10) ¹
Parse-8             1.100M ± 0%   1.100M ± 0%       ~ (p=0.367 n=10)
geomean             6.025k        6.025k       +0.00%
¹ all samples are equal
```

See previous #2755

Signed-off-by: Alexander Yastrebov <[email protected]>
AlexanderYastrebov committed Jan 16, 2024
1 parent 9270779 commit b039073
Showing 1 changed file with 30 additions and 39 deletions.
eskip/lexer.go

```diff
@@ -4,7 +4,6 @@ import (
 	"errors"
 	"fmt"
 	"strings"
-	"unicode"
 )
 
 type token struct {
@@ -14,14 +13,6 @@ type token struct {
 
 type charPredicate func(byte) bool
 
-type scanner interface {
-	scan(string) (token, string, error)
-}
-
-type scannerFunc func(string) (token, string, error)
-
-func (sf scannerFunc) scan(code string) (token, string, error) { return sf(code) }
-
 type eskipLex struct {
 	code string
 	lastToken string
@@ -78,11 +69,11 @@ func (l *eskipLex) init(code string) {
 
 func isNewline(c byte) bool { return c == newlineChar }
 func isUnderscore(c byte) bool { return c == underscore }
-func isAlpha(c byte) bool { return unicode.IsLetter(rune(c)) }
-func isDigit(c byte) bool { return unicode.IsDigit(rune(c)) }
-func isSymbolChar(c byte) bool { return isUnderscore(c) || isAlpha(c) || isDigit(c) }
+func isAlpha(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') }
+func isDigit(c byte) bool { return c >= '0' && c <= '9' }
+func isSymbolChar(c byte) bool { return isAlpha(c) || isDigit(c) || isUnderscore(c) }
 func isDecimalChar(c byte) bool { return c == decimalChar }
-func isNumberChar(c byte) bool { return isDecimalChar(c) || isDigit(c) }
+func isNumberChar(c byte) bool { return isDigit(c) || isDecimalChar(c) }
 
 func scanWhile(code string, p charPredicate) (string, string) {
 	for i := 0; i < len(code); i++ {
@@ -277,58 +268,64 @@ func scanNumber(code string) (t token, rest string, err error) {
 
 func scanSymbol(code string) (t token, rest string, err error) {
 	t.id = symbol
-	t.val, rest = scanWhile(code, isSymbolChar)
+	for i := 0; i < len(code); i++ {
+		if !isSymbolChar(code[i]) {
+			t.val, rest = code[0:i], code[i:]
+			return
+		}
+	}
+	t.val, rest = code, ""
 	return
 }
 
-func selectScanner(code string) scanner {
+func scan(code string) (token, string, error) {
 	switch code[0] {
 	case ',':
-		return commaToken
+		return commaToken.scan(code)
 	case ')':
-		return closeparenToken
+		return closeparenToken.scan(code)
 	case '(':
-		return openparenToken
+		return openparenToken.scan(code)
 	case ':':
-		return colonToken
+		return colonToken.scan(code)
 	case ';':
-		return semicolonToken
+		return semicolonToken.scan(code)
 	case '>':
-		return closearrowToken
+		return closearrowToken.scan(code)
 	case '*':
-		return anyToken
+		return anyToken.scan(code)
 	case '&':
 		if len(code) >= 2 && code[1] == '&' {
-			return andToken
+			return andToken.scan(code)
 		}
 	case '-':
 		if len(code) >= 2 && code[1] == '>' {
-			return arrowToken
+			return arrowToken.scan(code)
 		}
 	case '/':
-		return scannerFunc(scanRegexpOrComment)
+		return scanRegexpOrComment(code)
 	case '"':
-		return scannerFunc(scanDoubleQuote)
+		return scanDoubleQuote(code)
 	case '`':
-		return scannerFunc(scanBacktick)
+		return scanBacktick(code)
 	case '<':
 		for _, tok := range openarrowPrefixedTokens {
 			if strings.HasPrefix(code, tok.val) {
-				return tok
+				return tok.scan(code)
 			}
 		}
-		return openarrowToken
+		return openarrowToken.scan(code)
 	}
 
 	if isNumberChar(code[0]) {
-		return scannerFunc(scanNumber)
+		return scanNumber(code)
 	}
 
 	if isAlpha(code[0]) || isUnderscore(code[0]) {
-		return scannerFunc(scanSymbol)
+		return scanSymbol(code)
 	}
 
-	return nil
+	return token{}, "", unexpectedToken
 }
 
 func (l *eskipLex) next() (t token, err error) {
@@ -338,13 +335,7 @@ func (l *eskipLex) next() (t token, err error) {
 		return
 	}
 
-	s := selectScanner(l.code)
-	if s == nil {
-		err = unexpectedToken
-		return
-	}
-
-	t, l.code, err = s.scan(l.code)
+	t, l.code, err = scan(l.code)
 	if err == void {
 		return l.next()
 	}
```
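
The last two bullets remove indirect calls from the hot path: scanSymbol no longer goes through the charPredicate parameter of scanWhile, and next no longer calls scan through the scanner interface returned by selectScanner. A simplified sketch of the dispatch change, using made-up types rather than the eskip ones, shows the shape the inliner now sees:

```go
package main

import "fmt"

type tok struct{ id, val string }

// scanWord consumes a leading run of lowercase letters.
func scanWord(code string) (tok, string) {
	i := 0
	for i < len(code) && code[i] >= 'a' && code[i] <= 'z' {
		i++
	}
	return tok{"word", code[:i]}, code[i:]
}

// Before (roughly): the selector returns a function value, so the caller
// makes an indirect call that the compiler cannot inline.
func selectScanner(code string) func(string) (tok, string) {
	return scanWord
}

// After (roughly): dispatch and scanning live in one function; scanWord is
// called directly at each arm, so it is a candidate for inlining
// (`go build -gcflags=-m` reports the compiler's inlining decisions).
func scan(code string) (tok, string) {
	switch code[0] {
	case ',':
		return tok{"comma", ","}, code[1:]
	default:
		return scanWord(code)
	}
}

func main() {
	t, rest := selectScanner("abc,def")("abc,def")
	fmt.Println(t, rest)

	t, rest = scan("abc,def")
	fmt.Println(t, rest)
}
```

Whether a given direct call is actually inlined depends on the compiler's cost model, so the benchstat numbers above remain the authoritative measure.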
