Skip to content

Commit

Permalink
Replace checks for valid UTF-8 in strings with go-maintained calls (#…
Browse files: browse the repository at this point in the history
  • Loading branch information
TristonianJones authored Dec 10, 2024
1 parent bd1ec92 commit 0091f8d
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 22 deletions.
2 changes: 1 addition & 1 deletion parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ func (p *parser) VisitDouble(ctx *gen.DoubleContext) any {

// Visit a parse tree produced by CELParser#String.
func (p *parser) VisitString(ctx *gen.StringContext) any {
s := p.unquote(ctx, ctx.GetText(), false)
s := p.unquote(ctx, ctx.GetTok().GetText(), false)
return p.helper.newLiteralString(ctx, s)
}

Expand Down
7 changes: 7 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,13 @@ var testCases = []testInfo{
| ..................^
`,
},
{
I: `'\udead' == '\ufffd'`,
E: `
ERROR: <input>:1:1: invalid unicode code point
| '\udead' == '\ufffd'
| ^`,
},
}

type testInfo struct {
Expand Down
42 changes: 21 additions & 21 deletions parser/unescape.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
package parser

import (
"fmt"
"errors"
"strings"
"unicode/utf8"
)
Expand All @@ -30,7 +30,7 @@ func unescape(value string, isBytes bool) (string, error) {

// Nothing to unescape / decode.
if n < 2 {
return value, fmt.Errorf("unable to unescape string")
return value, errors.New("unable to unescape string")
}

// Raw string preceded by the 'r|R' prefix.
Expand All @@ -43,20 +43,20 @@ func unescape(value string, isBytes bool) (string, error) {

// Quoted string of some form, must have same first and last char.
if value[0] != value[n-1] || (value[0] != '"' && value[0] != '\'') {
return value, fmt.Errorf("unable to unescape string")
return value, errors.New("unable to unescape string")
}

// Normalize the multi-line CEL string representation to a standard
// Go quoted string.
if n >= 6 {
if strings.HasPrefix(value, "'''") {
if !strings.HasSuffix(value, "'''") {
return value, fmt.Errorf("unable to unescape string")
return value, errors.New("unable to unescape string")
}
value = "\"" + value[3:n-3] + "\""
} else if strings.HasPrefix(value, `"""`) {
if !strings.HasSuffix(value, `"""`) {
return value, fmt.Errorf("unable to unescape string")
return value, errors.New("unable to unescape string")
}
value = "\"" + value[3:n-3] + "\""
}
Expand Down Expand Up @@ -90,10 +90,10 @@ func unescape(value string, isBytes bool) (string, error) {

// unescapeChar takes a string input and returns the following info:
//
// value - the escaped unicode rune at the front of the string.
// encode - the value should be unicode-encoded
// tail - the remainder of the input string.
// err - error value, if the character could not be unescaped.
// value - the escaped unicode rune at the front of the string.
// encode - the value should be unicode-encoded
// tail - the remainder of the input string.
// err - error value, if the character could not be unescaped.
//
// When encode is true the return value may still fit within a single byte,
// but unicode encoding is attempted which is more expensive than when the
Expand All @@ -113,7 +113,7 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string,

// 2. Last character is the start of an escape sequence.
if len(s) <= 1 {
err = fmt.Errorf("unable to unescape string, found '\\' as last character")
err = errors.New("unable to unescape string, found '\\' as last character")
return
}

Expand Down Expand Up @@ -157,53 +157,53 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string,
case 'u':
n = 4
if isBytes {
err = fmt.Errorf("unable to unescape string")
err = errors.New("unable to unescape string")
return
}
case 'U':
n = 8
if isBytes {
err = fmt.Errorf("unable to unescape string")
err = errors.New("unable to unescape string")
return
}
}
var v rune
if len(s) < n {
err = fmt.Errorf("unable to unescape string")
err = errors.New("unable to unescape string")
return
}
for j := 0; j < n; j++ {
x, ok := unhex(s[j])
if !ok {
err = fmt.Errorf("unable to unescape string")
err = errors.New("unable to unescape string")
return
}
v = v<<4 | x
}
s = s[n:]
if !isBytes && v > utf8.MaxRune {
err = fmt.Errorf("unable to unescape string")
if !isBytes && !utf8.ValidRune(v) {
err = errors.New("invalid unicode code point")
return
}
value = v

// 5. Octal escape sequences, must be three digits \[0-3][0-7][0-7]
case '0', '1', '2', '3':
if len(s) < 2 {
err = fmt.Errorf("unable to unescape octal sequence in string")
err = errors.New("unable to unescape octal sequence in string")
return
}
v := rune(c - '0')
for j := 0; j < 2; j++ {
x := s[j]
if x < '0' || x > '7' {
err = fmt.Errorf("unable to unescape octal sequence in string")
err = errors.New("unable to unescape octal sequence in string")
return
}
v = v*8 + rune(x-'0')
}
if !isBytes && v > utf8.MaxRune {
err = fmt.Errorf("unable to unescape string")
if !isBytes && !utf8.ValidRune(v) {
err = errors.New("invalid unicode code point")
return
}
value = v
Expand All @@ -212,7 +212,7 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string,

// Unknown escape sequence.
default:
err = fmt.Errorf("unable to unescape string")
err = errors.New("unable to unescape string")
}

tail = s
Expand Down

0 comments on commit 0091f8d

Please sign in to comment.