diff --git a/parser/parser.go b/parser/parser.go index 5cbb1767..f8c02c23 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -756,7 +756,7 @@ func (p *parser) VisitDouble(ctx *gen.DoubleContext) any { // Visit a parse tree produced by CELParser#String. func (p *parser) VisitString(ctx *gen.StringContext) any { - s := p.unquote(ctx, ctx.GetText(), false) + s := p.unquote(ctx, ctx.GetTok().GetText(), false) return p.helper.newLiteralString(ctx, s) } diff --git a/parser/parser_test.go b/parser/parser_test.go index f174c8fe..f5eb15ab 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1780,6 +1780,13 @@ var testCases = []testInfo{ | ..................^ `, }, + { + I: `'\udead' == '\ufffd'`, + E: ` + ERROR: :1:1: invalid unicode code point + | '\udead' == '\ufffd' + | ^`, + }, } type testInfo struct { diff --git a/parser/unescape.go b/parser/unescape.go index 27c57a9f..43cc9b90 100644 --- a/parser/unescape.go +++ b/parser/unescape.go @@ -15,7 +15,7 @@ package parser import ( - "fmt" + "errors" "strings" "unicode/utf8" ) @@ -30,7 +30,7 @@ func unescape(value string, isBytes bool) (string, error) { // Nothing to unescape / decode. if n < 2 { - return value, fmt.Errorf("unable to unescape string") + return value, errors.New("unable to unescape string") } // Raw string preceded by the 'r|R' prefix. @@ -43,7 +43,7 @@ func unescape(value string, isBytes bool) (string, error) { // Quoted string of some form, must have same first and last char. if value[0] != value[n-1] || (value[0] != '"' && value[0] != '\'') { - return value, fmt.Errorf("unable to unescape string") + return value, errors.New("unable to unescape string") } // Normalize the multi-line CEL string representation to a standard @@ -51,12 +51,12 @@ func unescape(value string, isBytes bool) (string, error) { if n >= 6 { if strings.HasPrefix(value, "'''") { if !strings.HasSuffix(value, "'''") { - return value, fmt.Errorf("unable to unescape string") + return value, errors.New("unable to unescape string") } value = "\"" + value[3:n-3] + "\"" } else if strings.HasPrefix(value, `"""`) { if !strings.HasSuffix(value, `"""`) { - return value, fmt.Errorf("unable to unescape string") + return value, errors.New("unable to unescape string") } value = "\"" + value[3:n-3] + "\"" } @@ -90,10 +90,10 @@ func unescape(value string, isBytes bool) (string, error) { // unescapeChar takes a string input and returns the following info: // -// value - the escaped unicode rune at the front of the string. -// encode - the value should be unicode-encoded -// tail - the remainder of the input string. -// err - error value, if the character could not be unescaped. +// value - the escaped unicode rune at the front of the string. +// encode - the value should be unicode-encoded +// tail - the remainder of the input string. +// err - error value, if the character could not be unescaped. // // When encode is true the return value may still fit within a single byte, // but unicode encoding is attempted which is more expensive than when the @@ -113,7 +113,7 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string, // 2. Last character is the start of an escape sequence. if len(s) <= 1 { - err = fmt.Errorf("unable to unescape string, found '\\' as last character") + err = errors.New("unable to unescape string, found '\\' as last character") return } @@ -157,32 +157,32 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string, case 'u': n = 4 if isBytes { - err = fmt.Errorf("unable to unescape string") + err = errors.New("unable to unescape string") return } case 'U': n = 8 if isBytes { - err = fmt.Errorf("unable to unescape string") + err = errors.New("unable to unescape string") return } } var v rune if len(s) < n { - err = fmt.Errorf("unable to unescape string") + err = errors.New("unable to unescape string") return } for j := 0; j < n; j++ { x, ok := unhex(s[j]) if !ok { - err = fmt.Errorf("unable to unescape string") + err = errors.New("unable to unescape string") return } v = v<<4 | x } s = s[n:] - if !isBytes && v > utf8.MaxRune { - err = fmt.Errorf("unable to unescape string") + if !isBytes && !utf8.ValidRune(v) { + err = errors.New("invalid unicode code point") return } value = v @@ -190,20 +190,20 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string, // 5. Octal escape sequences, must be three digits \[0-3][0-7][0-7] case '0', '1', '2', '3': if len(s) < 2 { - err = fmt.Errorf("unable to unescape octal sequence in string") + err = errors.New("unable to unescape octal sequence in string") return } v := rune(c - '0') for j := 0; j < 2; j++ { x := s[j] if x < '0' || x > '7' { - err = fmt.Errorf("unable to unescape octal sequence in string") + err = errors.New("unable to unescape octal sequence in string") return } v = v*8 + rune(x-'0') } - if !isBytes && v > utf8.MaxRune { - err = fmt.Errorf("unable to unescape string") + if !isBytes && !utf8.ValidRune(v) { + err = errors.New("invalid unicode code point") return } value = v @@ -212,7 +212,7 @@ func unescapeChar(s string, isBytes bool) (value rune, encode bool, tail string, // Unknown escape sequence. default: - err = fmt.Errorf("unable to unescape string") + err = errors.New("unable to unescape string") } tail = s