Skip to content

Commit

Permalink
Support \u{HEX} syntax in ECMAScript and Unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
mstoykov committed Jun 29, 2022
1 parent 8fc3b60 commit 4d323a2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
1 change: 1 addition & 0 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 (regexp package) compatibility mode
Unicode = 0x0400 // "u"
)

func (re *Regexp) RightToLeft() bool {
Expand Down
14 changes: 14 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,20 @@ func TestECMAScriptXCurlyBraceEscape(t *testing.T) {
}
}

// TestEcmaScriptUnicodeRange checks that a character class built from
// \u{HEX} escapes compiles when both the ECMAScript and Unicode options are
// set, and that the compiled expression finds a match in the input "qqqq".
func TestEcmaScriptUnicodeRange(t *testing.T) {
	r, err := Compile(`([\u{001a}-\u{ffff}]+)`, ECMAScript|Unicode)
	if err != nil {
		// Fail through the testing framework instead of panicking so the
		// failure is attributed to this test and the rest of the suite runs.
		t.Fatalf("unexpected compile error: %v", err)
	}
	m, err := r.FindStringMatch("qqqq")
	if err != nil {
		t.Fatalf("unexpected match error: %v", err)
	}
	if m == nil {
		t.Fatal("Expected non-nil, got nil")
	}
}

func TestNegateRange(t *testing.T) {
re := MustCompile(`[\D]`, 0)
if m, err := re.MatchString("A"); err != nil {
Expand Down
16 changes: 15 additions & 1 deletion syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
Debug = 0x0080 // "d"
ECMAScript = 0x0100 // "e"
RE2 = 0x0200 // RE2 compat mode
Unicode = 0x0400 // "u"
)

func optionFromCode(ch rune) RegexOptions {
Expand All @@ -43,6 +44,8 @@ func optionFromCode(ch rune) RegexOptions {
return Debug
case 'e', 'E':
return ECMAScript
case 'u', 'U':
return Unicode
default:
return 0
}
Expand Down Expand Up @@ -1695,7 +1698,13 @@ func (p *parser) scanCharEscape() (r rune, err error) {
r, err = p.scanHex(2)
}
case 'u':
r, err = p.scanHex(4)
// ECMAScript supports \u{HEX} only if `u` is also set
if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' {
p.moveRight(1)
return p.scanHexUntilBrace()
} else {
r, err = p.scanHex(4)
}
case 'a':
return '\u0007', nil
case 'b':
Expand Down Expand Up @@ -1972,6 +1981,11 @@ func (p *parser) useRE2() bool {
return (p.options & RE2) != 0
}

// useOptionU reports whether the Unicode ("u") option bit is set in the
// parser's current options.
func (p *parser) useOptionU() bool {
	unicodeBit := p.options & Unicode
	return unicodeBit != 0
}

// True if options stack is empty.
func (p *parser) emptyOptionsStack() bool {
return len(p.optionsStack) == 0
Expand Down

0 comments on commit 4d323a2

Please sign in to comment.