diff --git a/regexp.go b/regexp.go index 7c7b01d..818c766 100644 --- a/regexp.go +++ b/regexp.go @@ -121,6 +121,7 @@ const ( Debug = 0x0080 // "d" ECMAScript = 0x0100 // "e" RE2 = 0x0200 // RE2 (regexp package) compatibility mode + Unicode = 0x0400 // "u" ) func (re *Regexp) RightToLeft() bool { diff --git a/regexp_test.go b/regexp_test.go index 111fa74..925c3ea 100644 --- a/regexp_test.go +++ b/regexp_test.go @@ -858,6 +858,20 @@ func TestECMAScriptXCurlyBraceEscape(t *testing.T) { } } +func TestEcmaScriptUnicodeRange(t *testing.T) { + r, err := Compile(`([\u{001a}-\u{ffff}]+)`, ECMAScript|Unicode) + if err != nil { + panic(err) + } + m, err := r.FindStringMatch("qqqq") + if err != nil { + panic(err) + } + if m == nil { + t.Fatal("Expected non-nil, got nil") + } +} + func TestNegateRange(t *testing.T) { re := MustCompile(`[\D]`, 0) if m, err := re.MatchString("A"); err != nil { diff --git a/syntax/parser.go b/syntax/parser.go index 5b7eafa..839540c 100644 --- a/syntax/parser.go +++ b/syntax/parser.go @@ -22,6 +22,7 @@ const ( Debug = 0x0080 // "d" ECMAScript = 0x0100 // "e" RE2 = 0x0200 // RE2 compat mode + Unicode = 0x0400 // "u" ) func optionFromCode(ch rune) RegexOptions { @@ -43,6 +44,8 @@ func optionFromCode(ch rune) RegexOptions { return Debug case 'e', 'E': return ECMAScript + case 'u', 'U': + return Unicode default: return 0 } @@ -1695,7 +1698,13 @@ func (p *parser) scanCharEscape() (r rune, err error) { r, err = p.scanHex(2) } case 'u': - r, err = p.scanHex(4) + // ECMAScript supports \u{HEX} only if the `u` option is also set + if p.useOptionE() && p.useOptionU() && p.charsRight() > 0 && p.rightChar(0) == '{' { + p.moveRight(1) + return p.scanHexUntilBrace() + } else { + r, err = p.scanHex(4) + } case 'a': return '\u0007', nil case 'b': @@ -1972,6 +1981,11 @@ func (p *parser) useRE2() bool { return (p.options & RE2) != 0 } +// True if the U option, which enables ECMAScript's Unicode behavior, is on. 
+func (p *parser) useOptionU() bool { + return (p.options & Unicode) != 0 +} + // True if options stack is empty. func (p *parser) emptyOptionsStack() bool { return len(p.optionsStack) == 0