Skip to content

Commit

Permalink
fix: ecma ranges with set terminator
Browse files Browse the repository at this point in the history
Fix ECMAScript un-escaped literal '-' when followed by predefined
character sets.

Also:
* Fixed missing error check on parseProperty() call.
* Use addChar(ch) helper instead of addRange(ch, ch).

Fixes dlclark#54
  • Loading branch information
stevenh committed Dec 6, 2022
1 parent 3511044 commit 00a2f4c
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 7 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
module github.com/dlclark/regexp2

go 1.13

require github.com/stretchr/testify v1.8.1
17 changes: 17 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
50 changes: 50 additions & 0 deletions regexp_ecma_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package regexp2_test

import (
"testing"

"github.com/dlclark/regexp2"
"github.com/stretchr/testify/require"
)

// TestECMA_basic compiles each expression with the ECMAScript option, walks
// every successive match in the input, and checks that the flattened list of
// capture strings (all groups, all captures, in match order) equals want.
func TestECMA_basic(t *testing.T) {
	type ecmaCase struct {
		expr string
		data string
		want []string
	}

	cases := map[string]ecmaCase{
		// A plain character range.
		"charset": {
			expr: `[a-c]`,
			data: "abcd",
			want: []string{"a", "b", "c"},
		},
		// A '-' followed by a predefined class: the expected matches
		// include the literal "-" alongside "a" and the space.
		"charset-set": {
			expr: `[a-\s]`,
			data: "a-b cd",
			want: []string{"a", "-", " "},
		},
	}

	for label, tc := range cases {
		t.Run(label, func(t *testing.T) {
			re, err := regexp2.Compile(tc.expr, regexp2.ECMAScript)
			require.NoError(t, err)

			var got []string
			m, err := re.FindStringMatch(tc.data)
			for {
				// Check the error from whichever Find* call produced m
				// before deciding whether to stop.
				require.NoError(t, err)
				if m == nil {
					break
				}

				for _, grp := range m.Groups() {
					for _, capture := range grp.Captures {
						got = append(got, capture.String())
					}
				}

				m, err = re.FindNextMatch(m)
			}

			require.Equal(t, tc.want, got)
		})
	}
}
33 changes: 26 additions & 7 deletions syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -1427,7 +1427,7 @@ func (p *parser) scanCapname() string {
return string(p.pattern[startpos:p.textpos()])
}

//Scans contents of [] (not including []'s), and converts to a set.
// Scans contents of [] (not including []'s), and converts to a set.
func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
ch := '\x00'
chPrev := '\x00'
Expand Down Expand Up @@ -1467,7 +1467,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
case 'D', 'd':
if !scanOnly {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
if !p.useOptionE() {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addChar('-')
cc.addChar(chPrev)
}
cc.addDigit(p.useOptionE() || p.useRE2(), ch == 'D', p.patternRaw)
}
Expand All @@ -1476,7 +1480,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
case 'S', 's':
if !scanOnly {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
if !p.useOptionE() {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addChar('-')
cc.addChar(chPrev)
}
cc.addSpace(p.useOptionE(), p.useRE2(), ch == 'S')
}
Expand All @@ -1485,7 +1493,11 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
case 'W', 'w':
if !scanOnly {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
if !p.useOptionE() {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addChar('-')
cc.addChar(chPrev)
}

cc.addWord(p.useOptionE() || p.useRE2(), ch == 'W')
Expand All @@ -1495,22 +1507,29 @@ func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
case 'p', 'P':
if !scanOnly {
if inRange {
return nil, p.getErr(ErrBadClassInCharRange, ch)
if !p.useOptionE() {
return nil, p.getErr(ErrBadClassInCharRange, ch)
}
cc.addChar('-')
cc.addChar(chPrev)
}
prop, err := p.parseProperty()
if err != nil {
return nil, err
}
cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
} else {
p.parseProperty()
_, err := p.parseProperty()
if err != nil {
return nil, err
}
}

continue

case '-':
if !scanOnly {
cc.addRange(ch, ch)
cc.addChar(ch)
}
continue

Expand Down

0 comments on commit 00a2f4c

Please sign in to comment.