Skip to content

Commit

Permalink
storage/url: allow wildcards to match new line characters (#505)
Browse files Browse the repository at this point in the history
This commit adds the `s` flag [^1] to the URL filter regexp to "let `.` match
`\n`".

[^1]:
https://github.com/google/re2/wiki/Syntax#:~:text=text%20(default%20false)-,s,-let%20.%20match

Co-authored-by: İlkin Balkanay <[email protected]>
  • Loading branch information
kucukaslan and ilkinulas authored Sep 20, 2022
1 parent 5e867c9 commit 83ce8bc
Showing 7 changed files with 83 additions and 35 deletions.
16 changes: 6 additions & 10 deletions command/exclude.go
Original file line number Diff line number Diff line change
@@ -4,23 +4,19 @@ import (
"path/filepath"
"regexp"
"strings"
)

// wildCardToRegexp translates a wildcard pattern into an anchored regular
// expression string. Every regexp metacharacter in pattern is escaped first;
// the escaped "?" is then rewritten to "." and the escaped "*" to ".*", and
// the result is wrapped in "^" and "$".
func wildCardToRegexp(pattern string) string {
	escaped := regexp.QuoteMeta(pattern)

	// Rewrites are applied in order, on the already escaped text.
	rewrites := [...][2]string{
		{"\\?", "."},
		{"\\*", ".*"},
	}
	for _, rw := range rewrites {
		escaped = strings.Replace(escaped, rw[0], rw[1], -1)
	}

	return "^" + escaped + "$"
}
"github.com/peak/s5cmd/strutil"
)

// createExcludesFromWildcard creates regex strings from wildcard.
func createExcludesFromWildcard(inputExcludes []string) ([]*regexp.Regexp, error) {
var result []*regexp.Regexp
for _, input := range inputExcludes {
if input != "" {
regexVersion := wildCardToRegexp(input)
regexpCompiled, err := regexp.Compile(regexVersion)
regex := strutil.WildCardToRegexp(input)
regex = strutil.MatchFromStartToEnd(regex)
regex = strutil.AddNewLineFlag(regex)
regexpCompiled, err := regexp.Compile(regex)
if err != nil {
return nil, err
}
32 changes: 31 additions & 1 deletion e2e/ls_test.go
Original file line number Diff line number Diff line change
@@ -144,6 +144,32 @@ func TestListSingleWildcardS3Object(t *testing.T) {
}, alignment(true))
}

// TestListWildcardS3ObjectWithNewLineInName uploads several objects, one of
// which has a new line character in its key, runs `ls` with the wildcard
// "n*.txt" and checks the listed lines.
func TestListWildcardS3ObjectWithNewLineInName(t *testing.T) {
	t.Parallel()

	bucket := s3BucketFromTestName(t)
	s3client, s5cmd := setup(t)
	createBucket(t, s3client, bucket)

	// Uploaded in this order; "another.txt" is the only key that does not
	// start with "n".
	objects := []struct {
		key     string
		content string
	}{
		{key: "normal.txt", content: "this is a file content"},
		{key: "another.txt", content: "this is another file content"},
		{key: "newli\ne.txt", content: "this is yet another file content"},
		{key: "nap.txt", content: "this, too, is a file content"},
	}
	for _, obj := range objects {
		putFile(t, s3client, bucket, obj.key, obj.content)
	}

	listing := icmd.RunCmd(s5cmd("ls", "s3://"+bucket+"/n*.txt"))
	listing.Assert(t, icmd.Success)

	// The key containing "\n" is printed over two output lines (1 and 2).
	expected := map[int]compareFunc{
		0: suffix("28 nap.txt"),
		1: suffix("32 newli"),
		2: equals("e.txt"),
		3: suffix("22 normal.txt"),
	}
	assertLines(t, listing.Stdout(), expected)
}

// ls -s bucket/object
func TestListS3ObjectsWithDashS(t *testing.T) {
t.Parallel()
@@ -399,6 +425,8 @@ func TestListS3ObjectsWithExcludeFilter(t *testing.T) {
"a/file.c",
"file2.txt",
"file2.txt.extension", // this should not be excluded.
"newli\ne",
"newli\ne.txt",
}

s3client, s5cmd := setup(t)
@@ -419,7 +447,9 @@ func TestListS3ObjectsWithExcludeFilter(t *testing.T) {
1: match(`a/try.py`),
2: match(`file.py`),
3: match(`file2.txt.extension`),
}, trimMatch(dateRe), alignment(true))
4: match("newli"),
5: match("e"),
}, trimMatch(dateRe), alignment(false))
}

// ls --exclude ".txt" --exclude ".py" s3://bucket
10 changes: 6 additions & 4 deletions e2e/util_test.go
Original file line number Diff line number Diff line change
@@ -23,6 +23,9 @@ import (
"testing"
"time"

"github.com/peak/s5cmd/storage"
"github.com/peak/s5cmd/strutil"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/client"
@@ -34,7 +37,6 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/iancoleman/strcase"
"github.com/igungor/gofakes3"
"github.com/peak/s5cmd/storage"
"gotest.tools/v3/assert"
"gotest.tools/v3/fs"
"gotest.tools/v3/icmd"
@@ -554,7 +556,7 @@ func replaceMatchWithSpace(input string, match ...string) string {
if m == "" {
continue
}
re := regexp.MustCompile(m)
re := regexp.MustCompile(strutil.AddNewLineFlag(m))
input = re.ReplaceAllString(input, " ")
}

@@ -735,7 +737,7 @@ func alignment(v bool) func(*assertOpts) {
}

func trimMatch(match string) func(*assertOpts) {
re := regexp.MustCompile(match)
re := regexp.MustCompile(strutil.AddNewLineFlag(match))
return func(opts *assertOpts) {
opts.trimRegexes = append(opts.trimRegexes, re)
}
@@ -863,7 +865,7 @@ func checkLineAlignments(actual string) error {
}

func match(expected string) compareFunc {
re := regexp.MustCompile(expected)
re := regexp.MustCompile(strutil.AddNewLineFlag(expected))
return func(actual string) error {
if re.MatchString(actual) {
return nil
14 changes: 7 additions & 7 deletions storage/url/url.go
Original file line number Diff line number Diff line change
@@ -10,6 +10,8 @@ import (
"regexp"
"runtime"
"strings"

"github.com/peak/s5cmd/strutil"
)

const (
@@ -249,9 +251,7 @@ func (u *URL) setPrefixAndFilter() error {
return nil
}

loc := strings.IndexAny(u.Path, globCharacters)
wildOperation := loc > -1
if !wildOperation {
if loc := strings.IndexAny(u.Path, globCharacters); loc < 0 {
u.Delimiter = s3Separator
u.Prefix = u.Path
} else {
@@ -261,12 +261,12 @@ func (u *URL) setPrefixAndFilter() error {

filterRegex := matchAllRe
if u.filter != "" {
filterRegex = regexp.QuoteMeta(u.filter)
filterRegex = strings.Replace(filterRegex, "\\?", ".", -1)
filterRegex = strings.Replace(filterRegex, "\\*", ".*?", -1)
filterRegex = strutil.WildCardToRegexp(u.filter)
}
filterRegex = regexp.QuoteMeta(u.Prefix) + filterRegex
r, err := regexp.Compile("^" + filterRegex + "$")
filterRegex = strutil.MatchFromStartToEnd(filterRegex)
filterRegex = strutil.AddNewLineFlag(filterRegex)
r, err := regexp.Compile(filterRegex)
if err != nil {
return err
}
14 changes: 7 additions & 7 deletions storage/url/url_test.go
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@ import (

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/peak/s5cmd/strutil"
)

func TestHasWild(t *testing.T) {
@@ -70,7 +71,7 @@ func TestNew(t *testing.T) {
Prefix: "key",
Delimiter: "/",
},
wantFilterRe: regexp.MustCompile(`^key.*$`).String(),
wantFilterRe: regexp.MustCompile(strutil.AddNewLineFlag(`^key.*$`)).String(),
},
{
name: "url_with_no_wildcard_end_with_slash",
@@ -82,7 +83,7 @@ func TestNew(t *testing.T) {
Prefix: "key/",
Delimiter: "/",
},
wantFilterRe: regexp.MustCompile(`^key/.*$`).String(),
wantFilterRe: regexp.MustCompile(strutil.AddNewLineFlag(`^key/.*$`)).String(),
},
{
name: "url_with_wildcard",
@@ -92,10 +93,10 @@ func TestNew(t *testing.T) {
Bucket: "bucket",
Path: "key/a/?/test/*",
Prefix: "key/a/",
filterRegex: regexp.MustCompile(`^key/a/./test/.*?$`),
filterRegex: regexp.MustCompile(strutil.AddNewLineFlag(`^key/a/./test/.*$`)),
Delimiter: "",
},
wantFilterRe: regexp.MustCompile(`^key/a/./test/.*?$`).String(),
wantFilterRe: regexp.MustCompile(strutil.AddNewLineFlag(`^key/a/./test/.*$`)).String(),
},
}
for _, tc := range tests {
@@ -112,7 +113,6 @@ func TestNew(t *testing.T) {
if tc.wantFilterRe != "" {
if diff := cmp.Diff(tc.wantFilterRe, got.filterRegex.String()); diff != "" {
t.Errorf("test case %q: URL.filterRegex mismatch (-want +got):\n%v", tc.name, diff)

}
}
})
@@ -228,7 +228,7 @@ func TestURLSetPrefixAndFilter(t *testing.T) {
Prefix: "a/b_c/",
Delimiter: "",
filter: "*/de/*/test",
filterRegex: regexp.MustCompile("^a/b_c/.*?/de/.*?/test$"),
filterRegex: regexp.MustCompile(strutil.AddNewLineFlag("^a/b_c/.*/de/.*/test$")),
},
},
{
@@ -241,7 +241,7 @@ func TestURLSetPrefixAndFilter(t *testing.T) {
Prefix: "a/b_c/d/e",
Delimiter: "/",
filter: "",
filterRegex: regexp.MustCompile("^a/b_c/d/e.*$"),
filterRegex: regexp.MustCompile(strutil.AddNewLineFlag("^a/b_c/d/e.*$")),
},
},
}
20 changes: 20 additions & 0 deletions strutil/strutil.go
Original file line number Diff line number Diff line change
@@ -3,7 +3,9 @@ package strutil
import (
"encoding/json"
"fmt"
"regexp"
"strconv"
"strings"
)

var humanDivisors = [...]struct {
@@ -40,3 +42,21 @@ func JSON(v interface{}) string {
bytes, _ := json.Marshal(v)
return string(bytes)
}

// AddNewLineFlag returns pattern prefixed with the "(?s)" flag group, which
// lets "." match the new line character "\n" as well.
// The flag is prepended unconditionally; the pattern is assumed not to carry
// any flags of its own.
func AddNewLineFlag(pattern string) string {
	const dotMatchesNewLine = "(?s)"
	return dotMatchesNewLine + pattern
}

// WildCardToRegexp converts a wildcard expression to the equivalent regular
// expression. All regexp metacharacters in pattern are escaped, after which
// the wildcard "?" stands for "." and the wildcard "*" for ".*". The result
// is not anchored.
func WildCardToRegexp(pattern string) string {
	// QuoteMeta escapes both wildcards, so they are looked up in their
	// escaped form ("\?" and "\*") and swapped for the regexp counterparts.
	wildcards := strings.NewReplacer(
		"\\?", ".",
		"\\*", ".*",
	)
	return wildcards.Replace(regexp.QuoteMeta(pattern))
}

// MatchFromStartToEnd anchors pattern so that the regex has to match the
// full string: it returns pattern with "^" in front and "$" behind it.
// NOTE(review): the pattern is not wrapped in a group, so a pattern with a
// top-level alternation would only have its outer branches anchored.
func MatchFromStartToEnd(pattern string) string {
	const (
		startAnchor = "^"
		endAnchor   = "$"
	)
	return startAnchor + pattern + endAnchor
}
12 changes: 6 additions & 6 deletions command/exclude_test.go → strutil/strutil_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package command
package strutil

import "testing"

func Test_wildCardToRegexp(t *testing.T) {
func Test_WildCardToRegexp(t *testing.T) {
t.Parallel()
tests := []struct {
name string
@@ -12,22 +12,22 @@ func Test_wildCardToRegexp(t *testing.T) {
{
name: "main*",
pattern: "main*",
wanted: "^main.*$",
wanted: "main.*",
},
{
name: "*.txt",
pattern: "*.txt",
wanted: "^.*\\.txt$",
wanted: ".*\\.txt",
},
{
name: "?_main*.txt",
pattern: "?_main*.txt",
wanted: "^._main.*\\.txt$",
wanted: "._main.*\\.txt",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := wildCardToRegexp(tt.pattern); got != tt.wanted {
if got := WildCardToRegexp(tt.pattern); got != tt.wanted {
t.Errorf("wildCardToRegexp() = %v, want %v", got, tt.wanted)
}
})

0 comments on commit 83ce8bc

Please sign in to comment.