Skip to content

Commit

Permalink
Handle dash in the regex2glob function
Browse files Browse the repository at this point in the history
  • Loading branch information
ccl-core committed Jan 24, 2025
1 parent e96852f commit f02c756
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 0 deletions.
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def _regex_to_glob_for_str(regex: str) -> Iterable[str]:
regex = re.sub(r"\.\*", "*", regex)
# Interpret .+ as *
regex = re.sub(r"\.\+", "*", regex)
# Interpret \- (an escaped dash) as a literal -
regex = re.sub(r"\\-", "-", regex)
return [regex]


Expand Down
6 changes: 6 additions & 0 deletions python/mlcroissant/mlcroissant/_src/core/regex_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
"*/train/*.parquet", # ...to a valid glob pattern.
],
],
[
"^.+/my\\-train/.*\.parquet$", # From a valid regex...
[
"*/my-train/*.parquet", # ...to a valid glob pattern.
],
],
],
)
def test_regex_to_glob(regex: str, output: list[str]):
Expand Down

0 comments on commit f02c756

Please sign in to comment.