From c3b4bc36a8c2fa1b98429f5cd963ea0b4d04ad11 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois <37172224+john-jam@users.noreply.github.com> Date: Tue, 22 Aug 2023 22:47:43 +0900 Subject: [PATCH] Better double asterisks `**` support (#1329) --- fsspec/asyn.py | 104 ++- fsspec/implementations/http.py | 47 +- fsspec/implementations/local.py | 19 - .../tests/local/local_fixtures.py | 6 +- fsspec/implementations/tests/test_local.py | 8 +- fsspec/implementations/tests/test_memory.py | 4 +- fsspec/spec.py | 106 ++- fsspec/tests/abstract/__init__.py | 60 ++ fsspec/tests/abstract/copy.py | 252 ++++-- fsspec/tests/abstract/get.py | 226 ++++- fsspec/tests/abstract/put.py | 257 ++++-- fsspec/tests/conftest.py | 178 +++- fsspec/tests/test_generic.py | 2 + fsspec/tests/test_spec.py | 855 ++++++++++++++++-- fsspec/tests/test_utils.py | 33 +- fsspec/utils.py | 7 +- setup.cfg | 2 +- 17 files changed, 1797 insertions(+), 369 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index b8f8642a0..03b3fcc39 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -13,12 +13,7 @@ from .callbacks import _DEFAULT_CALLBACK from .exceptions import FSTimeoutError -from .implementations.local import ( - LocalFileSystem, - make_path_posix, - trailing_sep, - trailing_sep_maybe_asterisk, -) +from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep from .spec import AbstractBufferedFile, AbstractFileSystem from .utils import is_exception, other_paths @@ -357,14 +352,19 @@ async def _copy( if not paths: return - isdir = isinstance(path2, str) and ( + source_is_file = len(paths) == 1 + dest_is_dir = isinstance(path2, str) and ( trailing_sep(path2) or await self._isdir(path2) ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) path2 = other_paths( paths, path2, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(path1), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) batch_size = batch_size or self.batch_size @@ -514,15 +514,20 @@ async def _put( if not lpaths: return - isdir = isinstance(rpath, str) and ( + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( trailing_sep(rpath) or await self._isdir(rpath) ) + rpath = self._strip_protocol(rpath) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) rpaths = other_paths( lpaths, rpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(lpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -571,11 +576,9 @@ async def _get( """ source_is_str = isinstance(rpath, str) # First check for rpath trailing slash as _strip_protocol removes it. - source_not_trailing_sep = source_is_str and not trailing_sep_maybe_asterisk( - rpath - ) + source_not_trailing_sep = source_is_str and not trailing_sep(rpath) rpath = self._strip_protocol(rpath) - rpaths = await self._expand_path(rpath, recursive=recursive) + rpaths = await self._expand_path(rpath, recursive=recursive, maxdepth=maxdepth) if source_is_str and (not recursive or maxdepth is not None): # Non-recursive glob does not copy directories rpaths = [ @@ -585,14 +588,19 @@ async def _get( return lpath = make_path_posix(lpath) - isdir = isinstance(lpath, str) and ( + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( trailing_sep(lpath) or LocalFileSystem().isdir(lpath) ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep) + ) lpaths = other_paths( rpaths, lpath, - exists=isdir and source_not_trailing_sep, - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths] @@ -695,25 +703,24 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): ): yield _ - async def _glob(self, path, **kwargs): + async def _glob(self, path, maxdepth=None, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indques = path.find("?") if path.find("?") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indques, indbrace) + min_idx = min(idx_star, idx_qmark, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif await self._exists(path): + if await self._exists(path): if not detail: return [path] else: @@ -723,13 +730,21 @@ async def _glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = await self._find( root, maxdepth=depth, withdirs=True, detail=True, **kwargs @@ -757,14 +772,23 @@ async def _glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: @@ -785,6 +809,12 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs): path = self._strip_protocol(path) out = dict() detail = kwargs.pop("detail", False) + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and await self._isdir(path): + out[path] = await self._info(path) + # async for? async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs): if withdirs: @@ -811,7 +841,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None): path = [self._strip_protocol(p) for p in path] for p in path: # can gather here if has_magic(p): - bit = set(await self._glob(p)) + bit = set(await self._glob(p, maxdepth=maxdepth)) out |= bit if recursive: # glob call above expanded one depth so if maxdepth is defined diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 5d118dcbd..e37b0001e 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -431,7 +431,7 @@ async def _info(self, url, **kwargs): return {"name": url, "size": None, **info, "type": "file"} - async def _glob(self, path, **kwargs): + async def _glob(self, path, maxdepth=None, **kwargs): """ Find files by glob-matching. @@ -439,23 +439,21 @@ async def _glob(self, path, **kwargs): but "?" is not considered as a character for globbing, because it is so common in URLs, often identifying the "query" part. """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indbrace) + min_idx = min(idx_star, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif await self._exists(path): + if await self._exists(path): if not detail: return [path] else: @@ -465,13 +463,21 @@ async def _glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = await self._find( root, maxdepth=depth, withdirs=True, detail=True, **kwargs @@ -498,14 +504,23 @@ async def _glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 1a8ffc29f..971074e95 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -65,10 +65,6 @@ def ls(self, path, detail=False, **kwargs): else: return [posixpath.join(path, f) for f in os.listdir(path)] - def glob(self, path, **kwargs): - path = self._strip_protocol(path) - return super().glob(path, **kwargs) - def info(self, path, **kwargs): if isinstance(path, os.DirEntry): # scandir DirEntry @@ -287,21 +283,6 @@ def trailing_sep(path): return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep)) -def trailing_sep_maybe_asterisk(path): - """Return True if the path ends with a path separator and optionally an - asterisk. - - A forward slash is always considered a path separator, even on Operating - Systems that normally use a backslash. - """ - # TODO: if all incoming paths were posix-compliant then separator would - # always be a forward slash, simplifying this function. - # See https://github.com/fsspec/filesystem_spec/pull/1250 - return path.endswith((os.sep, os.sep + "*")) or ( - os.altsep is not None and path.endswith((os.altsep, os.altsep + "*")) - ) - - class LocalFileOpener(io.IOBase): def __init__( self, path, mode, autocommit=True, fs=None, compression=None, **kwargs diff --git a/fsspec/implementations/tests/local/local_fixtures.py b/fsspec/implementations/tests/local/local_fixtures.py index d850fcf5f..bafff60d9 100644 --- a/fsspec/implementations/tests/local/local_fixtures.py +++ b/fsspec/implementations/tests/local/local_fixtures.py @@ -1,6 +1,6 @@ import pytest -from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.local import LocalFileSystem, make_path_posix from fsspec.tests.abstract import AbstractFixtures @@ -12,3 +12,7 @@ def fs(self): @pytest.fixture def fs_path(self, tmpdir): return str(tmpdir) + + @pytest.fixture + def fs_sanitize_path(self): + return make_path_posix diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 20e54311e..1cfd14c0a 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -323,7 +323,9 @@ def test_globfind_dirs(tmpdir): fs.glob(tmpdir + "/dir/*", detail=True)[tmpdir + "/dir/afile"]["type"] == "file" ) assert [tmpdir + "/dir/afile"] == fs.find(tmpdir) - assert [tmpdir + "/dir", tmpdir + "/dir/afile"] == fs.find(tmpdir, withdirs=True) + assert [tmpdir, tmpdir + "/dir", tmpdir + "/dir/afile"] == fs.find( + tmpdir, withdirs=True + ) def test_touch(tmpdir): @@ -952,12 +954,12 @@ def test_cp_get_put_empty_directory(tmpdir, funcname): # cp/get/put without slash, target directory exists assert fs.isdir(target) func(empty, target) - assert fs.find(target, withdirs=True) == [] + assert fs.find(target, withdirs=True) == [make_path_posix(target)] # cp/get/put with slash, target directory exists assert fs.isdir(target) func(empty + "/", target) - assert fs.find(target, withdirs=True) == [] + assert fs.find(target, withdirs=True) == [make_path_posix(target)] fs.rmdir(target) diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index 5bf1131c9..05a40b287 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -316,12 +316,12 @@ def test_cp_empty_directory(m): # cp without slash, target directory exists assert m.isdir(target) m.cp(empty, target) - assert m.find(target, withdirs=True) == [] + assert m.find(target, withdirs=True) == [target] # cp with slash, target directory exists assert m.isdir(target) m.cp(empty + "/", target) - assert m.find(target, withdirs=True) == [] + assert m.find(target, withdirs=True) == [target] m.rmdir(target) diff --git a/fsspec/spec.py b/fsspec/spec.py index 457c082e2..ea6949c26 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -486,6 +486,12 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): # TODO: allow equivalent of -name parameter path = self._strip_protocol(path) out = dict() + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and self.isdir(path): + out[path] = self.info(path) + for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs): if withdirs: files.update(dirs) @@ -534,40 +540,40 @@ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): else: return sizes - def glob(self, path, **kwargs): + def glob(self, path, maxdepth=None, **kwargs): """ Find files by glob-matching. - If the path ends with '/' and does not contain "*", it is essentially - the same as ``ls(path)``, returning only files. + If the path ends with '/', only folders are returned. We support ``"**"``, ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation. + The `maxdepth` option is applied on the first `**` found in the path. + Search path names that contain embedded characters special to this implementation of glob may not produce expected results; e.g., 'foo/bar/*starredfilename*'. kwargs are passed to ``ls``. """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indques = path.find("?") if path.find("?") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indques, indbrace) + min_idx = min(idx_star, idx_qmark, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif self.exists(path): + if self.exists(path): if not detail: return [path] else: @@ -577,13 +583,21 @@ def glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs) # Escape characters special to python regex, leaving our supported @@ -609,14 +623,24 @@ def glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) + out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: @@ -918,7 +942,6 @@ def get( LocalFileSystem, make_path_posix, trailing_sep, - trailing_sep_maybe_asterisk, ) source_is_str = isinstance(rpath, str) @@ -931,14 +954,20 @@ def get( if isinstance(lpath, str): lpath = make_path_posix(lpath) - isdir = isinstance(lpath, str) and ( + + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( trailing_sep(lpath) or LocalFileSystem().isdir(lpath) ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath)) + ) lpaths = other_paths( rpaths, lpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(rpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -988,7 +1017,6 @@ def put( LocalFileSystem, make_path_posix, trailing_sep, - trailing_sep_maybe_asterisk, ) source_is_str = isinstance(lpath, str) @@ -1002,17 +1030,24 @@ def put( if not lpaths: return - isdir = isinstance(rpath, str) and (trailing_sep(rpath) or self.isdir(rpath)) + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or self.isdir(rpath) + ) + rpath = ( self._strip_protocol(rpath) if isinstance(rpath, str) else [self._strip_protocol(p) for p in rpath] ) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) rpaths = other_paths( lpaths, rpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(lpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -1045,7 +1080,7 @@ def copy( not-found exceptions will cause the path to be skipped; defaults to raise unless recursive is true, where the default is ignore """ - from .implementations.local import trailing_sep, trailing_sep_maybe_asterisk + from .implementations.local import trailing_sep if on_error is None and recursive: on_error = "ignore" @@ -1060,12 +1095,19 @@ def copy( if not paths: return - isdir = isinstance(path2, str) and (trailing_sep(path2) or self.isdir(path2)) + source_is_file = len(paths) == 1 + dest_is_dir = isinstance(path2, str) and ( + trailing_sep(path2) or self.isdir(path2) + ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) path2 = other_paths( paths, path2, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(path1), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -1093,7 +1135,7 @@ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): path = [self._strip_protocol(p) for p in path] for p in path: if has_magic(p): - bit = set(self.glob(p, **kwargs)) + bit = set(self.glob(p, maxdepth=maxdepth, **kwargs)) out |= bit if recursive: # glob call above expanded one depth so if maxdepth is defined diff --git a/fsspec/tests/abstract/__init__.py b/fsspec/tests/abstract/__init__.py index d2bc1627d..fc63e9ca9 100644 --- a/fsspec/tests/abstract/__init__.py +++ b/fsspec/tests/abstract/__init__.py @@ -26,6 +26,17 @@ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path): yield source fs.rm(source, recursive=True) + @pytest.fixture + def fs_glob_edge_cases_files(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + @pytest.fixture def fs_target(self, fs, fs_join, fs_path): """ @@ -49,6 +60,17 @@ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path): yield source local_fs.rm(source, recursive=True) + @pytest.fixture + def local_glob_edge_cases_files(self, local_fs, local_join, local_path): + """ + Scenario on local filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(local_fs, local_join, local_path) + yield source + local_fs.rm(source, recursive=True) + @pytest.fixture def local_target(self, local_fs, local_join, local_path): """ @@ -61,6 +83,39 @@ def local_target(self, local_fs, local_join, local_path): if local_fs.exists(target): local_fs.rm(target, recursive=True) + def _glob_edge_cases_files(self, some_fs, some_join, some_path): + """ + Scenario that is used for glob edge cases cp/get/put tests. + Creates the following directory and file structure: + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + ├── 📁 subdir0 + │ ├── 📄 subfile1 + │ ├── 📄 subfile2 + │ └── 📁 nesteddir + │ └── 📄 nestedfile + └── 📁 subdir1 + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + """ + source = some_join(some_path, "source") + some_fs.touch(some_join(source, "file1")) + some_fs.touch(some_join(source, "file2")) + + for subdir_idx in range(2): + subdir = some_join(source, f"subdir{subdir_idx}") + nesteddir = some_join(subdir, "nesteddir") + some_fs.makedirs(nesteddir) + some_fs.touch(some_join(subdir, "subfile1")) + some_fs.touch(some_join(subdir, "subfile2")) + some_fs.touch(some_join(nesteddir, "nestedfile")) + + return source + def _bulk_operations_scenario_0(self, some_fs, some_join, some_path): """ Scenario that is used for many cp/get/put tests. Creates the following @@ -133,8 +188,13 @@ def local_join(self): def local_path(self, tmpdir): return tmpdir + @pytest.fixture def supports_empty_directories(self): """ Return whether this implementation supports empty directories. """ return True + + @pytest.fixture + def fs_sanitize_path(self): + return lambda x: x diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 6498fd215..a5eb19038 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -1,13 +1,25 @@ +from itertools import product + +import pytest + +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractCopyTests: def test_copy_file_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1a source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file fs.touch(fs_join(target, "dummy")) assert fs.isdir(target) @@ -53,13 +65,22 @@ def test_copy_file_to_new_directory( assert fs.isfile(fs_join(target, "newdir", "subfile1")) def test_copy_file_to_file_in_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1c source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) assert fs.isfile(fs_join(target, "newfile")) @@ -80,14 +101,19 @@ def test_copy_file_to_file_in_new_directory( assert fs.isfile(fs_join(target, "newdir", "newfile")) def test_copy_directory_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1e source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -101,7 +127,7 @@ def test_copy_directory_to_existing_directory( # Without recursive does nothing fs.cp(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive fs.cp(s, t, recursive=True) @@ -112,7 +138,14 @@ def test_copy_directory_to_existing_directory( assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -121,7 +154,7 @@ def test_copy_directory_to_existing_directory( assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # Limit recursive by maxdepth fs.cp(s, t, recursive=True, maxdepth=1) @@ -131,7 +164,13 @@ def test_copy_directory_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -139,10 +178,15 @@ def test_copy_directory_to_existing_directory( assert not fs.exists(fs_join(target, "subdir", "nesteddir")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_directory_to_new_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1f source = fs_bulk_operations_scenario_0 @@ -160,7 +204,11 @@ def test_copy_directory_to_new_directory( # Without recursive does nothing fs.cp(s, t) - assert fs.ls(target) == [] + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) # With recursive fs.cp(s, t, recursive=True) @@ -186,13 +234,23 @@ def test_copy_directory_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) def test_copy_glob_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1g source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) for target_slash in [False, True]: t = target + "/" if target_slash else target @@ -205,29 +263,51 @@ def test_copy_glob_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert fs.isdir(fs_join(target, "nesteddir")) - assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) - # Limit recursive by maxdepth - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert not fs.exists(fs_join(target, "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_glob_to_new_directory( self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target @@ -257,39 +337,92 @@ def test_copy_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert fs.isdir(fs_join(target, "newdir", "nesteddir")) - assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_copy_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + fs_target, + fs_sanitize_path, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + target = fs_target - # Limit recursive by maxdepth - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert not fs.exists(fs_join(target, "newdir", "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_copy_list_of_files_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 2a source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -309,8 +442,15 @@ def test_copy_list_of_files_to_existing_directory( assert fs.isfile(fs_join(target, "file2")) assert fs.isfile(fs_join(target, "subfile1")) - fs.rm(fs.find(target)) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_list_of_files_to_new_directory( self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index baa9aa4a9..08c04e909 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -1,3 +1,11 @@ +from itertools import product + +import pytest + +from fsspec.implementations.local import make_path_posix +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractGetTests: def test_get_file_to_existing_directory( self, @@ -66,7 +74,6 @@ def test_get_file_to_file_in_existing_directory( self, fs, fs_join, - fs_path, fs_bulk_operations_scenario_0, local_fs, local_join, @@ -117,6 +124,7 @@ def test_get_directory_to_existing_directory( target = local_target local_fs.mkdir(target) + assert local_fs.isdir(target) for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") @@ -125,9 +133,8 @@ def test_get_directory_to_existing_directory( t = target + "/" if target_slash else target # Without recursive does nothing - # ERROR: erroneously creates new directory - # fs.get(s, t) - # assert fs.ls(target) == [] + fs.get(s, t) + assert local_fs.ls(target) == [] # With recursive fs.get(s, t, recursive=True) @@ -136,6 +143,7 @@ def test_get_directory_to_existing_directory( assert local_fs.isfile(local_join(target, "subfile2")) assert local_fs.isdir(local_join(target, "nesteddir")) assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) local_fs.rm( [ @@ -157,8 +165,29 @@ def test_get_directory_to_existing_directory( local_fs.rm(local_join(target, "subdir"), recursive=True) assert local_fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + if source_slash: + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + else: + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile1")) + assert local_fs.isfile(local_join(target, "subdir", "subfile2")) + assert not local_fs.exists(local_join(target, "subdir", "nesteddir")) + + local_fs.rm(local_join(target, "subdir"), recursive=True) + assert local_fs.ls(target) == [] def test_get_directory_to_new_directory( self, @@ -184,9 +213,8 @@ def test_get_directory_to_new_directory( t += "/" # Without recursive does nothing - # ERROR: erroneously creates new directory - # fs.get(s, t) - # assert fs.ls(target) == [] + fs.get(s, t) + assert local_fs.ls(target) == [] # With recursive fs.get(s, t, recursive=True) @@ -197,12 +225,21 @@ def test_get_directory_to_new_directory( assert local_fs.isfile( local_join(target, "newdir", "nesteddir", "nestedfile") ) + assert not local_fs.exists(local_join(target, "subdir")) local_fs.rm(local_join(target, "newdir"), recursive=True) assert local_fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) def test_get_glob_to_existing_directory( self, @@ -219,20 +256,62 @@ def test_get_glob_to_existing_directory( target = local_target local_fs.mkdir(target) - # for target_slash in [False, True]: - for target_slash in [False]: + for target_slash in [False, True]: t = target + "/" if target_slash else target # Without recursive fs.get(fs_join(source, "subdir", "*"), t) assert local_fs.isfile(local_join(target, "subfile1")) assert local_fs.isfile(local_join(target, "subfile2")) - # assert not local_fs.isdir(local_join(target, "nesteddir")) # ERROR - assert not local_fs.isdir(local_join(target, "subdir")) + assert not local_fs.isdir(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert local_fs.isdir(local_join(target, "nesteddir")) + assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + local_join(target, "nesteddir"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] - # Limit by maxdepth + # Limit recursive by maxdepth + fs.get( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] def test_get_glob_to_new_directory( self, @@ -259,27 +338,91 @@ def test_get_glob_to_new_directory( assert local_fs.isdir(local_join(target, "newdir")) assert local_fs.isfile(local_join(target, "newdir", "subfile1")) assert local_fs.isfile(local_join(target, "newdir", "subfile2")) - # ERROR - do not copy empty directory - # assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) local_fs.rm(local_join(target, "newdir"), recursive=True) assert local_fs.ls(target) == [] # With recursive - fs.get(fs_join(source, "subdir", "*"), t, recursive=True) - assert local_fs.isdir(local_join(target, "newdir")) - assert local_fs.isfile(local_join(target, "newdir", "subfile1")) - assert local_fs.isfile(local_join(target, "newdir", "subfile2")) - assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) - assert local_fs.isfile( - local_join(target, "newdir", "nesteddir", "nestedfile") - ) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) + assert local_fs.isfile( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) - local_fs.rm(local_join(target, "newdir"), recursive=True) - assert local_fs.ls(target) == [] + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.get( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) + + local_fs.rm(local_fs.ls(target, detail=False), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_get_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files - # Limit by maxdepth - # ERROR: this is not correct + target = local_target + + for new_dir, target_slash in product([True, False], [True, False]): + local_fs.mkdir(target) + + t = local_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = local_fs.find(target) + if new_dir: + prefixed_expected = [ + make_path_posix(local_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + make_path_posix(local_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + local_fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_get_list_of_files_to_existing_directory( self, @@ -310,7 +453,14 @@ def test_get_list_of_files_to_existing_directory( assert local_fs.isfile(local_join(target, "file2")) assert local_fs.isfile(local_join(target, "subfile1")) - local_fs.rm(local_fs.find(target)) + local_fs.rm( + [ + local_join(target, "file1"), + local_join(target, "file2"), + local_join(target, "subfile1"), + ], + recursive=True, + ) assert local_fs.ls(target) == [] def test_get_list_of_files_to_new_directory( @@ -358,13 +508,13 @@ def test_get_directory_recursive( fs.get(src, target, recursive=True) assert local_fs.isdir(target) - if loop == 0: - assert local_fs.isfile(local_join(target, "file")) - assert not local_fs.exists(local_join(target, "src")) - else: - assert local_fs.isfile(local_join(target, "file")) - assert local_fs.isdir(local_join(target, "src")) - assert local_fs.isfile(local_join(target, "src", "file")) + if loop == 0: + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) + else: + assert local_fs.isfile(local_join(target, "file")) + assert local_fs.isdir(local_join(target, "src")) + assert local_fs.isfile(local_join(target, "src", "file")) local_fs.rm(target, recursive=True) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index d06f9d9b5..a92bc4a13 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -1,3 +1,10 @@ +from itertools import product + +import pytest + +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractPutTests: def test_put_file_to_existing_directory( self, @@ -6,13 +13,14 @@ def test_put_file_to_existing_directory( fs_target, local_join, local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1a source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file fs.touch(fs_join(target, "dummy")) assert fs.isdir(target) @@ -58,13 +66,23 @@ def test_put_file_to_new_directory( assert fs.isfile(fs_join(target, "newdir", "subfile1")) def test_put_file_to_file_in_existing_directory( - self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, ): # Copy scenario 1c source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) assert fs.isfile(fs_join(target, "newfile")) @@ -86,14 +104,19 @@ def test_put_file_to_file_in_new_directory( assert fs.isfile(fs_join(target, "newdir", "newfile")) def test_put_directory_to_existing_directory( - self, fs, fs_join, fs_target, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1e source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -107,7 +130,7 @@ def test_put_directory_to_existing_directory( # Without recursive does nothing fs.put(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive fs.put(s, t, recursive=True) @@ -118,7 +141,14 @@ def test_put_directory_to_existing_directory( assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -127,7 +157,7 @@ def test_put_directory_to_existing_directory( assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # Limit recursive by maxdepth fs.put(s, t, recursive=True, maxdepth=1) @@ -137,7 +167,13 @@ def test_put_directory_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -145,21 +181,21 @@ def test_put_directory_to_existing_directory( assert not fs.exists(fs_join(target, "subdir", "nesteddir")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_directory_to_new_directory( - self, fs, fs_join, fs_target, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1f source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): - # Force target directory to exist by adding a dummy file - dummy = fs_join(target, "dummy") - fs.touch(dummy) - assert fs.isdir(target) for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") @@ -171,7 +207,11 @@ def test_put_directory_to_new_directory( # Without recursive does nothing fs.put(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) # With recursive fs.put(s, t, recursive=True) @@ -197,14 +237,20 @@ def test_put_directory_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) def test_put_glob_to_existing_directory( - self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, ): # Copy scenario 1g source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -221,29 +267,54 @@ def test_put_glob_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive - fs.put(local_join(source, "subdir", "*"), t, recursive=True) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert fs.isdir(fs_join(target, "nesteddir")) - assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) - # Limit recursive by maxdepth - fs.put(local_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert not fs.exists(fs_join(target, "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_glob_to_new_directory( self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 @@ -253,11 +324,6 @@ def test_put_glob_to_new_directory( target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): - # Force target directory to exist by adding a dummy file - dummy = fs_join(target, "dummy") - fs.touch(dummy) - assert fs.isdir(target) for target_slash in [False, True]: t = fs_join(target, "newdir") @@ -278,29 +344,81 @@ def test_put_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - fs.put(local_join(source, "subdir", "*"), t, recursive=True) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert fs.isdir(fs_join(target, "newdir", "nesteddir")) - assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) - # Limit recursive by maxdepth - fs.put(local_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert not fs.exists(fs_join(target, "newdir", "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_put_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_target, + local_glob_edge_cases_files, + local_join, + fs_sanitize_path, + ): + # Copy scenario 1g + source = local_glob_edge_cases_files - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + target = fs_target + + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) + + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_put_list_of_files_to_existing_directory( self, @@ -309,14 +427,14 @@ def test_put_list_of_files_to_existing_directory( fs_target, local_join, local_bulk_operations_scenario_0, - fs_path, + supports_empty_directories, ): # Copy scenario 2a source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -336,8 +454,15 @@ def test_put_list_of_files_to_existing_directory( assert fs.isfile(fs_join(target, "file2")) assert fs.isfile(fs_join(target, "subfile1")) - fs.rm(fs.find(target)) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_list_of_files_to_new_directory( self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index 9fdf25b7a..544e8a0e0 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -18,6 +18,182 @@ ).read() win = os.name == "nt" +GLOB_EDGE_CASES_TESTS = { + "argnames": ("path", "recursive", "maxdepth", "expected"), + "argvalues": [ + ("fil?1", False, None, ["file1"]), + ("fil?1", True, None, ["file1"]), + ("file[1-2]", False, None, ["file1", "file2"]), + ("file[1-2]", True, None, ["file1", "file2"]), + ("*", False, None, ["file1", "file2"]), + ( + "*", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*", True, 1, ["file1", "file2"]), + ( + "*", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("*1", False, None, ["file1"]), + ( + "*1", + True, + None, + [ + "file1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]), + ( + "**", + False, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**", True, 1, ["file1", "file2"]), + ( + "**", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + False, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ( + "**1", + True, + None, + [ + "file1", + "subdir0/subfile1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**1", True, 1, ["file1"]), + ( + "**1", + True, + 2, + ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"], + ), + ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ("**/subdir0", False, None, []), + ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("**/subdir0/nested*", False, 2, []), + ("**/subdir0/nested*", True, 2, ["nestedfile"]), + ("subdir[1-2]", False, None, []), + ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]), + ("subdir[0-1]", False, None, []), + ( + "subdir[0-1]", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + False, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ], +} + @pytest.fixture def reset_files(): @@ -72,7 +248,7 @@ def do_GET(self): else: # suffix only l = len(file_data) - content_range = f"bytes {l-int(end)}-{l-1}/{l}" + content_range = f"bytes {l - int(end)}-{l - 1}/{l}" file_data = file_data[-int(end) :] if "use_206" in self.headers: status = 206 diff --git a/fsspec/tests/test_generic.py b/fsspec/tests/test_generic.py index 4eff3f434..aa8d9bfc3 100644 --- a/fsspec/tests/test_generic.py +++ b/fsspec/tests/test_generic.py @@ -64,6 +64,7 @@ def test_rsync(tmpdir, m): assert set(allfiles) == { f"file://{pos_tmpdir}{_}" for _ in [ + "", "/deep", "/deep/path", "/deep/path/afile", @@ -76,6 +77,7 @@ def test_rsync(tmpdir, m): assert set(allfiles2) == { f"file://{pos_tmpdir}{_}" for _ in [ + "", "/deep", "/deep/path", "/deep/path/afile", diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index 68be18c66..f095ebdf1 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -1,6 +1,9 @@ +import glob import json import os import pickle +import subprocess +import sys from collections import defaultdict import numpy as np @@ -9,8 +12,371 @@ import fsspec from fsspec.implementations.ftp import FTPFileSystem from fsspec.implementations.http import HTTPFileSystem +from fsspec.implementations.local import LocalFileSystem from fsspec.spec import AbstractBufferedFile, AbstractFileSystem +PATHS_FOR_GLOB_TESTS = ( + {"name": "test0.json", "type": "file", "size": 100}, + {"name": "test0.yaml", "type": "file", "size": 100}, + {"name": "test0", "type": "directory", "size": 0}, + {"name": "test0/test0.json", "type": "file", "size": 100}, + {"name": "test0/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test1", "type": "directory", "size": 0}, + {"name": "test0/test1/test0.json", "type": "file", "size": 100}, + {"name": "test0/test1/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test1/test2", "type": "directory", "size": 0}, + {"name": "test0/test1/test2/test0.json", "type": "file", "size": 100}, + {"name": "test0/test1/test2/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2", "type": "directory", "size": 0}, + {"name": "test0/test2/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2/test1", "type": "directory", "size": 0}, + {"name": "test0/test2/test1/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test3", "type": "directory", "size": 0}, + {"name": "test0/test2/test1/test3/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test3/test0.yaml", "type": "file", "size": 100}, + {"name": "test1.json", "type": "file", "size": 100}, + {"name": "test1.yaml", "type": "file", "size": 100}, + {"name": "test1", "type": "directory", "size": 0}, + {"name": "test1/test0.json", "type": "file", "size": 100}, + {"name": "test1/test0.yaml", "type": "file", "size": 100}, + {"name": "test1/test0", "type": "directory", "size": 0}, + {"name": "test1/test0/test0.json", "type": "file", "size": 100}, + {"name": "test1/test0/test0.yaml", "type": "file", "size": 100}, + {"name": "special_chars", "type": "directory", "size": 0}, + {"name": "special_chars/f\\oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f.oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f+oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f(oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f)oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f|oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f^oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f$oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f{oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f}oo.txt", "type": "file", "size": 100}, +) + +GLOB_POSIX_TESTS = { + "argnames": ("path", "expected"), + "argvalues": [ + ("nonexistent", []), + ("test0.json", ["test0.json"]), + ("test0", ["test0"]), + ("test0/", ["test0"]), + ("test1/test0.yaml", ["test1/test0.yaml"]), + ("test0/test[1-2]", ["test0/test1", "test0/test2"]), + ("test0/test[1-2]/", ["test0/test1", "test0/test2"]), + ( + "test0/test[1-2]/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], + ), + ( + "test0/test[1-2]/*.[j]*", + ["test0/test1/test0.json", "test0/test2/test0.json"], + ), + ("special_chars/f\\oo.*", ["special_chars/f\\oo.txt"]), + ("special_chars/f.oo.*", ["special_chars/f.oo.txt"]), + ("special_chars/f+oo.*", ["special_chars/f+oo.txt"]), + ("special_chars/f(oo.*", ["special_chars/f(oo.txt"]), + ("special_chars/f)oo.*", ["special_chars/f)oo.txt"]), + ("special_chars/f|oo.*", ["special_chars/f|oo.txt"]), + ("special_chars/f^oo.*", ["special_chars/f^oo.txt"]), + ("special_chars/f$oo.*", ["special_chars/f$oo.txt"]), + ("special_chars/f{oo.*", ["special_chars/f{oo.txt"]), + ("special_chars/f}oo.*", ["special_chars/f}oo.txt"]), + ( + "*", + [ + "special_chars", + "test0.json", + "test0.yaml", + "test0", + "test1.json", + "test1.yaml", + "test1", + ], + ), + ("*.yaml", ["test0.yaml", "test1.yaml"]), + ( + "**", + [ + "special_chars", + "special_chars/f$oo.txt", + "special_chars/f(oo.txt", + "special_chars/f)oo.txt", + "special_chars/f+oo.txt", + "special_chars/f.oo.txt", + "special_chars/f\\oo.txt", + "special_chars/f^oo.txt", + "special_chars/f{oo.txt", + "special_chars/f|oo.txt", + "special_chars/f}oo.txt", + "test0.json", + "test0.yaml", + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + "test1.json", + "test1.yaml", + "test1", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("*/", ["special_chars", "test0", "test1"]), + ( + "**/", + [ + "special_chars", + "test0", + "test0/test1", + "test0/test1/test2", + "test0/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + "test1", + "test1/test0", + ], + ), + ("*/*.yaml", ["test0/test0.yaml", "test1/test0.yaml"]), + ( + "**/*.yaml", + [ + "test0.yaml", + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + "test1.yaml", + "test1/test0.yaml", + "test1/test0/test0.yaml", + ], + ), + ( + "*/test1/*", + ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"], + ), + ("*/test1/*.yaml", ["test0/test1/test0.yaml"]), + ( + "**/test1/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + ], + ), + ( + "**/test1/*.yaml", + [ + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test1/test0.yaml", + ], + ), + ("*/test1/*/", ["test0/test1/test2"]), + ( + "**/test1/*/", + ["test0/test1/test2", "test0/test2/test1/test3", "test1/test0"], + ), + ( + "*/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + ], + ), + ( + "**/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + "test1", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("*/test1/**/", ["test0/test1", "test0/test1/test2"]), + ( + "**/test1/**/", + [ + "test0/test1", + "test0/test1/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + "test1", + "test1/test0", + ], + ), + ( + "test0/*", + ["test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test2"], + ), + ("test0/*.yaml", ["test0/test0.yaml"]), + ( + "test0/**", + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/*/", ["test0/test1", "test0/test2"]), + ( + "test0/**/", + [ + "test0", + "test0/test1", + "test0/test1/test2", + "test0/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + ], + ), + ("test0/*/*.yaml", ["test0/test1/test0.yaml", "test0/test2/test0.yaml"]), + ( + "test0/**/*.yaml", + [ + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/*/test1/*", + [ + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + ], + ), + ("test0/*/test1/*.yaml", ["test0/test2/test1/test0.yaml"]), + ( + "test0/**/test1/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + ], + ), + ( + "test0/**/test1/*.yaml", + ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"], + ), + ("test0/*/test1/*/", ["test0/test2/test1/test3"]), + ("test0/**/test1/*/", ["test0/test1/test2", "test0/test2/test1/test3"]), + ( + "test0/*/test1/**", + [ + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/**/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/*/test1/**/", ["test0/test2/test1", "test0/test2/test1/test3"]), + ( + "test0/**/test1/**/", + [ + "test0/test1", + "test0/test1/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + ], + ), + ], +} + class DummyTestFS(AbstractFileSystem): protocol = "mock" @@ -43,17 +409,13 @@ class DummyTestFS(AbstractFileSystem): }, {"name": "misc", "type": "directory"}, {"name": "misc/foo.txt", "type": "file", "size": 100}, - {"name": "glob_test", "type": "directory", "size": 0}, - {"name": "glob_test/hat", "type": "directory", "size": 0}, - {"name": "glob_test/hat/^foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/dollar", "type": "directory", "size": 0}, - {"name": "glob_test/dollar/$foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/lbrace", "type": "directory", "size": 0}, - {"name": "glob_test/lbrace/{foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/rbrace", "type": "directory", "size": 0}, - {"name": "glob_test/rbrace/}foo.txt", "type": "file", "size": 100}, ) + def __init__(self, fs_content=None, **kwargs): + if fs_content is not None: + self._fs_contents = fs_content + super().__init__(**kwargs) + def __getitem__(self, name): for item in self._fs_contents: if item["name"] == name: @@ -107,69 +469,123 @@ def _open( @pytest.mark.parametrize( - "test_path, expected", + ["test_paths", "recursive", "maxdepth", "expected"], [ ( - "mock://top_level/second_level/date=2019-10-01/a.parquet", - ["top_level/second_level/date=2019-10-01/a.parquet"], + ( + "top_level/second_level", + "top_level/sec*", + "top_level/sec*vel", + "top_level/*", + ), + True, + None, + [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ( + "top_level/second_level", + "top_level/sec*", + "top_level/sec*vel", + "top_level/*", + ), + False, + None, + [ + "top_level/second_level", + ], + ), + ( + ("top_level/second_level",), + True, + 1, + [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-04", + ], ), ( - "mock://top_level/second_level/date=2019-10-01/*", + ("top_level/second_level",), + True, + 2, [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", + "top_level/second_level/date=2019-10-04/a.parquet", ], ), - ("mock://top_level/second_level/date=2019-10", []), ( - "mock://top_level/second_level/date=2019-10-0[1-4]", + ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"), + True, + 1, + ["top_level/second_level"], + ), + ( + ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"), + True, + 2, [ + "top_level/second_level", "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-04", ], ), ( - "mock://top_level/second_level/date=2019-10-0[1-4]/*", + ("top_level/**",), + False, + None, [ + "top_level", + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", "top_level/second_level/date=2019-10-04/a.parquet", ], ), ( - "mock://top_level/second_level/date=2019-10-0[1-4]/[a].*", + ("top_level/**",), + True, + None, [ + "top_level", + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", "top_level/second_level/date=2019-10-04/a.parquet", ], ), - ("mock://glob_test/hat/^foo.*", ["glob_test/hat/^foo.txt"]), - ("mock://glob_test/dollar/$foo.*", ["glob_test/dollar/$foo.txt"]), - ("mock://glob_test/lbrace/{foo.*", ["glob_test/lbrace/{foo.txt"]), - ("mock://glob_test/rbrace/}foo.*", ["glob_test/rbrace/}foo.txt"]), - ], -) -def test_glob(test_path, expected): - test_fs = DummyTestFS() - res = test_fs.glob(test_path) - res = sorted(res) # FIXME: py35 back-compat - assert res == expected - res = test_fs.glob(test_path, detail=True) - assert isinstance(res, dict) - assert sorted(res) == expected # FIXME: py35 back-compat - for name, info in res.items(): - assert info == test_fs[name] - - -@pytest.mark.parametrize( - ["test_paths", "expected"], - [ + (("top_level/**",), True, 1, ["top_level", "top_level/second_level"]), ( - ("top_level/second_level", "top_level/sec*", "top_level/*"), + ("top_level/**",), + True, + 2, [ + "top_level", "top_level/second_level", "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", @@ -180,58 +596,87 @@ def test_glob(test_path, expected): "top_level/second_level/date=2019-10-04/a.parquet", ], ), - (("misc/foo.txt", "misc/*.txt"), ["misc/foo.txt"]), + ( + ("top_level/**/a.*",), + False, + None, + [ + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ("top_level/**/a.*",), + True, + None, + [ + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ("top_level/**/second_level/date=2019-10-02",), + False, + 2, + [ + "top_level/second_level/date=2019-10-02", + ], + ), + ( + ("top_level/**/second_level/date=2019-10-02",), + True, + 2, + [ + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + ], + ), + [("misc/foo.txt", "misc/*.txt"), False, None, ["misc/foo.txt"]], + [("misc/foo.txt", "misc/*.txt"), True, None, ["misc/foo.txt"]], ( ("",), + False, + None, + [DummyTestFS.root_marker], + ), + ( + ("",), + True, + None, DummyTestFS.get_test_paths() + [DummyTestFS.root_marker], ), ], - # ids=["all_second_level", "single_file"], ) -def test_expand_path_recursive(test_paths, expected): +def test_expand_path(test_paths, recursive, maxdepth, expected): """Test a number of paths and then their combination which should all yield the same set of expanded paths""" test_fs = DummyTestFS() # test single query for test_path in test_paths: - paths = test_fs.expand_path(test_path, recursive=True) + paths = test_fs.expand_path(test_path, recursive=recursive, maxdepth=maxdepth) assert sorted(paths) == sorted(expected) # test with all queries - paths = test_fs.expand_path(list(test_paths), recursive=True) + paths = test_fs.expand_path( + list(test_paths), recursive=recursive, maxdepth=maxdepth + ) assert sorted(paths) == sorted(expected) - # test with maxdepth - assert test_fs.expand_path("top_level", recursive=True, maxdepth=1) == [ - "top_level", - "top_level/second_level", - ] - - assert test_fs.expand_path("top_level", recursive=True, maxdepth=2) == [ - "top_level", - "top_level/second_level", - "top_level/second_level/date=2019-10-01", - "top_level/second_level/date=2019-10-02", - "top_level/second_level/date=2019-10-04", - ] - - assert test_fs.expand_path("top_level", recursive=True, maxdepth=3) == [ - "top_level", - "top_level/second_level", - "top_level/second_level/date=2019-10-01", - "top_level/second_level/date=2019-10-01/a.parquet", - "top_level/second_level/date=2019-10-01/b.parquet", - "top_level/second_level/date=2019-10-02", - "top_level/second_level/date=2019-10-02/a.parquet", - "top_level/second_level/date=2019-10-04", - "top_level/second_level/date=2019-10-04/a.parquet", - ] + +def test_expand_paths_with_wrong_args(): + test_fs = DummyTestFS() with pytest.raises(ValueError): test_fs.expand_path("top_level", recursive=True, maxdepth=0) with pytest.raises(ValueError): test_fs.expand_path("top_level", maxdepth=0) + with pytest.raises(FileNotFoundError): + test_fs.expand_path("top_level/**/second_level/date=2019-10-02", maxdepth=1) + with pytest.raises(FileNotFoundError): + test_fs.expand_path("nonexistent/*") @pytest.mark.xfail @@ -342,7 +787,6 @@ class UploadError(ValueError): ... class DummyBufferedFile(AbstractBufferedFile): - can_initiate = False def _initiate_upload(self): @@ -611,3 +1055,274 @@ def check_events(lpaths, rpaths): fs.get(base, dest, callback=callback) check_events(base, dest) callback.events.clear() + + +def _clean_paths(paths, prefix=""): + """ + Helper to cleanup paths results by doing the following: + - remove the prefix provided from all paths + - remove the trailing slashes from all paths + - remove duplicates paths + - sort all paths + """ + paths_list = paths + if isinstance(paths, dict): + paths_list = list(paths) + paths_list = [p.replace(prefix, "").strip("/") for p in sorted(set(paths_list))] + if isinstance(paths, dict): + return {p: paths[p] for p in paths_list} + return paths_list + + +@pytest.fixture(scope="function") +def glob_fs(): + return DummyTestFS(fs_content=PATHS_FOR_GLOB_TESTS) + + +@pytest.fixture(scope="function") +def glob_files_folder(tmp_path): + local_fs = LocalFileSystem(auto_mkdir=True) + local_fake_dir = str(tmp_path) + for path_info in PATHS_FOR_GLOB_TESTS: + if path_info["type"] == "file": + local_fs.touch(path=f"{str(tmp_path)}/{path_info['name']}") + return local_fake_dir + + +@pytest.mark.skipif( + sys.platform.startswith("win"), + reason="no need to run python glob posix tests on windows", +) +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_posix_tests_python_glob(path, expected, glob_files_folder): + """ + Tests against python glob to check if our posix tests are accurate. + """ + os.chdir(glob_files_folder) + + python_output = glob.glob(pathname=path, recursive=True) + assert _clean_paths(python_output, glob_files_folder) == _clean_paths(expected) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), + reason="no need to run bash stat posix tests on windows", +) +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_posix_tests_bash_stat(path, expected, glob_files_folder): + """ + Tests against bash stat to check if our posix tests are accurate. + """ + try: + subprocess.check_output(["bash", "-c", "shopt -s globstar"]) + except FileNotFoundError: + pytest.skip("bash is not available") + except subprocess.CalledProcessError: + pytest.skip("globstar option is not available") + + bash_path = ( + path.replace("\\", "\\\\") + .replace("$", "\\$") + .replace("(", "\\(") + .replace(")", "\\)") + .replace("|", "\\|") + ) + bash_output = subprocess.run( + [ + "bash", + "-c", + f"cd {glob_files_folder} && shopt -s globstar && stat -c %N {bash_path}", + ], + capture_output=True, + ) + # Remove the last element always empty + bash_output = bash_output.stdout.decode("utf-8").replace("'", "").split("\n")[:-1] + assert _clean_paths(bash_output, glob_files_folder) == _clean_paths(expected) + + +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_glob_posix_rules(path, expected, glob_fs): + output = glob_fs.glob(path=f"mock://{path}") + assert _clean_paths(output) == _clean_paths(expected) + + detailed_output = glob_fs.glob(path=f"mock://{path}", detail=True) + for name, info in _clean_paths(detailed_output).items(): + assert info == glob_fs[name] + + +@pytest.mark.parametrize( + ("path", "maxdepth", "expected"), + [ + ( + "test1**", + None, + [ + "test1", + "test1.json", + "test1.yaml", + "test1/test0", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("test1**/", None, ["test1", "test1/test0"]), + ( + "**.yaml", + None, + [ + "test0.yaml", + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + "test1.yaml", + "test1/test0.yaml", + "test1/test0/test0.yaml", + ], + ), + ("**1/", None, ["test0/test1", "test0/test2/test1", "test1"]), + ( + "**1/*.yaml", + None, + [ + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test1/test0.yaml", + ], + ), + ( + "test0**1**.yaml", + None, + [ + "test0/test1/test2/test0.yaml", + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/t**.yaml", + None, + [ + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/t**1/", None, ["test0/test1", "test0/test2/test1"]), + ( + "test0/t**1/*.yaml", + None, + ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"], + ), + ( + "test0/**", + 1, + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test2", + ], + ), + ( + "test0/**", + 2, + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], + ), + ("test0/**/test1/*", 1, []), + ( + "test0/**/test1/*", + 2, + ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"], + ), + ("test0/**/test1/**", 1, ["test0/test1"]), + ( + "test0/**/test1/**", + 2, + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1", + ], + ), + ( + "test0/test[1-2]/**", + 1, + [ + "test0/test1", + "test0/test1/test0.yaml", + "test0/test1/test0.json", + "test0/test1/test2", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], + ), + ( + "test0/test[1-2]/**", + 2, + [ + "test0/test1", + "test0/test1/test0.yaml", + "test0/test1/test0.json", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test0.json", + "test0/test2/test1/test3", + ], + ), + ], +) +def test_glob_non_posix_rules(path, maxdepth, expected, glob_fs): + output = glob_fs.glob(path=f"mock://{path}", maxdepth=maxdepth) + assert _clean_paths(output) == _clean_paths(expected) + + detailed_output = glob_fs.glob( + path=f"mock://{path}", maxdepth=maxdepth, detail=True + ) + for name, info in _clean_paths(detailed_output).items(): + assert info == glob_fs[name] + + +def test_glob_with_wrong_args(glob_fs): + with pytest.raises(ValueError): + _ = glob_fs.glob(path="mock://test0/*", maxdepth=0) diff --git a/fsspec/tests/test_utils.py b/fsspec/tests/test_utils.py index 517d42ba9..c83eeea0c 100644 --- a/fsspec/tests/test_utils.py +++ b/fsspec/tests/test_utils.py @@ -255,20 +255,19 @@ def test_common_prefix(paths, out): @pytest.mark.parametrize( - "paths, other, is_dir, exists, expected", + "paths, other, exists, expected", ( - (["/path1"], "/path2", False, False, ["/path2"]), - (["/path1"], "/path2", True, True, ["/path2/path1"]), - (["/path1"], "/path2", None, False, ["/path2"]), - (["/path1"], "/path2/", True, True, ["/path2/path1"]), - (["/path1"], ["/path2"], True, False, ["/path2"]), - (["/path1"], ["/path2"], True, True, ["/path2"]), - (["/path1", "/path2"], "/path2", True, False, ["/path2/path1", "/path2/path2"]), - (["/path1", "/path2"], "/path2", True, True, ["/path2/path1", "/path2/path2"]), + (["/path1"], "/path2", False, ["/path2"]), + (["/path1"], "/path2", True, ["/path2/path1"]), + (["/path1"], "/path2", False, ["/path2"]), + (["/path1"], "/path2/", True, ["/path2/path1"]), + (["/path1"], ["/path2"], False, ["/path2"]), + (["/path1"], ["/path2"], True, ["/path2"]), + (["/path1", "/path2"], "/path2", False, ["/path2/path1", "/path2/path2"]), + (["/path1", "/path2"], "/path2", True, ["/path2/path1", "/path2/path2"]), ( ["/more/path1", "/more/path2"], "/path2", - True, False, ["/path2/path1", "/path2/path2"], ), @@ -276,63 +275,55 @@ def test_common_prefix(paths, out): ["/more/path1", "/more/path2"], "/path2", True, - True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/more/path2"], "/path2", False, - False, ["/path2/path1", "/path2/path2"], ), ( ["/more/path1", "/more/path2"], "/path2", - False, True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/more/path2"], "/path2/", - None, False, ["/path2/path1", "/path2/path2"], ), ( ["/more/path1", "/more/path2"], "/path2/", - None, True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/diff/path2"], "/path2/", - None, False, ["/path2/more/path1", "/path2/diff/path2"], ), ( ["/more/path1", "/diff/path2"], "/path2/", - None, True, ["/path2/more/path1", "/path2/diff/path2"], ), - (["a", "b/", "b/c"], "dest/", True, False, ["dest/a", "dest/b/", "dest/b/c"]), + (["a", "b/", "b/c"], "dest/", False, ["dest/a", "dest/b/", "dest/b/c"]), ( ["/a", "/b/", "/b/c"], "dest/", - True, False, ["dest/a", "dest/b/", "dest/b/c"], ), ), ) -def test_other_paths(paths, other, is_dir, exists, expected): - assert other_paths(paths, other, is_dir, exists) == expected +def test_other_paths(paths, other, exists, expected): + assert other_paths(paths, other, exists) == expected def test_log(): diff --git a/fsspec/utils.py b/fsspec/utils.py index 1aa630c01..91bc6ad1a 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -343,7 +343,7 @@ def common_prefix(paths): return "/".join(parts[0][:i]) -def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): +def other_paths(paths, path2, exists=False, flatten=False): """In bulk file operations, construct a new file tree from a list of files Parameters @@ -353,10 +353,6 @@ def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): path2: str or list of str Root to construct the new list in. If this is already a list of str, we just assert it has the right number of elements. - is_dir: bool (optional) - For the special case where the input in one element, whether to regard the value - as the target path, or as a directory to put a file path within. If None, a - directory is inferred if the path ends in '/' exists: bool (optional) For a str destination, it is already exists (and is a dir), files should end up inside. @@ -370,7 +366,6 @@ def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): """ if isinstance(path2, str): - is_dir = is_dir or path2.endswith("/") path2 = path2.rstrip("/") if flatten: diff --git a/setup.cfg b/setup.cfg index d87021a4a..8a8bdee72 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,4 +51,4 @@ warn_unused_ignores = True # don't bother type-checking test_*.py or conftest.py files -exclude = (test_.*|conftest)\.py$ +exclude = (test.*|conftest)\.py$