From 1d7b8e2987a8504943d1cab9675084ec12ac2f0a Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 12:57:57 -0700 Subject: [PATCH 01/20] passing tests --- cloudpathlib/cloudpath.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index b2241b61..faf9dc09 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -9,9 +9,9 @@ PurePosixPath, WindowsPath, _make_selector, - _posix_flavour, _PathParents, ) + import shutil import sys from typing import ( @@ -44,6 +44,12 @@ else: from typing_extensions import Self +if sys.version_info >= (3, 12): + from pathlib import posixpath as _posix_flavour +else: + from pathlib import _posix_flavour + + from cloudpathlib.enums import FileCacheMode from . import anypath @@ -445,11 +451,11 @@ def _build_tree(trunk, branch, nodes, is_dir): # select_from returns self.name/... so strip before joining yield (self / str(p)[len(self.name) + 1 :]) - def glob(self, pattern: str) -> Generator[Self, None, None]: + def glob(self, pattern: str, case_sensitive: Optional[bool] = None) -> Generator[Self, None, None]: self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts - selector = _make_selector(tuple(pattern_parts), _posix_flavour) + selector = _make_selector(tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive) yield from self._glob( selector, @@ -458,11 +464,11 @@ def glob(self, pattern: str) -> Generator[Self, None, None]: in pattern, # recursive listing needed if explicit ** or any sub folder in pattern ) - def rglob(self, pattern: str) -> Generator[Self, None, None]: + def rglob(self, pattern: str, case_sensitive: Optional[bool] = None) -> Generator[Self, None, None]: self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts - selector = _make_selector(("**",) + tuple(pattern_parts), _posix_flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive) yield from self._glob(selector, True) @@ -1238,9 +1244,23 @@ def _make_child_relpath(self, part): def scandir( root: "_CloudPathSelectable", ) -> Generator[Generator["_CloudPathSelectable", None, None], None, None]: + print("scandir on ", root) yield ( _CloudPathSelectable(child, root._parents + [root._name], grand_children) for child, grand_children in root._all_children.items() ) _scandir = scandir # Py 3.11 compatibility + + def walk(self): + # split into dirs and files + dirs_files = defaultdict(list) + with self.scandir(self) as items: + for child in items: + dirs_files[child.is_dir()].append(child) + + # top-down, so yield self before recursive call + yield self, [f.name for f in dirs_files[True]], [f.name for f in dirs_files[False]] + + for child_dir in dirs_files[True]: + yield from child_dir.walk() From d4957edf0931e29f569943a6b9304f65986682f9 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 13:05:03 -0700 Subject: [PATCH 02/20] remove print --- cloudpathlib/cloudpath.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index faf9dc09..0646d13a 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -1244,7 +1244,6 @@ def _make_child_relpath(self, part): def scandir( root: "_CloudPathSelectable", ) -> Generator[Generator["_CloudPathSelectable", None, None], None, None]: - print("scandir on ", root) yield ( _CloudPathSelectable(child, root._parents + [root._name], grand_children) for child, grand_children in root._all_children.items() From e1ff4870f5fc7572d24d8f0b88dabf02b0b1e72e Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 14:44:06 -0700 Subject: [PATCH 03/20] make method signatures match --- cloudpathlib/cloudpath.py | 36 +++++++++++++------ tests/test_cloudpath_instantiation.py | 51 +++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 0646d13a..6f3b9435 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -451,11 +451,15 @@ def _build_tree(trunk, branch, nodes, is_dir): # select_from returns self.name/... so strip before joining yield (self / str(p)[len(self.name) + 1 :]) - def glob(self, pattern: str, case_sensitive: Optional[bool] = None) -> Generator[Self, None, None]: + def glob( + self, pattern: str, case_sensitive: Optional[bool] = None + ) -> Generator[Self, None, None]: self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts - selector = _make_selector(tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive) + selector = _make_selector( + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive + ) yield from self._glob( selector, @@ -464,11 +468,15 @@ def glob(self, pattern: str, case_sensitive: Optional[bool] = None) -> Generator in pattern, # recursive listing needed if explicit ** or any sub folder in pattern ) - def rglob(self, pattern: str, case_sensitive: Optional[bool] = None) -> Generator[Self, None, None]: + def rglob( + self, pattern: str, case_sensitive: Optional[bool] = None + ) -> Generator[Self, None, None]: self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts - selector = _make_selector(("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive) + selector = _make_selector( + ("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive + ) yield from self._glob(selector, True) @@ -653,6 +661,9 @@ def read_text(self, encoding: Optional[str] = None, errors: Optional[str] = None with self.open(mode="r", encoding=encoding, errors=errors) as f: return f.read() + def is_junction(self): + return False # only windows paths can be junctions, not cloudpaths + # ====================== DISPATCHED TO POSIXPATH FOR PURE PATHS ====================== # Methods that are dispatched to exactly how pathlib.PurePosixPath would calculate it on # self._path for pure paths (does not matter if file exists); @@ -698,8 +709,8 @@ def __truediv__(self, other: Union[str, PurePosixPath]) -> Self: return self._dispatch_to_path("__truediv__", other) - def joinpath(self, *args: Union[str, os.PathLike]) -> Self: - return self._dispatch_to_path("joinpath", *args) + def joinpath(self, *pathsegments: Union[str, os.PathLike]) -> Self: + return self._dispatch_to_path("joinpath", *pathsegments) def absolute(self) -> Self: return self @@ -710,7 +721,7 @@ def is_absolute(self) -> bool: def resolve(self, strict: bool = False) -> Self: return self - def relative_to(self, other: Self) -> PurePosixPath: + def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath: # We don't dispatch regularly since this never returns a cloud path (since it is relative, and cloud paths are # absolute) if not isinstance(other, CloudPath): @@ -719,7 +730,7 @@ def relative_to(self, other: Self) -> PurePosixPath: raise ValueError( f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path" ) - return self._path.relative_to(other._path) + return self._path.relative_to(other._path, walk_up=walk_up) def is_relative_to(self, other: Self) -> bool: try: @@ -732,12 +743,12 @@ def is_relative_to(self, other: Self) -> bool: def name(self) -> str: return self._dispatch_to_path("name") - def match(self, path_pattern: str) -> bool: + def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool: # strip scheme from start of pattern before testing if path_pattern.startswith(self.anchor + self.drive + "/"): path_pattern = path_pattern[len(self.anchor + self.drive + "/") :] - return self._dispatch_to_path("match", path_pattern) + return self._dispatch_to_path("match", path_pattern, case_sensitive=case_sensitive) @property def parent(self) -> Self: @@ -777,6 +788,9 @@ def with_stem(self, stem: str) -> Self: def with_name(self, name: str) -> Self: return self._dispatch_to_path("with_name", name) + def with_segments(self, *pathsegments) -> Self: + return self._new_cloudpath("/".join(pathsegments)) + def with_suffix(self, suffix: str) -> Self: return self._dispatch_to_path("with_suffix", suffix) @@ -1257,7 +1271,7 @@ def walk(self): with self.scandir(self) as items: for child in items: dirs_files[child.is_dir()].append(child) - + # top-down, so yield self before recursive call yield self, [f.name for f in dirs_files[True]], [f.name for f in dirs_files[False]] diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index 1ae100b2..f510956f 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -1,4 +1,7 @@ +import inspect import os +from pathlib import Path, PurePath +import re import pytest @@ -83,3 +86,51 @@ def test_dependencies_not_loaded(rig, monkeypatch): def test_is_pathlike(rig): p = rig.create_cloud_path("dir_0") assert isinstance(p, os.PathLike) + + +def test_public_interface_is_superset(rig): + """Test that a CloudPath has all of the Path methods and properties. For methods + we also ensure that the only difference in the signature is that a CloudPath has + optional additional kwargs (which are likely added in subsequent Python versions). + """ + lp = PurePath(".") + cp = rig.create_cloud_path("dir_0/file0_0.txt") + + # Use regex to find the methods not implemented that are listed in the CloudPath code + not_implemented_section = re.search( + r"# =+ NOT IMPLEMENTED =+\n(.+?)\n\n", inspect.getsource(CloudPath), re.DOTALL + ) + + if not_implemented_section: + methods_not_implemented_str = not_implemented_section.group(1) + methods_not_implemented = re.findall(r"# (\w+)", methods_not_implemented_str) + + for name, lp_member in inspect.getmembers(lp): + if name.startswith("_") or name in methods_not_implemented: + continue + + # checks all public methods and properties + cp_member = getattr(cp, name, None) + assert cp_member is not None, f"CloudPath missing {name}" + + # for methods, checks the function signature + if callable(lp_member): + cp_signature = inspect.signature(cp_member) + lp_signature = inspect.signature(lp_member) + + # all parameters for Path method should be part of CloudPath signature + for parameter in lp_signature.parameters: + # some parameters like _deprecated in Path.is_relative_to are not really part of the signature + if parameter.startswith("_"): + continue + + assert ( + parameter in cp_signature.parameters + ), f"CloudPath.{name} missing parameter {parameter}" + + # extra parameters for CloudPath method should be optional with defaults + for parameter, param_details in cp_signature.parameters.items(): + if parameter not in lp_signature.parameters: + assert ( + param_details.default is not inspect.Parameter.empty + ), f"CloudPath.{name} added parameter {parameter} without a default" From 41c211d519735a0c7d99857085639129df52519c Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 14:57:21 -0700 Subject: [PATCH 04/20] Remove unused import --- tests/test_cloudpath_instantiation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index f510956f..7dbe7b78 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -1,6 +1,6 @@ import inspect import os -from pathlib import Path, PurePath +from pathlib import PurePath import re import pytest From 236af455bdab82b88975c280df4a4d41a3a70a91 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 16:11:20 -0700 Subject: [PATCH 05/20] ignore type errors --- cloudpathlib/cloudpath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 6f3b9435..c8bf15ae 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -45,7 +45,7 @@ from typing_extensions import Self if sys.version_info >= (3, 12): - from pathlib import posixpath as _posix_flavour + from pathlib import posixpath as _posix_flavour # type: ignore[attr-defined] else: from pathlib import _posix_flavour @@ -730,7 +730,7 @@ def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath: raise ValueError( f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path" ) - return self._path.relative_to(other._path, walk_up=walk_up) + return self._path.relative_to(other._path, walk_up=walk_up) # type: ignore[call-arg] def is_relative_to(self, other: Self) -> bool: try: From b02e4e745022bb55e536b5b656f3d3cf9efd08c0 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 16:21:52 -0700 Subject: [PATCH 06/20] ignore more type errors --- cloudpathlib/cloudpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index c8bf15ae..65e28346 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -47,7 +47,7 @@ if sys.version_info >= (3, 12): from pathlib import posixpath as _posix_flavour # type: ignore[attr-defined] else: - from pathlib import _posix_flavour + from pathlib import _posix_flavour # type: ignore[attr-defined] from cloudpathlib.enums import FileCacheMode From fc21d15592fd941b8501693a322819affca9b6fe Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 22 Sep 2023 16:40:44 -0700 Subject: [PATCH 07/20] make linting and tests work on multiple py versions --- cloudpathlib/cloudpath.py | 21 ++++++++++++++++++--- tests/test_cloudpath_instantiation.py | 10 +++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 65e28346..81f9434e 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -8,7 +8,6 @@ PosixPath, PurePosixPath, WindowsPath, - _make_selector, _PathParents, ) @@ -46,8 +45,13 @@ if sys.version_info >= (3, 12): from pathlib import posixpath as _posix_flavour # type: ignore[attr-defined] + from pathlib import _make_selector # type: ignore[attr-defined] else: from pathlib import _posix_flavour # type: ignore[attr-defined] + from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined] + + def _make_selector(pattern_parts, _flavour, case_sensitive=True): + return _make_selector_pathlib(tuple(pattern_parts), _flavour) from cloudpathlib.enums import FileCacheMode @@ -730,7 +734,13 @@ def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath: raise ValueError( f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path" ) - return self._path.relative_to(other._path, walk_up=walk_up) # type: ignore[call-arg] + + kwargs = dict(walk_up=walk_up) + + if sys.version_info < (3, 12): + kwargs.pop("walk_up") + + return self._path.relative_to(other._path, **kwargs) # type: ignore[call-arg] def is_relative_to(self, other: Self) -> bool: try: @@ -748,7 +758,12 @@ def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> boo if path_pattern.startswith(self.anchor + self.drive + "/"): path_pattern = path_pattern[len(self.anchor + self.drive + "/") :] - return self._dispatch_to_path("match", path_pattern, case_sensitive=case_sensitive) + kwargs = dict(case_sensitive=case_sensitive) + + if sys.version_info < (3, 12): + kwargs.pop("case_sensitive") + + return self._dispatch_to_path("match", path_pattern, **kwargs) @property def parent(self) -> Self: diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index 7dbe7b78..366607fc 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -121,7 +121,9 @@ def test_public_interface_is_superset(rig): # all parameters for Path method should be part of CloudPath signature for parameter in lp_signature.parameters: # some parameters like _deprecated in Path.is_relative_to are not really part of the signature - if parameter.startswith("_"): + if parameter.startswith("_") or ( + name == "joinpath" and parameter in ["args", "pathsegments"] + ): # handle arg name change in 3.12 continue assert ( @@ -130,6 +132,12 @@ def test_public_interface_is_superset(rig): # extra parameters for CloudPath method should be optional with defaults for parameter, param_details in cp_signature.parameters.items(): + if name == "joinpath" and parameter in [ + "args", + "pathsegments", + ]: # handle arg name change in 3.12 + continue + if parameter not in lp_signature.parameters: assert ( param_details.default is not inspect.Parameter.empty From 243713af5bab435b675215922f6e1a0260f4d2ef Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 12:46:40 -0700 Subject: [PATCH 08/20] add 3.12 to CI and pyproject --- .github/workflows/tests.yml | 2 +- pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6918cf64..708bf4ad 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -42,7 +42,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] + python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/pyproject.toml b/pyproject.toml index 998de781..1aec1d6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] requires-python = ">=3.7" dependencies = [ @@ -49,7 +50,7 @@ all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"] [tool.black] line-length = 99 -target-version = ['py37', 'py38', 'py39', 'py310', 'py311'] +target-version = ['py37', 'py38', 'py39', 'py310', 'py311', 'py312'] include = '\.pyi?$|\.ipynb$' extend-exclude = ''' /( From 07bcb1604e00c6b12c8ae5d14093439878453e3b Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 12:55:56 -0700 Subject: [PATCH 09/20] use pytest-cases fork --- requirements-dev.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 3acae4f4..ade42b38 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -20,7 +20,8 @@ pillow psutil pydantic pytest -pytest-cases +# pytest-cases +git+https://github.com/jayqi/python-pytest-cases@packaging-version pytest-cov pytest-xdist python-dotenv From d8fadc3a24063d80601663b466af95030ee9d79f Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 18:58:40 -0700 Subject: [PATCH 10/20] More performant walk implementation --- cloudpathlib/cloudpath.py | 42 ++++++++++++++++++++++++-- tests/performance/perf_file_listing.py | 9 ++++++ tests/performance/runner.py | 11 ++++++- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 81f9434e..c3b2e6a8 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -416,7 +416,7 @@ def _glob_checks(self, pattern: str) -> None: ".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()" ) - def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]: + def _build_subtree(self, recursive): # build a tree structure for all files out of default dicts Tree: Callable = lambda: defaultdict(Tree) @@ -443,7 +443,10 @@ def _build_tree(trunk, branch, nodes, is_dir): nodes = (p for p in parts) _build_tree(file_tree, next(nodes, None), nodes, is_dir) - file_tree = dict(file_tree) # freeze as normal dict before passing in + return dict(file_tree) # freeze as normal dict before passing in + + def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]: + file_tree = self._build_subtree(recursive) root = _CloudPathSelectable( self.name, @@ -489,6 +492,41 @@ def iterdir(self) -> Generator[Self, None, None]: if f != self: # iterdir does not include itself in pathlib yield f + @staticmethod + def _walk_results_from_tree(root, tree, top_down=True): + """ Utility to yield tuples in the form expected by `.walk` from the file + tree constructed by `_build_substree`. + """ + dirs = [] + files = [] + for item, branch in tree.items(): + files.append(item) if branch is None else dirs.append(item) + + if top_down: + yield root, dirs, files + + for dir in dirs: + yield from CloudPath._walk_results_from_tree(root / dir, tree[dir], top_down=top_down) + + if not top_down: + yield root, dirs, files + + def walk( + self, + top_down: bool = True, + on_error: Optional[Callable] = None, + follow_symlinks: bool = False, + ) -> Generator[Tuple[Self, List[str], List[str]], None, None]: + try: + file_tree = self._build_subtree(recursive=True) # walking is always recursive + yield from self._walk_results_from_tree(self, file_tree, top_down=top_down) + + except Exception as e: + if on_error is not None: + on_error(e) + else: + raise + def open( self, mode: str = "r", diff --git a/tests/performance/perf_file_listing.py b/tests/performance/perf_file_listing.py index c555b612..054a2e54 100644 --- a/tests/performance/perf_file_listing.py +++ b/tests/performance/perf_file_listing.py @@ -10,3 +10,12 @@ def glob(folder, recursive): return {"n_items": len(list(folder.rglob("*.item")))} else: return {"n_items": len(list(folder.glob("*.item")))} + + +def walk(folder): + n_items = 0 + + for _, _, files in folder.walk(): + n_items += len(files) + + return {"n_items": n_items} \ No newline at end of file diff --git a/tests/performance/runner.py b/tests/performance/runner.py index 494882fd..859b5494 100644 --- a/tests/performance/runner.py +++ b/tests/performance/runner.py @@ -14,7 +14,7 @@ from cloudpathlib import CloudPath -from perf_file_listing import folder_list, glob +from perf_file_listing import folder_list, glob, walk # make loguru and tqdm play nicely together @@ -137,6 +137,15 @@ def main(root, iterations, burn_in): PerfRunConfig(name="Glob deep non-recursive", args=[deep, False], kwargs={}), ], ), + ( + "Walk scenarios", + walk, + [ + PerfRunConfig(name="Walk shallow", args=[shallow], kwargs={}), + PerfRunConfig(name="Walk normal", args=[normal], kwargs={}), + PerfRunConfig(name="Walk deep", args=[deep], kwargs={}), + ], + ), ] logger.info( From a78438bc476c33e20a13c5bd208482e40874f2b3 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 19:01:51 -0700 Subject: [PATCH 11/20] format --- cloudpathlib/cloudpath.py | 4 ++-- tests/performance/perf_file_listing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index c3b2e6a8..f2fa8e37 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -494,8 +494,8 @@ def iterdir(self) -> Generator[Self, None, None]: @staticmethod def _walk_results_from_tree(root, tree, top_down=True): - """ Utility to yield tuples in the form expected by `.walk` from the file - tree constructed by `_build_substree`. + """Utility to yield tuples in the form expected by `.walk` from the file + tree constructed by `_build_substree`. """ dirs = [] files = [] diff --git a/tests/performance/perf_file_listing.py b/tests/performance/perf_file_listing.py index 054a2e54..3551a85d 100644 --- a/tests/performance/perf_file_listing.py +++ b/tests/performance/perf_file_listing.py @@ -18,4 +18,4 @@ def walk(folder): for _, _, files in folder.walk(): n_items += len(files) - return {"n_items": n_items} \ No newline at end of file + return {"n_items": n_items} From bf79c410c3f79d30d2d6524d19934ea3b3656e60 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 21:54:54 -0700 Subject: [PATCH 12/20] update methods --- README.md | 5 ++++- cloudpathlib/cloudpath.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1343a840..ffde2073 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one | `is_absolute` | ✅ | ✅ | ✅ | | `is_dir` | ✅ | ✅ | ✅ | | `is_file` | ✅ | ✅ | ✅ | +| `is_junction` | ✅ | ✅ | ✅ | | `is_relative_to` | ✅ | ✅ | ✅ | | `iterdir` | ✅ | ✅ | ✅ | | `joinpath` | ✅ | ✅ | ✅ | @@ -160,7 +161,9 @@ Most methods and properties from `pathlib.Path` are supported except for the one | `suffixes` | ✅ | ✅ | ✅ | | `touch` | ✅ | ✅ | ✅ | | `unlink` | ✅ | ✅ | ✅ | +| `walk` | ✅ | ✅ | ✅ | | `with_name` | ✅ | ✅ | ✅ | +| `with_segments` | ✅ | ✅ | ✅ | | `with_stem` | ✅ | ✅ | ✅ | | `with_suffix` | ✅ | ✅ | ✅ | | `write_bytes` | ✅ | ✅ | ✅ | @@ -170,6 +173,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one | `cwd` | ❌ | ❌ | ❌ | | `expanduser` | ❌ | ❌ | ❌ | | `group` | ❌ | ❌ | ❌ | +| `hardlink_to` | ❌ | ❌ | ❌ | | `home` | ❌ | ❌ | ❌ | | `is_block_device` | ❌ | ❌ | ❌ | | `is_char_device` | ❌ | ❌ | ❌ | @@ -179,7 +183,6 @@ Most methods and properties from `pathlib.Path` are supported except for the one | `is_socket` | ❌ | ❌ | ❌ | | `is_symlink` | ❌ | ❌ | ❌ | | `lchmod` | ❌ | ❌ | ❌ | -| `link_to` | ❌ | ❌ | ❌ | | `lstat` | ❌ | ❌ | ❌ | | `owner` | ❌ | ❌ | ❌ | | `readlink` | ❌ | ❌ | ❌ | diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index f2fa8e37..25b977f2 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -352,6 +352,8 @@ def __ge__(self, other: Any) -> bool: # owner - no cloud equivalent # root - drive already has the bucket and anchor/prefix has the scheme, so nothing to store here # symlink_to - no cloud equivalent + # link_to - no cloud equivalent + # hardlink_to - no cloud equivalent # ====================== REQUIRED, NOT GENERIC ====================== # Methods that must be implemented, but have no generic application From 9a28b27bd4ef0bbbc81f256402a2eaf6e46cfe3b Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 21:55:08 -0700 Subject: [PATCH 13/20] Test walk method --- tests/test_cloudpath_file_io.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 8b9a7051..e88c37ce 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -104,11 +104,12 @@ def _make_glob_directory(root): rmtree(local_root) -def _assert_glob_results_match(cloud_results, local_results, cloud_root, local_root): - def _lstrip_path_root(path, root): - rel_path = str(path)[len(str(root)) :] - return rel_path.rstrip("/") # agnostic to trailing slash +def _lstrip_path_root(path, root): + rel_path = str(path)[len(str(root)) :] + return rel_path.rstrip("/") # agnostic to trailing slash + +def _assert_glob_results_match(cloud_results, local_results, cloud_root, local_root): local_results_no_root = [_lstrip_path_root(c.as_posix(), local_root) for c in local_results] cloud_results_no_root = [_lstrip_path_root(c, cloud_root) for c in cloud_results] @@ -119,6 +120,19 @@ def _lstrip_path_root(path, root): assert set(local_results_no_root) == set(cloud_results_no_root) +def _assert_walk_results_match(cloud_results, local_results, cloud_root, local_root): + for (cloud_item, cloud_dirs, cloud_files), (local_item, local_dirs, local_files) in zip( + cloud_results, local_results + ): + # check items returned by walk by stripping the root and comparing strings + assert _lstrip_path_root(cloud_item, cloud_root) == _lstrip_path_root( + local_item.as_posix(), local_root + ) + + assert cloud_dirs == local_dirs + assert local_files == cloud_files + + def test_iterdir(glob_test_dirs): cloud_root, local_root = glob_test_dirs @@ -138,6 +152,14 @@ def test_iterdir(glob_test_dirs): ) +def test_walk(glob_test_dirs): + cloud_root, local_root = glob_test_dirs + _assert_walk_results_match(cloud_root.walk(), local_root.walk(), cloud_root, local_root) + _assert_walk_results_match( + cloud_root.walk(top_down=False), local_root.walk(top_down=False), cloud_root, local_root + ) + + def test_list_buckets(rig): # test we can list buckets buckets = list(rig.path_class(f"{rig.path_class.cloud_prefix}").iterdir()) From d00628737e5b2ea1bfff0cb1b78dc72839fc7810 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 7 Oct 2023 22:02:36 -0700 Subject: [PATCH 14/20] Version agnostic tests --- tests/test_cloudpath_file_io.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index e88c37ce..deeab585 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -1,6 +1,6 @@ from datetime import datetime import os -from pathlib import PurePosixPath +from pathlib import Path, PurePosixPath import pickle from shutil import rmtree from time import sleep @@ -126,7 +126,7 @@ def _assert_walk_results_match(cloud_results, local_results, cloud_root, local_r ): # check items returned by walk by stripping the root and comparing strings assert _lstrip_path_root(cloud_item, cloud_root) == _lstrip_path_root( - local_item.as_posix(), local_root + Path(local_item).as_posix(), local_root ) assert cloud_dirs == local_dirs @@ -154,9 +154,20 @@ def test_iterdir(glob_test_dirs): def test_walk(glob_test_dirs): cloud_root, local_root = glob_test_dirs - _assert_walk_results_match(cloud_root.walk(), local_root.walk(), cloud_root, local_root) + + # walk only natively available in python 3.12+ + local_results = local_root.walk() if hasattr(local_root, "walk") else os.walk(local_root) + + _assert_walk_results_match(cloud_root.walk(), local_results, cloud_root, local_root) + + local_results = ( + local_root.walk(top_down=False) + if hasattr(local_root, "walk") + else os.walk(local_root, topdown=False) + ) + _assert_walk_results_match( - cloud_root.walk(top_down=False), local_root.walk(top_down=False), cloud_root, local_root + cloud_root.walk(top_down=False), local_results, cloud_root, local_root ) From 921f84e27cc1ba0d866b8dc26768cc17b5282976 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 8 Oct 2023 09:40:35 -0700 Subject: [PATCH 15/20] update tests --- tests/test_cloudpath_file_io.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index deeab585..b96cf1c4 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -3,6 +3,7 @@ from pathlib import Path, PurePosixPath import pickle from shutil import rmtree +import sys from time import sleep import pytest @@ -129,8 +130,8 @@ def _assert_walk_results_match(cloud_results, local_results, cloud_root, local_r Path(local_item).as_posix(), local_root ) - assert cloud_dirs == local_dirs - assert local_files == cloud_files + assert set(cloud_dirs) == set(local_dirs) # order not guaranteed + assert set(local_files) == set(cloud_files) # order not guaranteed def test_iterdir(glob_test_dirs): @@ -188,10 +189,10 @@ def test_glob(glob_test_dirs): # cases adapted from CPython glob tests: # https://github.com/python/cpython/blob/7ffe7ba30fc051014977c6f393c51e57e71a6648/Lib/test/test_pathlib.py#L1634-L1720 - def _check_glob(pattern, glob_method): + def _check_glob(pattern, glob_method, **kwargs): _assert_glob_results_match( - getattr(cloud_root, glob_method)(pattern), - getattr(local_root, glob_method)(pattern), + getattr(cloud_root, glob_method)(pattern, **kwargs), + getattr(local_root, glob_method)(pattern, **kwargs), cloud_root, local_root, ) @@ -223,6 +224,21 @@ def _check_glob(pattern, glob_method): dir_c_cloud.rglob("*/*"), dir_c_local.rglob("*/*"), dir_c_cloud, dir_c_local ) + # 3.12+ kwargs + if sys.version_info < (3, 12): + _check_glob("dir*/FILE*", "glob", case_sensitive=False) + _check_glob("dir*/file*", "glob", case_sensitive=True) + _check_glob("dir*/FILE*", "rglob", case_sensitive=False) + _check_glob("dir*/file*", "rglob", case_sensitive=True) + + # test case insensitive for cloud; sensitive different pattern for local + _assert_glob_results_match( + dir_c_cloud.glob("FILE*", case_sensitive=False), + dir_c_local.glob("file*"), + dir_c_cloud, + dir_c_local, + ) + def test_glob_buckets(rig): # CloudPath("s3://").glob("*") results in error From b87d95d0e2c83fab47210d64022da040c60b8da4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 8 Oct 2023 10:18:26 -0700 Subject: [PATCH 16/20] Add tests --- cloudpathlib/cloudpath.py | 3 +++ tests/test_cloudpath_file_io.py | 2 +- tests/test_cloudpath_manipulation.py | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 25b977f2..4e322fa0 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -844,6 +844,9 @@ def with_name(self, name: str) -> Self: return self._dispatch_to_path("with_name", name) def with_segments(self, *pathsegments) -> Self: + """Create a new CloudPath with the same client out of the given segments. + The first segment will be interpreted as the bucket/container name. + """ return self._new_cloudpath("/".join(pathsegments)) def with_suffix(self, suffix: str) -> Self: diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index b96cf1c4..fcc5a426 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -225,7 +225,7 @@ def _check_glob(pattern, glob_method, **kwargs): ) # 3.12+ kwargs - if sys.version_info < (3, 12): + if sys.version_info >= (3, 12): _check_glob("dir*/FILE*", "glob", case_sensitive=False) _check_glob("dir*/file*", "glob", case_sensitive=True) _check_glob("dir*/FILE*", "rglob", case_sensitive=False) diff --git a/tests/test_cloudpath_manipulation.py b/tests/test_cloudpath_manipulation.py index 1b405957..a6aad166 100644 --- a/tests/test_cloudpath_manipulation.py +++ b/tests/test_cloudpath_manipulation.py @@ -1,4 +1,5 @@ from pathlib import PurePosixPath +import sys import pytest @@ -50,6 +51,12 @@ def test_relative_to(rig, azure_rig, gs_rig): assert rig.create_cloud_path("bucket/b/c/d.file").relative_to( rig.create_cloud_path("bucket/z") ) + + if sys.version_info >= (3, 12): + assert rig.create_cloud_path("bucket/path/to/file.txt").relative_to( + rig.create_cloud_path("other_bucket/path2"), walk_up=True + ) == PurePosixPath("../../bucket/path/to/file.txt") + with pytest.raises(ValueError): assert rig.create_cloud_path("a/b/c/d.file").relative_to(PurePosixPath("/a/b/c")) other_rig = azure_rig if rig.cloud_prefix != azure_rig.cloud_prefix else gs_rig @@ -73,6 +80,9 @@ def test_joins(rig): assert not rig.create_cloud_path("a/b/c/d").match("**/c") assert rig.create_cloud_path("a/b/c/d").match("a/*/c/d") + if sys.version_info >= (3, 12): + assert rig.create_cloud_path("a/b/c/d").match("A/*/C/D", case_sensitive=False) + assert rig.create_cloud_path("a/b/c/d").anchor == rig.cloud_prefix assert rig.create_cloud_path("a/b/c/d").parent == rig.create_cloud_path("a/b/c") @@ -107,6 +117,16 @@ def test_joins(rig): ) +def test_with_segments(rig): + assert rig.create_cloud_path("a/b/c/d").with_segments( + "x", "y", "z" + ) == rig.client_class().CloudPath(f"{rig.cloud_prefix}x/y/z") + + +def test_is_junction(rig): + assert not rig.create_cloud_path("a/b/foo").is_junction() + + def test_equality(rig): assert rig.create_cloud_path("a/b/foo") == rig.create_cloud_path("a/b/foo") assert hash(rig.create_cloud_path("a/b/foo")) == hash(rig.create_cloud_path("a/b/foo")) From 21fa27d159dfd23ac39cc1c0a58962ec1714e75f Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 8 Oct 2023 10:49:10 -0700 Subject: [PATCH 17/20] Order agnostic walk test --- tests/test_cloudpath_file_io.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index fcc5a426..af8f9e63 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -122,13 +122,20 @@ def _assert_glob_results_match(cloud_results, local_results, cloud_root, local_r def _assert_walk_results_match(cloud_results, local_results, cloud_root, local_root): - for (cloud_item, cloud_dirs, cloud_files), (local_item, local_dirs, local_files) in zip( - cloud_results, local_results - ): - # check items returned by walk by stripping the root and comparing strings - assert _lstrip_path_root(cloud_item, cloud_root) == _lstrip_path_root( - Path(local_item).as_posix(), local_root - ) + # order not guaranteed, so strip use top as keys for matching + cloud_results = { + _lstrip_path_root(top, cloud_root): [dirs, files] for top, dirs, files in cloud_results + } + local_results = { + _lstrip_path_root(Path(top).as_posix(), local_root): [dirs, files] + for top, dirs, files in local_results + } + + assert set(cloud_results.keys()) == set(local_results.keys()) + + for top in local_results: + local_dirs, local_files = local_results[top] + cloud_dirs, cloud_files = cloud_results[top] assert set(cloud_dirs) == set(local_dirs) # order not guaranteed assert set(local_files) == set(cloud_files) # order not guaranteed From f9d3e9be58b08d67f0d83e08fd547815d68fac36 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 8 Oct 2023 10:49:15 -0700 Subject: [PATCH 18/20] Changes --- HISTORY.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 25aee0e6..a918f2e6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,10 @@ # cloudpathlib Changelog -## UNRELEASED +## v0.16.0 (2023-10-08) - Add "CloudPath" as return type on `__init__` for mypy issues. ([Issue #179](https://github.com/drivendataorg/cloudpathlib/issues/179), [PR #342](https://github.com/drivendataorg/cloudpathlib/pull/342)) - Add `with_stem` to all path types when python version supports it (>=3.9). ([Issue #287](https://github.com/drivendataorg/cloudpathlib/issues/287), [PR #290](https://github.com/drivendataorg/cloudpathlib/pull/290), thanks to [@Gilthans](https://github.com/Gilthans)) - Add `newline` parameter to the `write_text` method to align to `pathlib` functionality as of Python 3.10. [PR #362]https://github.com/drivendataorg/cloudpathlib/pull/362), thanks to [@pricemg](https://github.com/pricemg). + - Add support for Python 3.12 ([PR #290](https://github.com/drivendataorg/cloudpathlib/pull/364)) ## v0.15.1 (2023-07-12) From 64ec8bc9fe7216d8dcf297fd60fda04cc9b1f593 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sun, 8 Oct 2023 10:49:41 -0700 Subject: [PATCH 19/20] Update changelog --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index a918f2e6..978a70be 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,7 +4,7 @@ - Add "CloudPath" as return type on `__init__` for mypy issues. ([Issue #179](https://github.com/drivendataorg/cloudpathlib/issues/179), [PR #342](https://github.com/drivendataorg/cloudpathlib/pull/342)) - Add `with_stem` to all path types when python version supports it (>=3.9). ([Issue #287](https://github.com/drivendataorg/cloudpathlib/issues/287), [PR #290](https://github.com/drivendataorg/cloudpathlib/pull/290), thanks to [@Gilthans](https://github.com/Gilthans)) - Add `newline` parameter to the `write_text` method to align to `pathlib` functionality as of Python 3.10. [PR #362]https://github.com/drivendataorg/cloudpathlib/pull/362), thanks to [@pricemg](https://github.com/pricemg). - - Add support for Python 3.12 ([PR #290](https://github.com/drivendataorg/cloudpathlib/pull/364)) + - Add support for Python 3.12 ([PR #364](https://github.com/drivendataorg/cloudpathlib/pull/364)) ## v0.15.1 (2023-07-12) From 95f8cb82d25d7c0008f3b6c44d4e30b366812724 Mon Sep 17 00:00:00 2001 From: P Bull Date: Sun, 8 Oct 2023 11:33:38 -0700 Subject: [PATCH 20/20] sleep for flaky test --- tests/test_caching.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_caching.py b/tests/test_caching.py index 607e3da4..15321129 100644 --- a/tests/test_caching.py +++ b/tests/test_caching.py @@ -231,6 +231,7 @@ def test_interaction_with_local_cache_dir(rig: CloudProviderTestRig, tmpdir): assert cp.client.file_cache_mode == FileCacheMode.tmp_dir # download from cloud into the cache + sleep(0.1) # test can be flaky saing that the cache dir doesn't exist yet with cp.open("r") as f: _ = f.read()