diff --git a/.github/workflows/docs-master.yml b/.github/workflows/docs-master.yml index d0afe3c5..5209e1ea 100644 --- a/.github/workflows/docs-master.yml +++ b/.github/workflows/docs-master.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.11 cache: "pip" # caching pip dependencies cache-dependency-path: | pyproject.toml diff --git a/.github/workflows/docs-preview.yml b/.github/workflows/docs-preview.yml index d01c8e41..e74d44f2 100644 --- a/.github/workflows/docs-preview.yml +++ b/.github/workflows/docs-preview.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.11 cache: "pip" # caching pip dependencies cache-dependency-path: | pyproject.toml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e90a118f..c49b4253 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.11 - name: Install dependencies run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b81ca408..adab5247 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/HISTORY.md b/HISTORY.md index 66b2baad..1dd1caea 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,18 @@ - Added support for custom schemes in CloudPath and Client subclases. 
(Issue [#466](https://github.com/drivendataorg/cloudpathlib/issues/466), PR [#467](https://github.com/drivendataorg/cloudpathlib/pull/467)) - Fixed `ResourceNotFoundError` on Azure gen2 storage accounts with HNS enabled and issue that some Azure credentials do not have `account_name`. (Issue [#470](https://github.com/drivendataorg/cloudpathlib/issues/470), Issue [#476](https://github.com/drivendataorg/cloudpathlib/issues/476), PR [#478](https://github.com/drivendataorg/cloudpathlib/pull/478)) +- Added support for Python 3.13 (Issue [#472](https://github.com/drivendataorg/cloudpathlib/issues/472), PR [#474](https://github.com/drivendataorg/cloudpathlib/pull/474)): + - [`.full_match` added](https://docs.python.org/3.13/library/pathlib.html#pathlib.PurePath.full_match) + - [`.from_uri` added](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.from_uri) + - [`follow_symlinks` kwarg added to `is_file`](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.is_file) (no-op) + - [`follow_symlinks` kwarg added to `is_dir`](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.is_dir) (no-op) + - [`newline` kwarg added to `read_text`](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.read_text) + - [`recurse_symlinks` kwarg added to `glob`](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.glob) (no-op) + - [`pattern` parameter for `glob` can be PathLike](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.glob) + - [`recurse_symlinks` kwarg added to `rglob`](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.rglob) (no-op) + - [`pattern` parameter for `rglob` can be PathLike](https://docs.python.org/3.13/library/pathlib.html#pathlib.Path.rglob) + - [`.parser` property added](https://docs.python.org/3.13/library/pathlib.html#pathlib.PurePath.parser) + ## v0.19.0 (2024-08-29) diff --git a/cloudpathlib/azure/azblobpath.py b/cloudpathlib/azure/azblobpath.py index 
282ea18d..4f8df0c9 100644 --- a/cloudpathlib/azure/azblobpath.py +++ b/cloudpathlib/azure/azblobpath.py @@ -39,12 +39,6 @@ class AzureBlobPath(CloudPath): def drive(self) -> str: return self.container - def is_dir(self) -> bool: - return self.client._is_file_or_dir(self) == "dir" - - def is_file(self) -> bool: - return self.client._is_file_or_dir(self) == "file" - def mkdir(self, parents=False, exist_ok=False): self.client._mkdir(self, parents=parents, exist_ok=exist_ok) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 4aa895a6..ef28d5e6 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -9,11 +9,11 @@ PosixPath, PurePosixPath, WindowsPath, - _PathParents, ) import shutil import sys +from types import MethodType from typing import ( BinaryIO, Literal, @@ -56,21 +56,29 @@ else: from typing_extensions import Self -if sys.version_info >= (3, 12): + +if sys.version_info < (3, 12): + from pathlib import _posix_flavour # type: ignore[attr-defined] # noqa: F811 + from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined] # noqa: F811 + from pathlib import _PathParents # type: ignore[attr-defined] + + def _make_selector(pattern_parts, _flavour, case_sensitive=True): # noqa: F811 + return _make_selector_pathlib(tuple(pattern_parts), _flavour) + +elif sys.version_info[:2] == (3, 12): + from pathlib import _PathParents # type: ignore[attr-defined] from pathlib import posixpath as _posix_flavour # type: ignore[attr-defined] from pathlib import _make_selector # type: ignore[attr-defined] -else: - from pathlib import _posix_flavour # type: ignore[attr-defined] - from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined] +elif sys.version_info >= (3, 13): + from pathlib._local import _PathParents + import posixpath as _posix_flavour # type: ignore[attr-defined] # noqa: F811 - def _make_selector(pattern_parts, _flavour, case_sensitive=True): - return 
_make_selector_pathlib(tuple(pattern_parts), _flavour) + from .legacy.glob import _make_selector # noqa: F811 from cloudpathlib.enums import FileCacheMode from . import anypath - from .exceptions import ( ClientMismatchError, CloudPathFileExistsError, @@ -194,7 +202,12 @@ def __init__(cls, name: str, bases: Tuple[type, ...], dic: Dict[str, Any]) -> No and getattr(getattr(Path, attr), "__doc__", None) ): docstring = getattr(Path, attr).__doc__ + " _(Docstring copied from pathlib.Path)_" - getattr(cls, attr).__doc__ = docstring + + if isinstance(getattr(cls, attr), (MethodType)): + getattr(cls, attr).__func__.__doc__ = docstring + else: + getattr(cls, attr).__doc__ = docstring + if isinstance(getattr(cls, attr), property): # Properties have __doc__ duplicated under fget, and at least some parsers # read it from there. @@ -383,16 +396,6 @@ def drive(self) -> str: """For example "bucket" on S3 or "container" on Azure; needs to be defined for each class""" pass - @abc.abstractmethod - def is_dir(self) -> bool: - """Should be implemented without requiring a dir is downloaded""" - pass - - @abc.abstractmethod - def is_file(self) -> bool: - """Should be implemented without requiring that the file is downloaded""" - pass - @abc.abstractmethod def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None: """Should be implemented using the client API without requiring a dir is downloaded""" @@ -427,17 +430,35 @@ def as_uri(self) -> str: def exists(self) -> bool: return self.client._exists(self) + def is_dir(self, follow_symlinks=True) -> bool: + return self.client._is_file_or_dir(self) == "dir" + + def is_file(self, follow_symlinks=True) -> bool: + return self.client._is_file_or_dir(self) == "file" + @property def fspath(self) -> str: return self.__fspath__() - def _glob_checks(self, pattern: str) -> None: - if ".." 
in pattern: + @classmethod + def from_uri(cls, uri: str) -> Self: + return cls(uri) + + def _glob_checks(self, pattern: Union[str, os.PathLike]) -> str: + if isinstance(pattern, os.PathLike): + if isinstance(pattern, CloudPath): + str_pattern = str(pattern.relative_to(self)) + else: + str_pattern = os.fspath(pattern) + else: + str_pattern = str(pattern) + + if ".." in str_pattern: raise CloudPathNotImplementedError( "Relative paths with '..' not supported in glob patterns." ) - if pattern.startswith(self.cloud_prefix) or pattern.startswith("/"): + if str_pattern.startswith(self.cloud_prefix) or str_pattern.startswith("/"): raise CloudPathNotImplementedError("Non-relative patterns are unsupported") if self.drive == "": @@ -445,6 +466,8 @@ def _glob_checks(self, pattern: str) -> None: ".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()" ) + return str_pattern + def _build_subtree(self, recursive): # build a tree structure for all files out of default dicts Tree: Callable = lambda: defaultdict(Tree) @@ -488,9 +511,9 @@ def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]: yield (self / str(p)[len(self.name) + 1 :]) def glob( - self, pattern: str, case_sensitive: Optional[bool] = None + self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None ) -> Generator[Self, None, None]: - self._glob_checks(pattern) + pattern = self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts selector = _make_selector( @@ -505,9 +528,9 @@ def glob( ) def rglob( - self, pattern: str, case_sensitive: Optional[bool] = None + self, pattern: Union[str, os.PathLike], case_sensitive: Optional[bool] = None ) -> Generator[Self, None, None]: - self._glob_checks(pattern) + pattern = self._glob_checks(pattern) pattern_parts = PurePosixPath(pattern).parts selector = _make_selector( @@ -812,8 +835,13 @@ def read_bytes(self) -> bytes: with self.open(mode="rb") as 
f: return f.read() - def read_text(self, encoding: Optional[str] = None, errors: Optional[str] = None) -> str: - with self.open(mode="r", encoding=encoding, errors=errors) as f: + def read_text( + self, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + ) -> str: + with self.open(mode="r", encoding=encoding, errors=errors, newline=newline) as f: return f.read() def is_junction(self): @@ -904,6 +932,19 @@ def is_relative_to(self, other: Self) -> bool: def name(self) -> str: return self._dispatch_to_path("name") + def full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> bool: + if sys.version_info < (3, 13): + raise NotImplementedError("full_match requires Python 3.13 or higher") + + # strip scheme from start of pattern before testing + if pattern.startswith(self.anchor + self.drive): + pattern = pattern[len(self.anchor + self.drive) :] + + # remove drive, which is kept on normal dispatch to pathlib + return PurePosixPath(self._no_prefix_no_drive).full_match( # type: ignore[attr-defined] + pattern, case_sensitive=case_sensitive + ) + def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool: # strip scheme from start of pattern before testing if path_pattern.startswith(self.anchor + self.drive + "/"): @@ -916,6 +957,13 @@ def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> boo return self._dispatch_to_path("match", path_pattern, **kwargs) + @property + def parser(self) -> Self: + if sys.version_info < (3, 13): + raise NotImplementedError("parser requires Python 3.13 or higher") + + return self._dispatch_to_path("parser") + @property def parent(self) -> Self: return self._dispatch_to_path("parent") diff --git a/cloudpathlib/gs/gspath.py b/cloudpathlib/gs/gspath.py index 4e6f6365..bf085a78 100644 --- a/cloudpathlib/gs/gspath.py +++ b/cloudpathlib/gs/gspath.py @@ -32,12 +32,6 @@ class GSPath(CloudPath): def drive(self) -> str: return self.bucket - def 
is_dir(self) -> bool: - return self.client._is_file_or_dir(self) == "dir" - - def is_file(self) -> bool: - return self.client._is_file_or_dir(self) == "file" - def mkdir(self, parents=False, exist_ok=False): # not possible to make empty directory on cloud storage pass diff --git a/cloudpathlib/legacy/glob.py b/cloudpathlib/legacy/glob.py new file mode 100644 index 00000000..06e578b2 --- /dev/null +++ b/cloudpathlib/legacy/glob.py @@ -0,0 +1,205 @@ +import fnmatch +import functools +import re + +# +# Globbing helpers +# + + +@functools.cache +def _is_case_sensitive(flavour): + return flavour.normcase("Aa") == "Aa" + + +# fnmatch.translate() returns a regular expression that includes a prefix and +# a suffix, which enable matching newlines and ensure the end of the string is +# matched, respectively. These features are undesirable for our implementation +# of PurePath.match(), which represents path separators as newlines and joins +# pattern segments together. As a workaround, we define a slice object that +# can remove the prefix and suffix from any translate() result. See the +# _compile_pattern_lines() function for more details. 
+_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate("_").split("_") +_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX)) +_SWAP_SEP_AND_NEWLINE = { + "/": str.maketrans({"/": "\n", "\n": "/"}), + "\\": str.maketrans({"\\": "\n", "\n": "\\"}), +} + + +@functools.lru_cache() +def _make_selector(pattern_parts, flavour, case_sensitive): + pat = pattern_parts[0] + if not pat: + return _TerminatingSelector() + if pat == "**": + child_parts_idx = 1 + while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == "**": + child_parts_idx += 1 + child_parts = pattern_parts[child_parts_idx:] + if "**" in child_parts: + cls = _DoubleRecursiveWildcardSelector + else: + cls = _RecursiveWildcardSelector + else: + child_parts = pattern_parts[1:] + if pat == "..": + cls = _ParentSelector + elif "**" in pat: + raise ValueError("Invalid pattern: '**' can only be an entire path component") + else: + cls = _WildcardSelector + return cls(pat, child_parts, flavour, case_sensitive) + + +@functools.lru_cache(maxsize=256) +def _compile_pattern(pat, case_sensitive): + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + return re.compile(fnmatch.translate(pat), flags).match + + +@functools.lru_cache() +def _compile_pattern_lines(pattern_lines, case_sensitive): + """Compile the given pattern lines to an `re.Pattern` object. + + The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with + its path separators and newlines swapped (e.g. '*\n*.py`). By using + newlines to separate path components, and not setting `re.DOTALL`, we + ensure that the `*` wildcard cannot match path separators. + + The returned `re.Pattern` object may have its `match()` method called to + match a complete pattern, or `search()` to match from the right. The + argument supplied to these methods must also have its path separators and + newlines swapped. 
+ """ + + # Match the start of the path, or just after a path separator + parts = ["^"] + for part in pattern_lines.splitlines(keepends=True): + if part == "*\n": + part = r".+\n" + elif part == "*": + part = r".+" + else: + # Any other component: pass to fnmatch.translate(). We slice off + # the common prefix and suffix added by translate() to ensure that + # re.DOTALL is not set, and the end of the string not matched, + # respectively. With DOTALL not set, '*' wildcards will not match + # path separators, because the '.' characters in the pattern will + # not match newlines. + part = fnmatch.translate(part)[_FNMATCH_SLICE] + parts.append(part) + # Match the end of the path, always. + parts.append(r"\Z") + flags = re.MULTILINE + if not case_sensitive: + flags |= re.IGNORECASE + return re.compile("".join(parts), flags=flags) + + +class _Selector: + """A selector matches a specific glob pattern part against the children + of a given path.""" + + def __init__(self, child_parts, flavour, case_sensitive): + self.child_parts = child_parts + if child_parts: + self.successor = _make_selector(child_parts, flavour, case_sensitive) + self.dironly = True + else: + self.successor = _TerminatingSelector() + self.dironly = False + + def select_from(self, parent_path): + """Iterate over all child paths of `parent_path` matched by this + selector. 
This can contain parent_path itself.""" + path_cls = type(parent_path) + scandir = path_cls._scandir + if not parent_path.is_dir(): + return iter([]) + return self._select_from(parent_path, scandir) + + +class _TerminatingSelector: + + def _select_from(self, parent_path, scandir): + yield parent_path + + +class _ParentSelector(_Selector): + + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + + def _select_from(self, parent_path, scandir): + path = parent_path._make_child_relpath("..") + for p in self.successor._select_from(path, scandir): + yield p + + +class _WildcardSelector(_Selector): + + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + if case_sensitive is None: + # TODO: evaluate case-sensitivity of each directory in _select_from() + case_sensitive = _is_case_sensitive(flavour) + self.match = _compile_pattern(pat, case_sensitive) + + def _select_from(self, parent_path, scandir): + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. 
+ with scandir(parent_path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + if self.dironly: + try: + if not entry.is_dir(): + continue + except OSError: + continue + name = entry.name + if self.match(name): + path = parent_path._make_child_relpath(name) + for p in self.successor._select_from(path, scandir): + yield p + + +class _RecursiveWildcardSelector(_Selector): + + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + + def _iterate_directories(self, parent_path): + yield parent_path + for dirpath, dirnames, _ in parent_path.walk(): + for dirname in dirnames: + yield dirpath._make_child_relpath(dirname) + + def _select_from(self, parent_path, scandir): + successor_select = self.successor._select_from + for starting_point in self._iterate_directories(parent_path): + for p in successor_select(starting_point, scandir): + yield p + + +class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): + """ + Like _RecursiveWildcardSelector, but also de-duplicates results from + successive selectors. This is necessary if the pattern contains + multiple non-adjacent '**' segments. 
+ """ + + def _select_from(self, parent_path, scandir): + yielded = set() + try: + for p in super()._select_from(parent_path, scandir): + if p not in yielded: + yield p + yielded.add(p) + finally: + yielded.clear() diff --git a/cloudpathlib/local/localclient.py b/cloudpathlib/local/localclient.py index e057390c..d37cb7c3 100644 --- a/cloudpathlib/local/localclient.py +++ b/cloudpathlib/local/localclient.py @@ -4,6 +4,7 @@ import os from pathlib import Path, PurePosixPath import shutil +import sys from tempfile import TemporaryDirectory from time import sleep from typing import Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union @@ -103,11 +104,19 @@ def _download_file(self, cloud_path: "LocalPath", local_path: Union[str, os.Path def _exists(self, cloud_path: "LocalPath") -> bool: return self._cloud_path_to_local(cloud_path).exists() - def _is_dir(self, cloud_path: "LocalPath") -> bool: - return self._cloud_path_to_local(cloud_path).is_dir() + def _is_dir(self, cloud_path: "LocalPath", follow_symlinks=True) -> bool: + kwargs = dict(follow_symlinks=follow_symlinks) + if sys.version_info < (3, 13): + kwargs.pop("follow_symlinks") - def _is_file(self, cloud_path: "LocalPath") -> bool: - return self._cloud_path_to_local(cloud_path).is_file() + return self._cloud_path_to_local(cloud_path).is_dir(**kwargs) + + def _is_file(self, cloud_path: "LocalPath", follow_symlinks=True) -> bool: + kwargs = dict(follow_symlinks=follow_symlinks) + if sys.version_info < (3, 13): + kwargs.pop("follow_symlinks") + + return self._cloud_path_to_local(cloud_path).is_file(**kwargs) def _list_dir( self, cloud_path: "LocalPath", recursive=False diff --git a/cloudpathlib/local/localpath.py b/cloudpathlib/local/localpath.py index b04fa22a..e16ff112 100644 --- a/cloudpathlib/local/localpath.py +++ b/cloudpathlib/local/localpath.py @@ -13,11 +13,11 @@ class LocalPath(CloudPath): client: "LocalClient" - def is_dir(self) -> bool: - return self.client._is_dir(self) + def is_dir(self, 
follow_symlinks=True) -> bool: + return self.client._is_dir(self, follow_symlinks=follow_symlinks) - def is_file(self) -> bool: - return self.client._is_file(self) + def is_file(self, follow_symlinks=True) -> bool: + return self.client._is_file(self, follow_symlinks=follow_symlinks) def stat(self): try: diff --git a/cloudpathlib/s3/s3path.py b/cloudpathlib/s3/s3path.py index 5642f355..034746a6 100644 --- a/cloudpathlib/s3/s3path.py +++ b/cloudpathlib/s3/s3path.py @@ -32,12 +32,6 @@ class S3Path(CloudPath): def drive(self) -> str: return self.bucket - def is_dir(self) -> bool: - return self.client._is_file_or_dir(self) == "dir" - - def is_file(self) -> bool: - return self.client._is_file_or_dir(self) == "file" - def mkdir(self, parents=False, exist_ok=False): # not possible to make empty directory on s3 pass diff --git a/pyproject.toml b/pyproject.toml index ab8241e3..e18fd610 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] requires-python = ">=3.8" dependencies = [ @@ -48,7 +49,7 @@ all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"] [tool.black] line-length = 99 -target-version = ['py38', 'py39', 'py310', 'py311', 'py312'] +target-version = ['py38', 'py39', 'py310', 'py311', 'py312', 'py313'] include = '\.pyi?$|\.ipynb$' extend-exclude = ''' /( @@ -70,3 +71,4 @@ testpaths = ["tests"] [tool.coverage.run] source = ["cloudpathlib"] +omit = ["cloudpathlib/legacy/glob.py"] \ No newline at end of file diff --git a/tests/mock_clients/mock_s3.py b/tests/mock_clients/mock_s3.py index 9b5360bd..4fb37f94 100644 --- a/tests/mock_clients/mock_s3.py +++ b/tests/mock_clients/mock_s3.py @@ -156,13 +156,9 @@ def filter(self, Prefix=""): path = self.root / Prefix if path.is_file(): - return MockCollection([PurePosixPath(path)], self.root, session=self.session) 
+ return MockCollection([path], self.root, session=self.session) - items = [ - PurePosixPath(f) - for f in path.glob("**/*") - if f.is_file() and not f.name.startswith(".") - ] + items = [f for f in path.glob("**/*") if f.is_file() and not f.name.startswith(".")] return MockCollection(items, self.root, session=self.session) @@ -174,7 +170,9 @@ def __init__(self, items, root, session=None): self.full_paths = items self.s3_obj_paths = [ - s3_obj(bucket_name=DEFAULT_S3_BUCKET_NAME, key=str(i.relative_to(self.root))) + s3_obj( + bucket_name=DEFAULT_S3_BUCKET_NAME, key=str(i.relative_to(self.root).as_posix()) + ) for i in items ] diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py index 7dc5b149..d367e1ae 100644 --- a/tests/test_cloudpath_file_io.py +++ b/tests/test_cloudpath_file_io.py @@ -198,9 +198,13 @@ def test_glob(glob_test_dirs): # https://github.com/python/cpython/blob/7ffe7ba30fc051014977c6f393c51e57e71a6648/Lib/test/test_pathlib.py#L1634-L1720 def _check_glob(pattern, glob_method, **kwargs): + local_pattern = kwargs.pop("local_pattern", None) + _assert_glob_results_match( getattr(cloud_root, glob_method)(pattern, **kwargs), - getattr(local_root, glob_method)(pattern, **kwargs), + getattr(local_root, glob_method)( + pattern if local_pattern is None else local_pattern, **kwargs + ), cloud_root, local_root, ) @@ -214,6 +218,10 @@ def _check_glob(pattern, glob_method, **kwargs): _check_glob("*A", "glob") _check_glob("*B/*", "glob") _check_glob("*/fileB", "glob") + _check_glob(cloud_root / "**/*", "glob", local_pattern="**/*") + + if sys.version_info >= (3, 13): + _check_glob(PurePosixPath("**/*"), "glob") # rglob_common _check_glob("*", "rglob") @@ -222,6 +230,10 @@ def _check_glob(pattern, glob_method, **kwargs): _check_glob("*/fileA", "rglob") _check_glob("*/fileB", "rglob") _check_glob("file*", "rglob") + _check_glob(cloud_root / "*", "rglob", local_pattern="*") + + if sys.version_info >= (3, 13): + _check_glob(PurePosixPath("*"), 
"rglob") dir_c_cloud = cloud_root / "dirC" dir_c_local = local_root / "dirC" diff --git a/tests/test_cloudpath_instantiation.py b/tests/test_cloudpath_instantiation.py index de139593..4be6085c 100644 --- a/tests/test_cloudpath_instantiation.py +++ b/tests/test_cloudpath_instantiation.py @@ -35,6 +35,7 @@ def test_dispatch(path_class, cloud_path, monkeypatch): monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project") assert isinstance(CloudPath(cloud_path), path_class) + assert isinstance(CloudPath.from_uri(cloud_path), path_class) def test_dispatch_error(): @@ -74,6 +75,14 @@ def test_instantiation_errors(rig): rig.path_class("NOT_S3_PATH") +def test_from_uri(rig): + p = rig.create_cloud_path("dir_0/file0_0.txt") + + # classmethod uses default client + assert rig.path_class.from_uri(str(p)) == p + assert rig.path_class.from_uri(str(p)).client == p.client + + def test_idempotency(rig): rig.client_class._default_client = None diff --git a/tests/test_cloudpath_manipulation.py b/tests/test_cloudpath_manipulation.py index aaf4098c..d42b154a 100644 --- a/tests/test_cloudpath_manipulation.py +++ b/tests/test_cloudpath_manipulation.py @@ -1,4 +1,5 @@ from pathlib import PurePosixPath +import posixpath import sys import pytest @@ -177,3 +178,29 @@ def test_sorting(rig): assert cp1 > str(cp1) with pytest.raises(TypeError): assert cp1 >= str(cp1) + + +def test_full_match(rig): + if sys.version_info < (3, 13): + with pytest.raises(NotImplementedError): + rig.create_cloud_path("a/b/c").full_match("**/a/b/c") + else: + assert rig.create_cloud_path("a/b/c").full_match("**/a/b/c") + assert not rig.create_cloud_path("a/b/c").full_match("**/a/b/c/d") + assert rig.create_cloud_path("a/b.py").full_match("**/a/*.py") + assert not rig.create_cloud_path("a/b.py").full_match("*.py") + assert rig.create_cloud_path("/a/b/c.py").full_match("**/a/**") + + cp: CloudPath = rig.create_cloud_path("file.txt") + assert cp.full_match(cp._no_prefix_no_drive) + assert cp.full_match(str(cp)) + + 
+@pytest.mark.skipif(sys.version_info < (3, 13), reason="requires python3.13 or higher") +def test_parser(rig): + if sys.version_info < (3, 13): + with pytest.raises(NotImplementedError): + rig.create_cloud_path("a/b/c").parser + else: + # always posixpath since our dispatch goes to PurePosixPath + assert rig.create_cloud_path("a/b/c").parser == posixpath