Skip to content

Commit

Permalink
Add support for Python 3.12 (#364)
Browse files Browse the repository at this point in the history
* passing tests

* remove print

* make method signatures match

* Remove unused import

* ignore type errors

* ignore more type errors

* make linting and tests work on multiple py versions

* add 3.12 to CI and pyproject

* use pytest-cases fork

* More performant walk implementation

* format

* update methods

* Test walk method

* Version agnostic tests

* update tests

* Add tests

* Order agnostic walk test

* Changes

* Update changelog

* sleep for flaky test
  • Loading branch information
pjbull authored Oct 9, 2023
1 parent e6ae9fa commit eb3ba8e
Show file tree
Hide file tree
Showing 12 changed files with 279 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.7, 3.8, 3.9, "3.10", "3.11"]
python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down
3 changes: 2 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# cloudpathlib Changelog

## UNRELEASED
## v0.16.0 (2023-10-08)
- Add "CloudPath" as return type on `__init__` for mypy issues. ([Issue #179](https://github.com/drivendataorg/cloudpathlib/issues/179), [PR #342](https://github.com/drivendataorg/cloudpathlib/pull/342))
- Add `with_stem` to all path types when python version supports it (>=3.9). ([Issue #287](https://github.com/drivendataorg/cloudpathlib/issues/287), [PR #290](https://github.com/drivendataorg/cloudpathlib/pull/290), thanks to [@Gilthans](https://github.com/Gilthans))
- Add `newline` parameter to the `write_text` method to align to `pathlib` functionality as of Python 3.10. ([PR #362](https://github.com/drivendataorg/cloudpathlib/pull/362), thanks to [@pricemg](https://github.com/pricemg))
- Add support for Python 3.12 ([PR #364](https://github.com/drivendataorg/cloudpathlib/pull/364))

## v0.15.1 (2023-07-12)

Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `is_absolute` ||||
| `is_dir` ||||
| `is_file` ||||
| `is_junction` ||||
| `is_relative_to` ||||
| `iterdir` ||||
| `joinpath` ||||
Expand All @@ -160,7 +161,9 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `suffixes` ||||
| `touch` ||||
| `unlink` ||||
| `walk` ||||
| `with_name` ||||
| `with_segments` ||||
| `with_stem` ||||
| `with_suffix` ||||
| `write_bytes` ||||
Expand All @@ -170,6 +173,7 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `cwd` ||||
| `expanduser` ||||
| `group` ||||
| `hardlink_to` ||||
| `home` ||||
| `is_block_device` ||||
| `is_char_device` ||||
Expand All @@ -179,7 +183,6 @@ Most methods and properties from `pathlib.Path` are supported except for the one
| `is_socket` ||||
| `is_symlink` ||||
| `lchmod` ||||
| `link_to` ||||
| `lstat` ||||
| `owner` ||||
| `readlink` ||||
Expand Down
119 changes: 105 additions & 14 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
PosixPath,
PurePosixPath,
WindowsPath,
_make_selector,
_posix_flavour,
_PathParents,
)

import shutil
import sys
from typing import (
Expand Down Expand Up @@ -44,6 +43,17 @@
else:
from typing_extensions import Self

if sys.version_info >= (3, 12):
from pathlib import posixpath as _posix_flavour # type: ignore[attr-defined]
from pathlib import _make_selector # type: ignore[attr-defined]
else:
from pathlib import _posix_flavour # type: ignore[attr-defined]
from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined]

def _make_selector(pattern_parts, _flavour, case_sensitive=True):
    # Shim for Python < 3.12 so call sites can pass `case_sensitive` on every
    # version: the argument is accepted but not forwarded, because the aliased
    # pathlib helper is only called with the pattern parts and the flavour.
    return _make_selector_pathlib(tuple(pattern_parts), _flavour)


from cloudpathlib.enums import FileCacheMode

from . import anypath
Expand Down Expand Up @@ -342,6 +352,8 @@ def __ge__(self, other: Any) -> bool:
# owner - no cloud equivalent
# root - drive already has the bucket and anchor/prefix has the scheme, so nothing to store here
# symlink_to - no cloud equivalent
# link_to - no cloud equivalent
# hardlink_to - no cloud equivalent

# ====================== REQUIRED, NOT GENERIC ======================
# Methods that must be implemented, but have no generic application
Expand Down Expand Up @@ -406,7 +418,7 @@ def _glob_checks(self, pattern: str) -> None:
".glob is only supported within a bucket or container; you can use `.iterdir` to list buckets; for example, CloudPath('s3://').iterdir()"
)

def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]:
def _build_subtree(self, recursive):
# build a tree structure for all files out of default dicts
Tree: Callable = lambda: defaultdict(Tree)

Expand All @@ -433,7 +445,10 @@ def _build_tree(trunk, branch, nodes, is_dir):
nodes = (p for p in parts)
_build_tree(file_tree, next(nodes, None), nodes, is_dir)

file_tree = dict(file_tree) # freeze as normal dict before passing in
return dict(file_tree) # freeze as normal dict before passing in

def _glob(self, selector, recursive: bool) -> Generator[Self, None, None]:
file_tree = self._build_subtree(recursive)

root = _CloudPathSelectable(
self.name,
Expand All @@ -445,11 +460,15 @@ def _build_tree(trunk, branch, nodes, is_dir):
# select_from returns self.name/... so strip before joining
yield (self / str(p)[len(self.name) + 1 :])

def glob(self, pattern: str) -> Generator[Self, None, None]:
def glob(
self, pattern: str, case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
self._glob_checks(pattern)

pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(tuple(pattern_parts), _posix_flavour)
selector = _make_selector(
tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)

yield from self._glob(
selector,
Expand All @@ -458,11 +477,15 @@ def glob(self, pattern: str) -> Generator[Self, None, None]:
in pattern, # recursive listing needed if explicit ** or any sub folder in pattern
)

def rglob(self, pattern: str) -> Generator[Self, None, None]:
def rglob(
self, pattern: str, case_sensitive: Optional[bool] = None
) -> Generator[Self, None, None]:
self._glob_checks(pattern)

pattern_parts = PurePosixPath(pattern).parts
selector = _make_selector(("**",) + tuple(pattern_parts), _posix_flavour)
selector = _make_selector(
("**",) + tuple(pattern_parts), _posix_flavour, case_sensitive=case_sensitive
)

yield from self._glob(selector, True)

Expand All @@ -471,6 +494,41 @@ def iterdir(self) -> Generator[Self, None, None]:
if f != self: # iterdir does not include itself in pathlib
yield f

@staticmethod
def _walk_results_from_tree(root, tree, top_down=True):
    """Yield `(root, dirs, files)` tuples in the form expected by `.walk` from the
    file tree constructed by `_build_subtree`.

    Args:
        root: Path-like object for the top of `tree`; the root of each
            subdirectory is built with `root / name`.
        tree: Mapping of entry name to either `None` (treated as a file) or a
            nested mapping of the same shape (treated as a directory).
        top_down: If True, yield each directory before its subdirectories;
            otherwise yield it after all of its subdirectories.

    Yields:
        Tuples of `(root, dirs, files)` where `dirs` and `files` are lists of
        entry names found directly under `root`.
    """

    def _walk(current_root, subtree):
        dirs = []
        files = []
        for name, branch in subtree.items():
            if branch is None:
                files.append(name)
            else:
                dirs.append(name)

        if top_down:
            yield current_root, dirs, files

        # `dirs` is the same list object that was yielded above, so in-place
        # edits made by a top-down consumer change which subtrees are visited
        for name in dirs:
            yield from _walk(current_root / name, subtree[name])

        if not top_down:
            yield current_root, dirs, files

    yield from _walk(root, tree)

def walk(
    self,
    top_down: bool = True,
    on_error: Optional[Callable] = None,
    follow_symlinks: bool = False,
) -> Generator[Tuple[Self, List[str], List[str]], None, None]:
    """Yield `(root, dirs, files)` tuples for this path and everything below it.

    The full subtree is built once with `_build_subtree(recursive=True)` and the
    tuples are produced from it by `_walk_results_from_tree`.

    Args:
        top_down: Passed through to `_walk_results_from_tree`.
        on_error: Optional callable. If an `Exception` is raised while building
            or walking the tree it is passed to this callable and the walk
            ends; when no callable is given the exception propagates.
        follow_symlinks: Accepted but not used by this implementation.
    """
    try:
        # walking is always recursive
        subtree = self._build_subtree(recursive=True)
        yield from self._walk_results_from_tree(self, subtree, top_down=top_down)
    except Exception as err:
        if on_error is None:
            raise
        on_error(err)

def open(
self,
mode: str = "r",
Expand Down Expand Up @@ -647,6 +705,9 @@ def read_text(self, encoding: Optional[str] = None, errors: Optional[str] = None
with self.open(mode="r", encoding=encoding, errors=errors) as f:
return f.read()

def is_junction(self) -> bool:
    """Return whether this path is a junction; this is always False here."""
    return False  # only windows paths can be junctions, not cloudpaths

# ====================== DISPATCHED TO POSIXPATH FOR PURE PATHS ======================
# Methods that are dispatched to exactly how pathlib.PurePosixPath would calculate it on
# self._path for pure paths (does not matter if file exists);
Expand Down Expand Up @@ -692,8 +753,8 @@ def __truediv__(self, other: Union[str, PurePosixPath]) -> Self:

return self._dispatch_to_path("__truediv__", other)

def joinpath(self, *args: Union[str, os.PathLike]) -> Self:
return self._dispatch_to_path("joinpath", *args)
def joinpath(self, *pathsegments: Union[str, os.PathLike]) -> Self:
return self._dispatch_to_path("joinpath", *pathsegments)

def absolute(self) -> Self:
return self
Expand All @@ -704,7 +765,7 @@ def is_absolute(self) -> bool:
def resolve(self, strict: bool = False) -> Self:
return self

def relative_to(self, other: Self) -> PurePosixPath:
def relative_to(self, other: Self, walk_up: bool = False) -> PurePosixPath:
# We don't dispatch regularly since this never returns a cloud path (since it is relative, and cloud paths are
# absolute)
if not isinstance(other, CloudPath):
Expand All @@ -713,7 +774,13 @@ def relative_to(self, other: Self) -> PurePosixPath:
raise ValueError(
f"{self} is a {self.cloud_prefix} path, but {other} is a {other.cloud_prefix} path"
)
return self._path.relative_to(other._path)

kwargs = dict(walk_up=walk_up)

if sys.version_info < (3, 12):
kwargs.pop("walk_up")

return self._path.relative_to(other._path, **kwargs) # type: ignore[call-arg]

def is_relative_to(self, other: Self) -> bool:
try:
Expand All @@ -726,12 +793,17 @@ def is_relative_to(self, other: Self) -> bool:
def name(self) -> str:
return self._dispatch_to_path("name")

def match(self, path_pattern: str) -> bool:
def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool:
# strip scheme from start of pattern before testing
if path_pattern.startswith(self.anchor + self.drive + "/"):
path_pattern = path_pattern[len(self.anchor + self.drive + "/") :]

return self._dispatch_to_path("match", path_pattern)
kwargs = dict(case_sensitive=case_sensitive)

if sys.version_info < (3, 12):
kwargs.pop("case_sensitive")

return self._dispatch_to_path("match", path_pattern, **kwargs)

@property
def parent(self) -> Self:
Expand Down Expand Up @@ -771,6 +843,12 @@ def with_stem(self, stem: str) -> Self:
def with_name(self, name: str) -> Self:
return self._dispatch_to_path("with_name", name)

def with_segments(self, *pathsegments) -> Self:
    """Create a new CloudPath with the same client out of the given segments.
    The first segment will be interpreted as the bucket/container name.

    Args:
        *pathsegments: Segments to join with "/". Each one is converted with
            `str`, so path objects are accepted as well as plain strings.

    Returns:
        The result of `self._new_cloudpath` for the joined segments.
    """
    # `str.join` raises TypeError on non-str items, so coerce each segment first
    return self._new_cloudpath("/".join(str(segment) for segment in pathsegments))

def with_suffix(self, suffix: str) -> Self:
return self._dispatch_to_path("with_suffix", suffix)

Expand Down Expand Up @@ -1244,3 +1322,16 @@ def scandir(
)

_scandir = scandir # Py 3.11 compatibility

def walk(self):
    """Yield `(node, dir_names, file_names)` top-down for this node and its subtree.

    Children come from `self.scandir(self)` and are split by `is_dir()`; each
    directory child is then walked recursively in the order it was listed.
    """
    subdirs = []
    leaves = []
    with self.scandir(self) as entries:
        for entry in entries:
            (subdirs if entry.is_dir() else leaves).append(entry)

    # top-down: report this node before descending into its directories
    yield self, [d.name for d in subdirs], [leaf.name for leaf in leaves]

    for subdir in subdirs:
        yield from subdir.walk()
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
requires-python = ">=3.7"
dependencies = [
Expand All @@ -49,7 +50,7 @@ all = ["cloudpathlib[azure]", "cloudpathlib[gs]", "cloudpathlib[s3]"]

[tool.black]
line-length = 99
target-version = ['py37', 'py38', 'py39', 'py310', 'py311']
target-version = ['py37', 'py38', 'py39', 'py310', 'py311', 'py312']
include = '\.pyi?$|\.ipynb$'
extend-exclude = '''
/(
Expand Down
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ pillow
psutil
pydantic
pytest
pytest-cases
# pytest-cases
git+https://github.com/jayqi/python-pytest-cases@packaging-version
pytest-cov
pytest-xdist
python-dotenv
Expand Down
9 changes: 9 additions & 0 deletions tests/performance/perf_file_listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,12 @@ def glob(folder, recursive):
return {"n_items": len(list(folder.rglob("*.item")))}
else:
return {"n_items": len(list(folder.glob("*.item")))}


def walk(folder):
    """Walk `folder` and report how many files were listed in total.

    Returns:
        A dict with a single key, "n_items", holding the summed length of the
        `files` list from every tuple yielded by `folder.walk()`.
    """
    return {"n_items": sum(len(files) for _, _, files in folder.walk())}
11 changes: 10 additions & 1 deletion tests/performance/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from cloudpathlib import CloudPath


from perf_file_listing import folder_list, glob
from perf_file_listing import folder_list, glob, walk


# make loguru and tqdm play nicely together
Expand Down Expand Up @@ -137,6 +137,15 @@ def main(root, iterations, burn_in):
PerfRunConfig(name="Glob deep non-recursive", args=[deep, False], kwargs={}),
],
),
(
"Walk scenarios",
walk,
[
PerfRunConfig(name="Walk shallow", args=[shallow], kwargs={}),
PerfRunConfig(name="Walk normal", args=[normal], kwargs={}),
PerfRunConfig(name="Walk deep", args=[deep], kwargs={}),
],
),
]

logger.info(
Expand Down
1 change: 1 addition & 0 deletions tests/test_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def test_interaction_with_local_cache_dir(rig: CloudProviderTestRig, tmpdir):
assert cp.client.file_cache_mode == FileCacheMode.tmp_dir

# download from cloud into the cache
sleep(0.1) # test can be flaky saying that the cache dir doesn't exist yet
with cp.open("r") as f:
_ = f.read()

Expand Down
Loading

0 comments on commit eb3ba8e

Please sign in to comment.