Skip to content

Commit

Permalink
Py 313 compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
pjbull committed Sep 20, 2024
1 parent 8207b3d commit 2a5152e
Show file tree
Hide file tree
Showing 7 changed files with 264 additions and 44 deletions.
6 changes: 0 additions & 6 deletions cloudpathlib/azure/azblobpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,6 @@ class AzureBlobPath(CloudPath):
def drive(self) -> str:
return self.container

def is_dir(self) -> bool:
return self.client._is_file_or_dir(self) == "dir"

def is_file(self) -> bool:
return self.client._is_file_or_dir(self) == "file"

def mkdir(self, parents=False, exist_ok=False):
self.client._mkdir(self, parents=parents, exist_ok=exist_ok)

Expand Down
60 changes: 42 additions & 18 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
PosixPath,
PurePosixPath,
WindowsPath,
_PathParents,
)

import shutil
Expand Down Expand Up @@ -56,17 +55,27 @@
else:
from typing_extensions import Self

if sys.version_info >= (3, 12):
# ``sys.version_info`` is a five-field tuple (major, minor, micro,
# releaselevel, serial), so it never compares equal to a bare ``(3, 12)``.
# Slice to (major, minor) so this branch actually runs on every 3.12.x release.
if sys.version_info[:2] == (3, 12):
    from pathlib import posixpath as _posix_flavour  # type: ignore[attr-defined]
    from pathlib import _make_selector  # type: ignore[attr-defined]
else:
from pathlib import _posix_flavour # type: ignore[attr-defined]
from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined]

def _make_selector(pattern_parts, _flavour, case_sensitive=True):
# Python < 3.12: take the flavour object and the selector factory from
# pathlib's private names.
if sys.version_info < (3, 12):
from pathlib import _posix_flavour # type: ignore[attr-defined] # noqa: F811
from pathlib import _make_selector as _make_selector_pathlib # type: ignore[attr-defined] # noqa: F811

# Adapter exposing a three-argument call shape: ``pattern_parts`` is converted
# to a tuple before the call and ``case_sensitive`` is accepted but not
# forwarded to the pathlib selector factory.
def _make_selector(pattern_parts, _flavour, case_sensitive=True): # noqa: F811
return _make_selector_pathlib(tuple(pattern_parts), _flavour)


# Python 3.13+: ``_PathParents`` is imported from ``pathlib._local``, the
# flavour is the stdlib ``posixpath`` module, and ``_make_selector`` comes from
# this package's ``legacy.glob`` module.
if sys.version_info >= (3, 13):
from pathlib._local import _PathParents
import posixpath as _posix_flavour # type: ignore[attr-defined] # noqa: F811

from .legacy.glob import _make_selector # noqa: F811
else:
# Earlier interpreters: ``_PathParents`` is imported from ``pathlib`` itself.
from pathlib import _PathParents


from cloudpathlib.enums import FileCacheMode

from . import anypath
Expand Down Expand Up @@ -383,16 +392,6 @@ def drive(self) -> str:
"""For example "bucket" on S3 or "container" on Azure; needs to be defined for each class"""
pass

@abc.abstractmethod
def is_dir(self) -> bool:
"""Should be implemented without requiring a dir is downloaded"""
pass

@abc.abstractmethod
def is_file(self) -> bool:
"""Should be implemented without requiring that the file is downloaded"""
pass

@abc.abstractmethod
def mkdir(self, parents: bool = False, exist_ok: bool = False) -> None:
"""Should be implemented using the client API without requiring a dir is downloaded"""
Expand Down Expand Up @@ -427,10 +426,19 @@ def as_uri(self) -> str:
def exists(self) -> bool:
return self.client._exists(self)

def is_dir(self, follow_symlinks=True) -> bool:
    """Return True when the client classifies this path as ``"dir"``.

    ``follow_symlinks`` is accepted but is not consulted by this
    implementation.
    """
    kind = self.client._is_file_or_dir(self)
    return kind == "dir"

def is_file(self, follow_symlinks=True) -> bool:
    """Return True when the client classifies this path as ``"file"``.

    ``follow_symlinks`` is accepted but is not consulted by this
    implementation.
    """
    kind = self.client._is_file_or_dir(self)
    return kind == "file"

@property
def fspath(self) -> str:
return self.__fspath__()

def from_uri(self, uri: str) -> Self:
    """Return the result of ``self._new_cloudpath(uri)``."""
    new_path = self._new_cloudpath(uri)
    return new_path

def _glob_checks(self, pattern: str) -> None:
if ".." in pattern:
raise CloudPathNotImplementedError(
Expand Down Expand Up @@ -812,8 +820,13 @@ def read_bytes(self) -> bytes:
with self.open(mode="rb") as f:
return f.read()

def read_text(self, encoding: Optional[str] = None, errors: Optional[str] = None) -> str:
with self.open(mode="r", encoding=encoding, errors=errors) as f:
def read_text(
    self,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    newline: Optional[str] = None,
) -> str:
    """Open this path in text mode and return everything read from it.

    ``encoding``, ``errors`` and ``newline`` are forwarded unchanged to
    ``self.open``; the handle is used as a context manager.
    """
    open_kwargs = dict(mode="r", encoding=encoding, errors=errors, newline=newline)
    with self.open(**open_kwargs) as handle:
        return handle.read()

def is_junction(self):
Expand Down Expand Up @@ -904,6 +917,13 @@ def is_relative_to(self, other: Self) -> bool:
def name(self) -> str:
return self._dispatch_to_path("name")

def full_match(self, pattern: str, case_sensitive: Optional[bool] = None) -> bool:
    """Dispatch ``full_match`` for ``pattern`` to the underlying path.

    A leading ``<anchor><drive>/`` prefix is removed from ``pattern`` first;
    ``case_sensitive`` is forwarded as a keyword argument.
    """
    # strip scheme from start of pattern before testing
    prefix = self.anchor + self.drive + "/"
    if pattern.startswith(prefix):
        pattern = pattern[len(prefix):]

    return self._dispatch_to_path("full_match", pattern, case_sensitive=case_sensitive)

def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> bool:
# strip scheme from start of pattern before testing
if path_pattern.startswith(self.anchor + self.drive + "/"):
Expand All @@ -916,6 +936,10 @@ def match(self, path_pattern: str, case_sensitive: Optional[bool] = None) -> boo

return self._dispatch_to_path("match", path_pattern, **kwargs)

@property
def parser(self) -> Self:
    """Return whatever ``self._dispatch_to_path("parser")`` produces.

    NOTE(review): the return annotation is ``Self``; confirm that matches
    the object the dispatch actually yields.
    """
    attribute_name = "parser"
    return self._dispatch_to_path(attribute_name)

@property
def parent(self) -> Self:
return self._dispatch_to_path("parent")
Expand Down
6 changes: 0 additions & 6 deletions cloudpathlib/gs/gspath.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ class GSPath(CloudPath):
def drive(self) -> str:
return self.bucket

def is_dir(self) -> bool:
return self.client._is_file_or_dir(self) == "dir"

def is_file(self) -> bool:
return self.client._is_file_or_dir(self) == "file"

def mkdir(self, parents=False, exist_ok=False):
# not possible to make empty directory on cloud storage
pass
Expand Down
205 changes: 205 additions & 0 deletions cloudpathlib/legacy/glob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import fnmatch
import functools
import re

#
# Globbing helpers
#


@functools.cache
def _is_case_sensitive(flavour):
    """Report whether ``flavour.normcase`` leaves mixed-case text unchanged."""
    probe = "Aa"
    return flavour.normcase(probe) == probe


# fnmatch.translate() returns a regular expression that includes a prefix and
# a suffix, which enable matching newlines and ensure the end of the string is
# matched, respectively. These features are undesirable for our implementation
# of PurePath.match(), which represents path separators as newlines and joins
# pattern segments together. As a workaround, we define a slice object that
# can remove the prefix and suffix from any translate() result. See the
# _compile_pattern_lines() function for more details.
# Translate a one-character literal pattern and split around that character to
# recover the fixed prefix and suffix text that fnmatch.translate() emits.
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate("_").split("_")
# Slice that drops that prefix and suffix from a translate() result.
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
# Per-separator str.translate() tables that exchange the separator with "\n".
_SWAP_SEP_AND_NEWLINE = {
"/": str.maketrans({"/": "\n", "\n": "/"}),
"\\": str.maketrans({"\\": "\n", "\n": "\\"}),
}


@functools.lru_cache()
def _make_selector(pattern_parts, flavour, case_sensitive):
    """Build the selector for the first pattern part, chained to the rest.

    An empty first part yields a terminating selector. A ``**`` part (with
    any immediately following ``**`` parts skipped) yields a recursive
    selector, or the de-duplicating variant when another ``**`` remains
    further along. ``..`` yields a parent selector and anything else a
    wildcard selector.

    Raises:
        ValueError: if ``**`` appears inside a part rather than being the
            whole part.
    """
    head = pattern_parts[0]
    if not head:
        return _TerminatingSelector()

    if head == "**":
        # Skip over a run of adjacent "**" parts.
        rest_start = 1
        total = len(pattern_parts)
        while rest_start < total and pattern_parts[rest_start] == "**":
            rest_start += 1
        remaining = pattern_parts[rest_start:]
        selector_cls = (
            _DoubleRecursiveWildcardSelector
            if "**" in remaining
            else _RecursiveWildcardSelector
        )
    elif head == "..":
        remaining = pattern_parts[1:]
        selector_cls = _ParentSelector
    elif "**" in head:
        raise ValueError("Invalid pattern: '**' can only be an entire path component")
    else:
        remaining = pattern_parts[1:]
        selector_cls = _WildcardSelector

    return selector_cls(head, remaining, flavour, case_sensitive)


@functools.lru_cache(maxsize=256)
def _compile_pattern(pat, case_sensitive):
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
return re.compile(fnmatch.translate(pat), flags).match


@functools.lru_cache()
def _compile_pattern_lines(pattern_lines, case_sensitive):
    r"""Compile newline-separated glob segments into an ``re.Pattern``.

    *pattern_lines* is a glob-style pattern whose path separators and
    newlines have been swapped, so each path component sits on its own line
    (``'*/*.py'`` becomes ``'*\n*.py'``). Because ``re.DOTALL`` is not set,
    a ``*`` wildcard cannot match across a separator.

    The returned pattern can be used with ``match()`` for a complete match or
    ``search()`` to match from the right; the string passed to either must
    have its separators and newlines swapped in the same way.
    """
    # A bare "*" component (with or without its trailing separator) must
    # match at least one character.
    wildcard_regexes = {"*\n": r".+\n", "*": r".+"}

    # Match the start of the path, or just after a path separator.
    pieces = ["^"]
    for segment in pattern_lines.splitlines(keepends=True):
        regex = wildcard_regexes.get(segment)
        if regex is None:
            # Any other component goes through fnmatch.translate(); the
            # prefix and suffix it adds are sliced off so that DOTALL is not
            # set and the end of the string is not matched per component.
            regex = fnmatch.translate(segment)[_FNMATCH_SLICE]
        pieces.append(regex)
    # Match the end of the path, always.
    pieces.append(r"\Z")

    flags = re.MULTILINE if case_sensitive else re.MULTILINE | re.IGNORECASE
    return re.compile("".join(pieces), flags=flags)


class _Selector:
    """A selector matches a specific glob pattern part against the children
    of a given path."""

    def __init__(self, child_parts, flavour, case_sensitive):
        """Store ``child_parts`` and build the selector for what follows.

        With remaining parts, the successor comes from ``_make_selector`` and
        ``dironly`` is True; otherwise the successor is a terminating
        selector and ``dironly`` is False.
        """
        self.child_parts = child_parts
        has_children = bool(child_parts)
        self.successor = (
            _make_selector(child_parts, flavour, case_sensitive)
            if has_children
            else _TerminatingSelector()
        )
        self.dironly = has_children

    def select_from(self, parent_path):
        """Iterate over all child paths of `parent_path` matched by this
        selector. This can contain parent_path itself."""
        scandir = type(parent_path)._scandir
        if parent_path.is_dir():
            return self._select_from(parent_path, scandir)
        # Nothing can match beneath something that is not a directory.
        return iter([])


class _TerminatingSelector:
    """End of a selector chain: yields the path it is handed, unchanged."""

    def _select_from(self, parent_path, scandir):
        yield from (parent_path,)


class _ParentSelector(_Selector):
    """Selector that steps to the ``..`` child path and hands it to its successor."""

    def __init__(self, name, child_parts, flavour, case_sensitive):
        # ``name`` is accepted but not used by this selector.
        super().__init__(child_parts, flavour, case_sensitive)

    def _select_from(self, parent_path, scandir):
        dotdot = parent_path._make_child_relpath("..")
        yield from self.successor._select_from(dotdot, scandir)


class _WildcardSelector(_Selector):
    """Selector that matches entry names of a directory against one pattern part."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        super().__init__(child_parts, flavour, case_sensitive)
        if case_sensitive is None:
            # TODO: evaluate case-sensitivity of each directory in _select_from()
            case_sensitive = _is_case_sensitive(flavour)
        self.match = _compile_pattern(pat, case_sensitive)

    def _select_from(self, parent_path, scandir):
        try:
            # We must close the scandir() object before proceeding to
            # avoid exhausting file descriptors when globbing deep trees.
            with scandir(parent_path) as listing:
                entries = list(listing)
        except OSError:
            # A directory that cannot be scanned contributes no results.
            return

        for entry in entries:
            if self.dironly:
                # Entries whose type cannot be determined are skipped.
                try:
                    is_directory = entry.is_dir()
                except OSError:
                    continue
                if not is_directory:
                    continue
            entry_name = entry.name
            if not self.match(entry_name):
                continue
            child = parent_path._make_child_relpath(entry_name)
            yield from self.successor._select_from(child, scandir)


class _RecursiveWildcardSelector(_Selector):
    """Selector that applies its successor to a path and every directory that
    ``walk()`` reports beneath it."""

    def __init__(self, pat, child_parts, flavour, case_sensitive):
        # ``pat`` is accepted but not used by this selector.
        super().__init__(child_parts, flavour, case_sensitive)

    def _iterate_directories(self, parent_path):
        # The starting path itself comes first, then each walked directory.
        yield parent_path
        for current_dir, subdir_names, _files in parent_path.walk():
            for subdir_name in subdir_names:
                yield current_dir._make_child_relpath(subdir_name)

    def _select_from(self, parent_path, scandir):
        descend = self.successor._select_from
        for directory in self._iterate_directories(parent_path):
            yield from descend(directory, scandir)


class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
    """
    Like _RecursiveWildcardSelector, but also de-duplicates results from
    successive selectors. This is necessary if the pattern contains
    multiple non-adjacent '**' segments.
    """

    def _select_from(self, parent_path, scandir):
        seen = set()
        try:
            for candidate in super()._select_from(parent_path, scandir):
                if candidate in seen:
                    continue
                yield candidate
                seen.add(candidate)
        finally:
            # Empty the bookkeeping set once iteration ends or is abandoned.
            seen.clear()
17 changes: 13 additions & 4 deletions cloudpathlib/local/localclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
from pathlib import Path, PurePosixPath
import shutil
import sys
from tempfile import TemporaryDirectory
from time import sleep
from typing import Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union
Expand Down Expand Up @@ -103,11 +104,19 @@ def _download_file(self, cloud_path: "LocalPath", local_path: Union[str, os.Path
def _exists(self, cloud_path: "LocalPath") -> bool:
return self._cloud_path_to_local(cloud_path).exists()

def _is_dir(self, cloud_path: "LocalPath") -> bool:
return self._cloud_path_to_local(cloud_path).is_dir()
def _is_dir(self, cloud_path: "LocalPath", follow_symlinks=True) -> bool:
    """Return whether the local counterpart of ``cloud_path`` is a directory.

    ``follow_symlinks`` is forwarded to ``Path.is_dir()`` only on Python
    3.13 and later, where that keyword is accepted.
    """
    local_path = self._cloud_path_to_local(cloud_path)
    # Compare against (3, 13): ``sys.version_info`` on any 3.12.x release is a
    # longer tuple that sorts after ``(3, 12)``, so a ``<= (3, 12)`` test
    # would forward the unsupported keyword on 3.12.
    if sys.version_info < (3, 13):
        return local_path.is_dir()
    return local_path.is_dir(follow_symlinks=follow_symlinks)

def _is_file(self, cloud_path: "LocalPath", follow_symlinks=True) -> bool:
    """Return whether the local counterpart of ``cloud_path`` is a regular file.

    ``follow_symlinks`` is forwarded to ``Path.is_file()`` only on Python
    3.13 and later, where that keyword is accepted.
    """
    local_path = self._cloud_path_to_local(cloud_path)
    # Compare against (3, 13): ``sys.version_info`` on any 3.12.x release is a
    # longer tuple that sorts after ``(3, 12)``, so a ``<= (3, 12)`` test
    # would forward the unsupported keyword on 3.12.
    if sys.version_info < (3, 13):
        return local_path.is_file()
    return local_path.is_file(follow_symlinks=follow_symlinks)

def _list_dir(
self, cloud_path: "LocalPath", recursive=False
Expand Down
8 changes: 4 additions & 4 deletions cloudpathlib/local/localpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ class LocalPath(CloudPath):

client: "LocalClient"

def is_dir(self) -> bool:
return self.client._is_dir(self)
def is_dir(self, follow_symlinks=True) -> bool:
    """Return the client's ``_is_dir`` answer for this path, passing
    ``follow_symlinks`` through as a keyword argument."""
    client = self.client
    return client._is_dir(self, follow_symlinks=follow_symlinks)

def is_file(self) -> bool:
return self.client._is_file(self)
def is_file(self, follow_symlinks=True) -> bool:
    """Return the client's ``_is_file`` answer for this path, passing
    ``follow_symlinks`` through as a keyword argument."""
    client = self.client
    return client._is_file(self, follow_symlinks=follow_symlinks)

def stat(self):
try:
Expand Down
Loading

0 comments on commit 2a5152e

Please sign in to comment.