From 9857dbda58225d42f709192f2c751d2078d25d87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?= Date: Tue, 17 Sep 2024 19:51:45 -0600 Subject: [PATCH] Add type annotations to base spec --- fsspec/spec.py | 146 +++++++++++++++++++++++++++---------------------- pyproject.toml | 36 ++++++++++++ 2 files changed, 118 insertions(+), 64 deletions(-) diff --git a/fsspec/spec.py b/fsspec/spec.py index 8229170e2..987d3f3d7 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -10,7 +10,7 @@ from errno import ESPIPE from glob import has_magic from hashlib import sha256 -from typing import Any, ClassVar, Dict, Tuple +from typing import TYPE_CHECKING, Any, ClassVar, Dict, Literal, List, Tuple, overload from .callbacks import DEFAULT_CALLBACK from .config import apply_config, conf @@ -26,6 +26,12 @@ tokenize, ) +if TYPE_CHECKING: + import datetime + + from .caching import BaseCache + from .mapping import FSMap + logger = logging.getLogger("fsspec") @@ -184,8 +190,18 @@ def __eq__(self, other): def __reduce__(self): return make_instance, (type(self), self.storage_args, self.storage_options) + @overload @classmethod - def _strip_protocol(cls, path): + def _strip_protocol(cls, path: str) -> str: + ... + + @overload + @classmethod + def _strip_protocol(cls, path: List[str]) -> List[str]: + ... + + @classmethod + def _strip_protocol(cls, path) -> str | List[str]: """Turn path from fully-qualified to file-system-specific May require FS-specific handling, e.g., for relative paths or links. @@ -277,7 +293,7 @@ def invalidate_cache(self, path=None): if self._intrans: self._invalidated_caches_in_transaction.append(path) - def mkdir(self, path, create_parents=True, **kwargs): + def mkdir(self, path: str, create_parents: bool = True, **kwargs): """ Create directory entry at path @@ -295,7 +311,7 @@ def mkdir(self, path, create_parents=True, **kwargs): """ pass # not necessary to implement, may not have directories - def makedirs(self, path, exist_ok=False): + def makedirs(self, path: str, exist_ok: bool = False): """Recursively make directories Creates directory at path and any intervening required directories. @@ -311,11 +327,11 @@ def makedirs(self, path, exist_ok=False): """ pass # not necessary to implement, may not have directories - def rmdir(self, path): + def rmdir(self, path: str): """Remove a directory, if empty""" pass # not necessary to implement, may not have directories - def ls(self, path, detail=True, **kwargs): + def ls(self, path: str, detail: bool = True, **kwargs): """List objects at path. This should include subdirectories and files at that location. The @@ -381,7 +397,7 @@ def _ls_from_cache(self, path): except KeyError: pass - def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): + def walk(self, path: str, maxdepth=None, topdown=True, on_error="omit", **kwargs): """Return all files belows path List all files, recursing into subdirectories; output is iterator-style, @@ -549,7 +565,7 @@ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): else: return sizes - def glob(self, path, maxdepth=None, **kwargs): + def glob(self, path: str, maxdepth: int | None = None, **kwargs): """ Find files by glob-matching. 
@@ -630,7 +646,7 @@ def glob(self, path, maxdepth=None, **kwargs): else: return list(out) - def exists(self, path, **kwargs): + def exists(self, path, **kwargs) -> bool: """Is there a file at the given path""" try: self.info(path, **kwargs) @@ -639,12 +655,12 @@ def exists(self, path, **kwargs): # any exception allowed bar FileNotFoundError? return False - def lexists(self, path, **kwargs): + def lexists(self, path, **kwargs) -> bool: """If there is a file at the given path (including broken links)""" return self.exists(path) - def info(self, path, **kwargs): + def info(self, path, **kwargs) -> Dict[str, Any]: """Give details of entry at path Returns a single dictionary, with exactly the same information as ``ls`` @@ -699,14 +715,14 @@ def sizes(self, paths): """Size in bytes of each file in a list of paths""" return [self.size(p) for p in paths] - def isdir(self, path): + def isdir(self, path) -> bool: """Is this entry directory-like?""" try: return self.info(path)["type"] == "directory" except OSError: return False - def isfile(self, path): + def isfile(self, path) -> bool: """Is this entry file-like?""" try: return self.info(path)["type"] == "file" @@ -733,7 +749,7 @@ def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs): return f.read() def write_text( - self, path, value, encoding=None, errors=None, newline=None, **kwargs + self, path: str, value: str, encoding=None, errors=None, newline=None, **kwargs ): """Write the text to the given file. @@ -757,7 +773,7 @@ def write_text( ) as f: return f.write(value) - def cat_file(self, path, start=None, end=None, **kwargs): + def cat_file(self, path: str, start=None, end=None, **kwargs): """Get the content of a file Parameters @@ -1129,7 +1145,7 @@ def copy( if on_error == "raise": raise - def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): + def expand_path(self, path: str | List[str], recursive: bool = False, maxdepth: int | None = None, **kwargs) -> list[str]: """Turn one or more globs or directories into a list of all matching paths to files or directories. @@ -1139,6 +1155,8 @@ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") + out: set[str] | list[str] + if isinstance(path, (str, os.PathLike)): out = self.expand_path([path], recursive, maxdepth) else: @@ -1247,11 +1265,11 @@ def _open( def open( self, - path, - mode="rb", - block_size=None, - cache_options=None, - compression=None, + path: str, + mode: str = "rb", + block_size: int | None = None, + cache_options: dict | None = None, + compression: str | None = None, **kwargs, ): """ @@ -1320,7 +1338,7 @@ def open( self.transaction.files.append(f) return f - def touch(self, path, truncate=True, **kwargs): + def touch(self, path: str, truncate: bool = True, **kwargs) -> None: """Create empty file, or update timestamp Parameters @@ -1337,11 +1355,11 @@ def touch(self, path, truncate=True, **kwargs): else: raise NotImplementedError # update timestamp, if possible - def ukey(self, path): + def ukey(self, path: str) -> str: """Hash of file properties, to tell if it has changed""" return sha256(str(self.info(path)).encode()).hexdigest() - def read_block(self, fn, offset, length, delimiter=None): + def read_block(self, fn: str, offset: int, length: int, delimiter: bytes | None = None) -> bytes: """Read a block of bytes from Starting at ``offset`` of the file, read ``length`` bytes. 
If @@ -1530,7 +1548,7 @@ def _get_pyarrow_filesystem(self): # all instances already also derive from pyarrow return self - def get_mapper(self, root="", check=False, create=False, missing_exceptions=None): + def get_mapper(self, root="", check=False, create=False, missing_exceptions=None) -> FSMap: """Create key/value store based on this file-system Makes a MutableMapping interface to the FS at the given root path. @@ -1547,7 +1565,7 @@ def get_mapper(self, root="", check=False, create=False, missing_exceptions=None ) @classmethod - def clear_instance_cache(cls): + def clear_instance_cache(cls) -> None: """ Clear the cache of filesystem instances. @@ -1561,18 +1579,18 @@ def clear_instance_cache(cls): """ cls._cache.clear() - def created(self, path): + def created(self, path: str) -> datetime.datetime: """Return the created timestamp of a file as a datetime.datetime""" raise NotImplementedError - def modified(self, path): + def modified(self, path: str) -> datetime.datetime: """Return the modified timestamp of a file as a datetime.datetime""" raise NotImplementedError # ------------------------------------------------------------------------ # Aliases - def read_bytes(self, path, start=None, end=None, **kwargs): + def read_bytes(self, path: str, start=None, end=None, **kwargs): """Alias of `AbstractFileSystem.cat_file`.""" return self.cat_file(path, start=start, end=end, **kwargs) @@ -1580,11 +1598,11 @@ def write_bytes(self, path, value, **kwargs): """Alias of `AbstractFileSystem.pipe_file`.""" self.pipe_file(path, value, **kwargs) - def makedir(self, path, create_parents=True, **kwargs): + def makedir(self, path, create_parents=True, **kwargs) -> None: """Alias of `AbstractFileSystem.mkdir`.""" return self.mkdir(path, create_parents=create_parents, **kwargs) - def mkdirs(self, path, exist_ok=False): + def mkdirs(self, path, exist_ok=False) -> None: """Alias of `AbstractFileSystem.makedirs`.""" return self.makedirs(path, exist_ok=exist_ok) @@ -1670,14 +1688,14 @@ class AbstractBufferedFile(io.IOBase): def __init__( self, - fs, - path, - mode="rb", - block_size="default", - autocommit=True, + fs: AbstractFileSystem, + path: str, + mode: str = "rb", + block_size: int | Literal["default"] | None = "default", + autocommit: bool = True, cache_type="readahead", - cache_options=None, - size=None, + cache_options: dict | None = None, + size: int | None = None, **kwargs, ): """ @@ -1711,10 +1729,10 @@ def __init__( self.path = path self.fs = fs self.mode = mode - self.blocksize = ( + self.blocksize: int = ( self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size ) - self.loc = 0 + self.loc: int = 0 self.autocommit = autocommit self.end = None self.start = None @@ -1740,7 +1758,7 @@ def __init__( self.size = size else: self.size = self.details["size"] - self.cache = caches[cache_type]( + self.cache: BaseCache = caches[cache_type]( self.blocksize, self._fetch_range, self.size, **cache_options ) else: @@ -1765,22 +1783,22 @@ def full_name(self): return _unstrip_protocol(self.path, self.fs) @property - def closed(self): + def closed(self) -> bool: # get around this attr being read-only in IOBase # use getattr here, since this can be called during del return getattr(self, "_closed", True) @closed.setter - def closed(self, c): + def closed(self, c: bool) -> None: self._closed = c - def __hash__(self): + def __hash__(self) -> int: if "w" in self.mode: return id(self) else: return int(tokenize(self.details), 16) - def __eq__(self, other): + def __eq__(self, other: object) -> bool: 
"""Files are equal if they have the same checksum, only in read mode""" if self is other: return True @@ -1804,11 +1822,11 @@ def info(self): else: raise ValueError("Info not available while writing") - def tell(self): + def tell(self) -> int: """Current file location""" return self.loc - def seek(self, loc, whence=0): + def seek(self, loc: int, whence: int = 0) -> int: """Set current file location Parameters @@ -1834,7 +1852,7 @@ def seek(self, loc, whence=0): self.loc = nloc return self.loc - def write(self, data): + def write(self, data) -> int: """ Write data to buffer. @@ -1858,7 +1876,7 @@ def write(self, data): self.flush() return out - def flush(self, force=False): + def flush(self, force: bool = False) -> None: """ Write buffered data to backend store. @@ -1919,7 +1937,7 @@ def _fetch_range(self, start, end): """Get the specified set of bytes from remote""" raise NotImplementedError - def read(self, length=-1): + def read(self, length: int = -1) -> bytes: """ Return data from cache, or fetch pieces as necessary @@ -1950,7 +1968,7 @@ def read(self, length=-1): self.loc += len(out) return out - def readinto(self, b): + def readinto(self, b) -> int: """mirrors builtin file's readinto method https://docs.python.org/3/library/io.html#io.RawIOBase.readinto @@ -1960,7 +1978,7 @@ def readinto(self, b): out[: len(data)] = data return len(data) - def readuntil(self, char=b"\n", blocks=None): + def readuntil(self, char: bytes = b"\n", blocks: int | None = None) -> bytes: """Return data between current position and first occurrence of char char is included in the output, except if the end of the tile is @@ -1988,7 +2006,7 @@ def readuntil(self, char=b"\n", blocks=None): out.append(part) return b"".join(out) - def readline(self): + def readline(self) -> bytes: # type: ignore[override] """Read until first occurrence of newline character Note that, because of character encoding, this is not necessarily a @@ -1996,7 +2014,7 @@ def readline(self): """ return self.readuntil(b"\n") - def __next__(self): + def __next__(self) -> bytes: out = self.readline() if out: return out @@ -2005,7 +2023,7 @@ def __next__(self): def __iter__(self): return self - def readlines(self): + def readlines(self) -> List[bytes]: # type: ignore[override] """Return all data, split by the newline character""" data = self.read() lines = data.split(b"\n") @@ -2016,10 +2034,10 @@ def readlines(self): return out + [lines[-1]] # return list(self) ??? 
-    def readinto1(self, b):
+    def readinto1(self, b) -> int:
         return self.readinto(b)
 
-    def close(self):
+    def close(self) -> None:
         """Close file
         Finalizes writes, discards cache
         """
@@ -2040,29 +2058,29 @@ def close(self):
 
         self.closed = True
 
-    def readable(self):
+    def readable(self) -> bool:
         """Whether opened for reading"""
         return self.mode == "rb" and not self.closed
 
-    def seekable(self):
+    def seekable(self) -> bool:
         """Whether is seekable (only in read mode)"""
         return self.readable()
 
-    def writable(self):
+    def writable(self) -> bool:
         """Whether opened for writing"""
         return self.mode in {"wb", "ab"} and not self.closed
 
-    def __del__(self):
+    def __del__(self) -> None:
         if not self.closed:
             self.close()
 
-    def __str__(self):
+    def __str__(self) -> str:
         return f"<File-like object {type(self.fs).__name__}, {self.path}>"
 
     __repr__ = __str__
 
-    def __enter__(self):
+    def __enter__(self) -> "AbstractBufferedFile":
         return self
 
-    def __exit__(self, *args):
+    def __exit__(self, *args) -> None:
         self.close()
diff --git a/pyproject.toml b/pyproject.toml
index 48368711f..b883e0abc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,6 +128,7 @@ test_downstream = [
     "aiobotocore>=2.5.4,<3.0.0",
 ]
 tqdm = ["tqdm"]
+typing = ["mypy"]
 
 [project.urls]
 Changelog = "https://filesystem-spec.readthedocs.io/en/latest/changelog.html"
@@ -228,3 +229,38 @@ ignore = [
 [tool.pytest.ini_options]
 # custom markers, need to be defined to avoid warnings
 markers = ["asyncio", "vcr"]
+
+[tool.mypy]
+incremental = false # Disabled until https://github.com/python/mypy/issues/12664 is resolved
+files = [
+    "fsspec/",
+]
+warn_unused_configs = true
+warn_unused_ignores = true
+
+[[tool.mypy.overrides]]
+ignore_errors = true
+module = [
+    "fsspec._version",
+    "fsspec.archive",
+    "fsspec.asyn",
+    "fsspec.caching",
+    "fsspec.callbacks",
+    "fsspec.compression",
+    "fsspec.config",
+    "fsspec.conftest",
+    "fsspec.core",
+    "fsspec.dircache",
+    "fsspec.exceptions",
+    "fsspec.fuse",
+    "fsspec.generic",
+    "fsspec.gui",
+    "fsspec.implementations.*",
+    "fsspec.json",
+    "fsspec.mapping",
+    "fsspec.parquet",
+    "fsspec.registry",
+    "fsspec.tests.*",
+    "fsspec.transaction",
+    "fsspec.utils",
+]
\ No newline at end of file
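
Illustrative sketch (not part of the patch): assuming the patch is applied and mypy is
installed through the new "typing" extra (pip install -e ".[typing]"), running plain
mypy from the repository root picks up the [tool.mypy] table above and checks the
fsspec/ package, with errors in the listed modules suppressed via ignore_errors. The
_strip_protocol overloads then let the checker narrow the return type from the argument
type rather than falling back to the str | List[str] union; a scratch file such as the
hypothetical check_spec_typing.py below shows the effect (reveal_type is understood only
by the type checker and raises NameError at runtime):

    # check_spec_typing.py -- illustrative only, not shipped with fsspec
    from fsspec.spec import AbstractFileSystem

    # A single str argument selects the first overload -> str
    reveal_type(AbstractFileSystem._strip_protocol("s3://bucket/key"))
    # mypy: Revealed type is "builtins.str"

    # A list of paths selects the second overload -> List[str]
    reveal_type(AbstractFileSystem._strip_protocol(["s3://a", "s3://b"]))
    # mypy: Revealed type is "builtins.list[builtins.str]"

Run with "mypy check_spec_typing.py"; without the overloads, both calls would be
inferred as the union return type of the implementation signature.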