diff --git a/fsspec/asyn.py b/fsspec/asyn.py index 5e9911535..b8f8642a0 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -641,7 +641,7 @@ async def _info(self, path, **kwargs): async def _ls(self, path, detail=True, **kwargs): raise NotImplementedError - async def _walk(self, path, maxdepth=None, **kwargs): + async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") @@ -653,7 +653,11 @@ async def _walk(self, path, maxdepth=None, **kwargs): detail = kwargs.pop("detail", False) try: listing = await self._ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError): + except (FileNotFoundError, OSError) as e: + if on_error == "raise": + raise + elif callable(on_error): + on_error(e) if detail: yield path, {}, {} else: diff --git a/fsspec/spec.py b/fsspec/spec.py index 2bdfa3854..457c082e2 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -372,7 +372,7 @@ def _ls_from_cache(self, path): except KeyError: pass - def walk(self, path, maxdepth=None, topdown=True, **kwargs): + def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): """Return all files belows path List all files, recursing into subdirectories; output is iterator-style, @@ -399,6 +399,10 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs): topdown: bool (True) Whether to walk the directory tree from the top downwards or from the bottom upwards. 
+ on_error: "omit", "raise", a callable + if omit (default), path with exception will simply be empty; + if raise, an underlying exception will be raised; + if callable, it will be called with a single OSError instance as argument kwargs: passed to ``ls`` """ if maxdepth is not None and maxdepth < 1: @@ -412,7 +416,11 @@ def walk(self, path, maxdepth=None, topdown=True, **kwargs): detail = kwargs.pop("detail", False) try: listing = self.ls(path, detail=True, **kwargs) - except (FileNotFoundError, OSError): + except (FileNotFoundError, OSError) as e: + if on_error == "raise": + raise + elif callable(on_error): + on_error(e) if detail: return path, {}, {} return path, [], [] diff --git a/fsspec/tests/test_api.py b/fsspec/tests/test_api.py index 8187dda81..dd3374fe7 100644 --- a/fsspec/tests/test_api.py +++ b/fsspec/tests/test_api.py @@ -4,6 +4,7 @@ import os import pickle import tempfile +from unittest.mock import Mock import pytest @@ -480,3 +481,18 @@ def _walk(*args, **kwargs): (dir12, [], ["file121"]), (dir1, ["dir11", "dir12"], ["file11"]), ] + + # on_error omit by default + assert list(m.walk("do_not_exist")) == [] + # on_error omit + assert list(m.walk("do_not_exist", on_error="omit")) == [] + # on_error raise + with pytest.raises(FileNotFoundError): + list(m.walk("do_not_exist", on_error="raise")) + # on_error callable function + mock = Mock() + assert list(m.walk("do_not_exist", on_error=mock.onerror)) == [] + mock.onerror.assert_called() + assert mock.onerror.call_args.kwargs == {} + assert len(mock.onerror.call_args.args) == 1 + assert isinstance(mock.onerror.call_args.args[0], FileNotFoundError)