From 66ee7d83dd747708757b88447a04b40fa1ea65ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Fri, 18 Jun 2021 20:38:24 +0545 Subject: [PATCH 1/2] avoid isfile calls on find as much as possible If `walk` is also returning the same file, it should already have been included in `out`. If `out` is not empty, we can safely assume that it is either a directory or a file. If it's empty, it could be that it is either an empty directory, or the path is a file that was not included in the `walk` results. In that case we will make an `isfile` call. This is unfortunate but better than before, where we were making an additional `isfile` call for each find. This was noticed in DVC, which was making remote cache querying slower when traversing through the caches (512 calls instead of 256 :( ). --- fsspec/asyn.py | 2 +- fsspec/spec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index bd022f222..efdac868b 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -581,7 +581,7 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs): if withdirs: files.update(dirs) out.update({info["name"]: info for name, info in files.items()}) - if (await self._isfile(path)) and path not in out: + if not out and (await self._isfile(path)): # walk works on directories, but find should also return [path] # when path happens to be a file out[path] = {} diff --git a/fsspec/spec.py b/fsspec/spec.py index 02674867b..13a775799 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -445,7 +445,7 @@ def find(self, path, maxdepth=None, withdirs=False, **kwargs): if withdirs: files.update(dirs) out.update({info["name"]: info for name, info in files.items()}) - if self.isfile(path) and path not in out: + if not out and self.isfile(path): # walk works on directories, but find should also return [path] # when path happens to be a file out[path] = {} From 
1e85844e1b451c085e332cb6fe1b0af9bec81dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Mon, 21 Jun 2021 18:13:29 +0545 Subject: [PATCH 2/2] add tests --- fsspec/tests/test_spec.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index 3e2af2f4d..74dac81dd 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -232,6 +232,14 @@ def test_find_details(): assert details[filename] == test_fs.info(filename) +def test_find_file(): + test_fs = DummyTestFS() + + filename = "misc/foo.txt" + assert test_fs.find(filename) == [filename] + assert test_fs.find(filename, detail=True) == {filename: {}} + + def test_cache(): fs = DummyTestFS() fs2 = DummyTestFS()