Skip to content

Commit 952cd98

Browse files
authored
Speed up git backend (#1712)
1 parent 03e89cc commit 952cd98

File tree

3 files changed

+61
-40
lines changed

3 files changed

+61
-40
lines changed

fsspec/implementations/git.py

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ def _path_to_object(self, path, ref):
5555
tree = comm.tree
5656
for part in parts:
5757
if part and isinstance(tree, pygit2.Tree):
58+
if part not in tree:
59+
raise FileNotFoundError(path)
5860
tree = tree[part]
5961
return tree
6062

@@ -69,46 +71,32 @@ def _get_kwargs_from_urls(path):
6971
out["ref"], path = path.split("@", 1)
7072
return out
7173

74+
@staticmethod
75+
def _object_to_info(obj, path=None):
76+
# obj.name and obj.filemode are None for the root tree!
77+
is_dir = isinstance(obj, pygit2.Tree)
78+
return {
79+
"type": "directory" if is_dir else "file",
80+
"name": (
81+
"/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
82+
),
83+
"hex": str(obj.id),
84+
"mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
85+
"size": 0 if is_dir else obj.size,
86+
}
87+
7288
def ls(self, path, detail=True, ref=None, **kwargs):
73-
path = self._strip_protocol(path)
74-
tree = self._path_to_object(path, ref)
75-
if isinstance(tree, pygit2.Tree):
76-
out = []
77-
for obj in tree:
78-
if isinstance(obj, pygit2.Tree):
79-
out.append(
80-
{
81-
"type": "directory",
82-
"name": "/".join([path, obj.name]).lstrip("/"),
83-
"hex": str(obj.id),
84-
"mode": f"{obj.filemode:o}",
85-
"size": 0,
86-
}
87-
)
88-
else:
89-
out.append(
90-
{
91-
"type": "file",
92-
"name": "/".join([path, obj.name]).lstrip("/"),
93-
"hex": str(obj.id),
94-
"mode": f"{obj.filemode:o}",
95-
"size": obj.size,
96-
}
97-
)
98-
else:
99-
obj = tree
100-
out = [
101-
{
102-
"type": "file",
103-
"name": obj.name,
104-
"hex": str(obj.id),
105-
"mode": f"{obj.filemode:o}",
106-
"size": obj.size,
107-
}
108-
]
109-
if detail:
110-
return out
111-
return [o["name"] for o in out]
89+
tree = self._path_to_object(self._strip_protocol(path), ref)
90+
return [
91+
GitFileSystem._object_to_info(obj, path)
92+
if detail
93+
else GitFileSystem._object_to_info(obj, path)["name"]
94+
for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
95+
]
96+
97+
def info(self, path, ref=None, **kwargs):
98+
tree = self._path_to_object(self._strip_protocol(path), ref)
99+
return GitFileSystem._object_to_info(tree, path)
112100

113101
def ukey(self, path, ref=None):
114102
return self.info(path, ref=ref)["hex"]

fsspec/implementations/tests/test_git.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,39 @@ def test_refs(repo):
6161
assert f.read() == b"data3"
6262

6363

64+
def _check_FileNotFoundError(f, *args, **kwargs):
65+
with pytest.raises(FileNotFoundError):
66+
f(*args, **kwargs)
67+
68+
69+
def test_file_existence_checks(repo):
70+
d, sha = repo
71+
72+
fs, _ = fsspec.url_to_fs(f"git://{d}:abranch@")
73+
74+
assert fs.lexists("inner")
75+
assert fs.exists("inner")
76+
assert fs.isdir("inner")
77+
assert fs.info("inner")
78+
assert fs.ls("inner")
79+
80+
assert fs.lexists("inner/file1")
81+
assert fs.exists("inner/file1")
82+
assert fs.info("inner/file1")
83+
assert fs.ls("inner/file1")
84+
85+
assert not fs.lexists("non-existing-file")
86+
assert not fs.exists("non-existing-file")
87+
88+
assert not fs.isfile("non-existing-file")
89+
assert not fs.isdir("non-existing-file")
90+
91+
_check_FileNotFoundError(fs.info, "non-existing-file")
92+
_check_FileNotFoundError(fs.size, "non-existing-file")
93+
_check_FileNotFoundError(fs.ls, "non-existing-file")
94+
_check_FileNotFoundError(fs.open, "non-existing-file")
95+
96+
6497
def test_url(repo):
6598
d, sha = repo
6699
fs, _, paths = fsspec.core.get_fs_token_paths(f"git://file1::file://{d}")

fsspec/spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ def info(self, path, **kwargs):
648648
Returns a single dictionary, with exactly the same information as ``ls``
649649
would with ``detail=True``.
650650
651-
The default implementation should calls ls and could be overridden by a
651+
The default implementation calls ls and could be overridden by a
652652
shortcut. kwargs are passed on to ``ls()``.
653653
654654
Some file systems might not be able to measure the file's size, in

0 commit comments

Comments
 (0)