From c36066ce002fbc45efecab87bd260f6562e75bbf Mon Sep 17 00:00:00 2001
From: buckwheat445 <177609215+buckwheat445@users.noreply.github.com>
Date: Wed, 11 Dec 2024 23:55:01 +0800
Subject: [PATCH] fix: allow protocol prefixed paths for webhdfs (#1761)

---
Reviewer notes (this region is ignored when the patch is applied):

* test_webhdfs.py: appends test_protocol_prefixed_path. It builds a WebHDFS
  instance from the hdfs_cluster fixture, calls mkdir() with a path that
  carries a "webhdfs://localhost:50070" prefix, then asserts that exists()
  is truthy and that ls(detail=True) on the same path returns no entries.

* webhdfs.py: in _call, a path that is not None is now passed through
  self._strip_protocol() before being quoted (safe="/=") and appended to
  self.url. path=None still yields the empty string.

* NOTE(review): the old expression `path or ""` mapped every falsy path
  (including "") straight to "". After this change an empty-string path is
  handed to _strip_protocol() instead -- confirm that helper returns a value
  that still produces the intended URL.

* NOTE(review): the new test creates test_dir and does not remove it --
  confirm the fixture cleans up, or that reruns against the same cluster
  are not affected.

* NOTE(review): line breaks and hunk indentation were reconstructed from
  the Python structure of the payload -- verify against the upstream commit
  before applying.

 fsspec/implementations/tests/test_webhdfs.py | 13 +++++++++++++
 fsspec/implementations/webhdfs.py            |  3 ++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/fsspec/implementations/tests/test_webhdfs.py b/fsspec/implementations/tests/test_webhdfs.py
index fac34c76b..0f179d380 100644
--- a/fsspec/implementations/tests/test_webhdfs.py
+++ b/fsspec/implementations/tests/test_webhdfs.py
@@ -195,3 +195,16 @@ def test_write_read_verify_file_with_equals(hdfs_cluster):
     assert len(file_info) == 1
     assert file_info[0]["name"] == file_path
     assert file_info[0]["size"] == len(content)
+
+
+def test_protocol_prefixed_path(hdfs_cluster):
+    fs = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+    protocol_prefixed_path = "webhdfs://localhost:50070/user/testuser/test_dir"
+
+    fs.mkdir(protocol_prefixed_path)
+    assert fs.exists(protocol_prefixed_path)
+
+    file_info = fs.ls(protocol_prefixed_path, detail=True)
+    assert len(file_info) == 0
diff --git a/fsspec/implementations/webhdfs.py b/fsspec/implementations/webhdfs.py
index 300bb9cdf..c6e0d8446 100644
--- a/fsspec/implementations/webhdfs.py
+++ b/fsspec/implementations/webhdfs.py
@@ -166,7 +166,8 @@ def _connect(self):
             self.session.auth = HTTPBasicAuth(self.user, self.password)
 
     def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
-        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
+        path = self._strip_protocol(path) if path is not None else ""
+        url = self._apply_proxy(self.url + quote(path, safe="/="))
         args = kwargs.copy()
         args.update(self.pars)
         args["op"] = op.upper()