diff --git a/HISTORY.md b/HISTORY.md
index 1a68ce12..0deb48db 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,9 @@
 # cloudpathlib Changelog
 
+## UNRELEASED
+
+- Added support for custom schemes in CloudPath and Client subclasses. (Issue [#466](https://github.com/drivendataorg/cloudpathlib/issues/466), PR [#467](https://github.com/drivendataorg/cloudpathlib/pull/467))
+
 ## v0.19.0 (2024-08-29)
 
 - Fixed an error that occurred when loading and dumping `CloudPath` objects using pickle multiple times. (Issue [#450](https://github.com/drivendataorg/cloudpathlib/issues/450), PR [#454](https://github.com/drivendataorg/cloudpathlib/pull/454), thanks to [@kujenga](https://github.com/kujenga))
diff --git a/cloudpathlib/azure/azblobclient.py b/cloudpathlib/azure/azblobclient.py
index 98189378..289eb236 100644
--- a/cloudpathlib/azure/azblobclient.py
+++ b/cloudpathlib/azure/azblobclient.py
@@ -276,12 +276,14 @@ def _list_dir(
     ) -> Iterable[Tuple[AzureBlobPath, bool]]:
         if not cloud_path.container:
             for container in self.service_client.list_containers():
-                yield self.CloudPath(f"az://{container.name}"), True
+                yield self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), True
 
                 if not recursive:
                     continue
 
-                yield from self._list_dir(self.CloudPath(f"az://{container.name}"), recursive=True)
+                yield from self._list_dir(
+                    self.CloudPath(f"{cloud_path.cloud_prefix}{container.name}"), recursive=True
+                )
             return
 
         container_client = self.service_client.get_container_client(cloud_path.container)
@@ -295,7 +297,9 @@ def _list_dir(
             paths = file_system_client.get_paths(path=cloud_path.blob, recursive=recursive)
 
             for path in paths:
-                yield self.CloudPath(f"az://{cloud_path.container}/{path.name}"), path.is_directory
+                yield self.CloudPath(
+                    f"{cloud_path.cloud_prefix}{cloud_path.container}/{path.name}"
+                ), path.is_directory
 
         else:
             if not recursive:
@@ -306,7 +310,9 @@ def _list_dir(
             for blob in blobs:
                 # walk_blobs returns folders with a trailing slash
                 blob_path = blob.name.rstrip("/")
-                blob_cloud_path = self.CloudPath(f"az://{cloud_path.container}/{blob_path}")
+                blob_cloud_path = self.CloudPath(
+                    f"{cloud_path.cloud_prefix}{cloud_path.container}/{blob_path}"
+                )
 
                 yield blob_cloud_path, (
                     isinstance(blob, BlobPrefix)
diff --git a/cloudpathlib/gs/gsclient.py b/cloudpathlib/gs/gsclient.py
index 56d0e2c5..edd5b88a 100644
--- a/cloudpathlib/gs/gsclient.py
+++ b/cloudpathlib/gs/gsclient.py
@@ -183,7 +183,8 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
                 )
 
             yield from (
-                (self.CloudPath(f"gs://{str(b)}"), True) for b in self.client.list_buckets()
+                (self.CloudPath(f"{cloud_path.cloud_prefix}{str(b)}"), True)
+                for b in self.client.list_buckets()
             )
             return
 
@@ -200,11 +201,16 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
                     # if we haven't surfaced this directory already
                     if parent not in yielded_dirs and str(parent) != ".":
                         yield (
-                            self.CloudPath(f"gs://{cloud_path.bucket}/{prefix}{parent}"),
+                            self.CloudPath(
+                                f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
+                            ),
                             True,  # is a directory
                         )
                         yielded_dirs.add(parent)
-                yield (self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}"), False)  # is a file
+                yield (
+                    self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{o.name}"),
+                    False,
+                )  # is a file
         else:
             iterator = bucket.list_blobs(delimiter="/", prefix=prefix)
 
@@ -212,13 +218,13 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPat
             # see: https://github.com/googleapis/python-storage/issues/863
             for file in iterator:
                 yield (
-                    self.CloudPath(f"gs://{cloud_path.bucket}/{file.name}"),
+                    self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{file.name}"),
                     False,  # is a file
                 )
 
             for directory in iterator.prefixes:
                 yield (
-                    self.CloudPath(f"gs://{cloud_path.bucket}/{directory}"),
+                    self.CloudPath(f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{directory}"),
                     True,  # is a directory
                 )
 
diff --git a/cloudpathlib/s3/s3client.py b/cloudpathlib/s3/s3client.py
index ac2ecaa6..db130e82 100644
--- a/cloudpathlib/s3/s3client.py
+++ b/cloudpathlib/s3/s3client.py
@@ -217,7 +217,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
                 )
 
             yield from (
-                (self.CloudPath(f"s3://{b['Name']}"), True)
+                (self.CloudPath(f"{cloud_path.cloud_prefix}{b['Name']}"), True)
                 for b in self.client.list_buckets().get("Buckets", [])
             )
             return
@@ -241,7 +241,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
                 canonical = result_prefix.get("Prefix").rstrip("/")  # keep a canonical form
                 if canonical not in yielded_dirs:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
+                        self.CloudPath(
+                            f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
+                        ),
                         True,
                     )
                     yielded_dirs.add(canonical)
@@ -254,7 +256,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
                     parent_canonical = prefix + str(parent).rstrip("/")
                     if parent_canonical not in yielded_dirs and str(parent) != ".":
                         yield (
-                            self.CloudPath(f"s3://{cloud_path.bucket}/{parent_canonical}"),
+                            self.CloudPath(
+                                f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{parent_canonical}"
+                            ),
                             True,
                         )
                         yielded_dirs.add(parent_canonical)
@@ -267,7 +271,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
                 # s3 fake directories have 0 size and end with "/"
                 if result_key.get("Key").endswith("/") and result_key.get("Size") == 0:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
+                        self.CloudPath(
+                            f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{canonical}"
+                        ),
                         True,
                     )
                     yielded_dirs.add(canonical)
@@ -275,7 +281,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Pat
                 # yield object as file
                 else:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}"),
+                        self.CloudPath(
+                            f"{cloud_path.cloud_prefix}{cloud_path.bucket}/{result_key.get('Key')}"
+                        ),
                         False,
                     )
 
diff --git a/tests/test_client.py b/tests/test_client.py
index a665a5a6..fd58535b 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,11 +1,16 @@
 import mimetypes
 import os
-from pathlib import Path
 import random
 import string
+from pathlib import Path
+
+import pytest
 
 from cloudpathlib import CloudPath
+from cloudpathlib.client import register_client_class
+from cloudpathlib.cloudpath import implementation_registry, register_path_class
 from cloudpathlib.s3.s3client import S3Client
+from cloudpathlib.s3.s3path import S3Path
 
 
 def test_default_client_instantiation(rig):
@@ -116,3 +121,49 @@ def my_content_type(path):
 
     for suffix, content_type in mimes:
         _test_write_content_type(suffix, content_type, rig)
+
+
+@pytest.fixture
+def custom_s3_path():
+    # A fixture isolates these classes as they modify the global registry of
+    # implementations.
+    @register_path_class("mys3")
+    class MyS3Path(S3Path):
+        cloud_prefix: str = "mys3://"
+
+    @register_client_class("mys3")
+    class MyS3Client(S3Client):
+        pass
+
+    yield (MyS3Path, MyS3Client)
+
+    # cleanup after use
+    implementation_registry.pop("mys3")
+
+
+def test_custom_mys3path_instantiation(custom_s3_path):
+    CustomPath, _ = custom_s3_path
+
+    path = CustomPath("mys3://bucket/dir/file.txt")
+    assert isinstance(path, CustomPath)
+    assert path.cloud_prefix == "mys3://"
+    assert path.bucket == "bucket"
+    assert path.key == "dir/file.txt"
+
+
+def test_custom_mys3client_instantiation(custom_s3_path):
+    _, CustomClient = custom_s3_path
+
+    client = CustomClient()
+    assert isinstance(client, CustomClient)
+    assert client.CloudPath("mys3://bucket/dir/file.txt").cloud_prefix == "mys3://"
+
+
+def test_custom_mys3client_default_client(custom_s3_path):
+    _, CustomClient = custom_s3_path
+
+    CustomClient().set_as_default_client()
+
+    path = CloudPath("mys3://bucket/dir/file.txt")
+    assert isinstance(path.client, CustomClient)
+    assert path.cloud_prefix == "mys3://"
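
Usage note: the pattern exercised by the new tests is how a downstream user would register a custom scheme. A minimal sketch, mirroring the `custom_s3_path` fixture above (the `mys3` scheme and the `MyS3Path`/`MyS3Client` names are illustrative, not part of the library):

    from cloudpathlib import CloudPath
    from cloudpathlib.client import register_client_class
    from cloudpathlib.cloudpath import register_path_class
    from cloudpathlib.s3.s3client import S3Client
    from cloudpathlib.s3.s3path import S3Path


    # register a path class and a client class under the new "mys3://" scheme
    @register_path_class("mys3")
    class MyS3Path(S3Path):
        cloud_prefix: str = "mys3://"


    @register_client_class("mys3")
    class MyS3Client(S3Client):
        pass


    # with a default client set for the scheme, the generic CloudPath
    # constructor dispatches "mys3://" URIs to the custom classes
    MyS3Client().set_as_default_client()
    path = CloudPath("mys3://bucket/dir/file.txt")  # handled by MyS3Path / MyS3Client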