use variable for client schemes, allowing override
This change makes the default client implementations more flexible
by allowing their URI scheme to be customized. This is useful when a
subclass targets an S3-compatible API [1] under a custom scheme, so
that default s3:// access remains available alongside it.

[1] https://cloudpathlib.drivendata.org/stable/authentication/#accessing-custom-s3-compatible-object-stores

The tests have been updated to include a new S3-like rig that
exercises the new scheme override functionality.
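
For illustration, a minimal sketch of the intended usage, mirroring the
new test rig in tests/conftest.py (the "mys3" names and the endpoint URL
below are placeholders for any S3-compatible service):

    from cloudpathlib import S3Client, S3Path
    from cloudpathlib.client import register_client_class
    from cloudpathlib.cloudpath import register_path_class


    # Register a path class under the custom "mys3://" scheme.
    @register_path_class("mys3")
    class MyS3Path(S3Path):
        cloud_prefix: str = "mys3://"


    # Register a matching client whose paths use the same scheme.
    @register_client_class("mys3")
    class MyS3Client(S3Client):
        cloud_prefix: str = "mys3://"


    # Point the custom client at an S3-compatible endpoint; plain
    # s3:// paths continue to dispatch to the default S3Client.
    client = MyS3Client(endpoint_url="https://minio.example.com")
    path = client.CloudPath("mys3://bucket/key.txt")  # a MyS3Path instance
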
kujenga committed Aug 31, 2024
1 parent b776bee commit b458e3c
Showing 4 changed files with 76 additions and 19 deletions.
16 changes: 12 additions & 4 deletions cloudpathlib/azure/azblobclient.py
@@ -37,6 +37,8 @@ class AzureBlobClient(Client):
     authentication options.
     """
 
+    cloud_prefix: str = "az://"
+
     def __init__(
         self,
         account_url: Optional[str] = None,
@@ -276,12 +278,14 @@ def _list_dir(
     ) -> Iterable[Tuple[AzureBlobPath, bool]]:
         if not cloud_path.container:
             for container in self.service_client.list_containers():
-                yield self.CloudPath(f"az://{container.name}"), True
+                yield self.CloudPath(f"{self.cloud_prefix}{container.name}"), True
 
                 if not recursive:
                     continue
 
-                yield from self._list_dir(self.CloudPath(f"az://{container.name}"), recursive=True)
+                yield from self._list_dir(
+                    self.CloudPath(f"{self.cloud_prefix}{container.name}"), recursive=True
+                )
             return
 
         container_client = self.service_client.get_container_client(cloud_path.container)
@@ -295,7 +299,9 @@ def _list_dir(
             paths = file_system_client.get_paths(path=cloud_path.blob, recursive=recursive)
 
             for path in paths:
-                yield self.CloudPath(f"az://{cloud_path.container}/{path.name}"), path.is_directory
+                yield self.CloudPath(
+                    f"{self.cloud_prefix}{cloud_path.container}/{path.name}"
+                ), path.is_directory
 
         else:
             if not recursive:
@@ -306,7 +312,9 @@ def _list_dir(
             for blob in blobs:
                 # walk_blobs returns folders with a trailing slash
                 blob_path = blob.name.rstrip("/")
-                blob_cloud_path = self.CloudPath(f"az://{cloud_path.container}/{blob_path}")
+                blob_cloud_path = self.CloudPath(
+                    f"{self.cloud_prefix}{cloud_path.container}/{blob_path}"
+                )
 
                 yield blob_cloud_path, (
                     isinstance(blob, BlobPrefix)
18 changes: 13 additions & 5 deletions cloudpathlib/gs/gsclient.py
@@ -35,6 +35,8 @@ class GSClient(Client):
     options.
     """
 
+    cloud_prefix: str = "gs://"
+
     def __init__(
         self,
         application_credentials: Optional[Union[str, os.PathLike]] = None,
@@ -183,7 +185,8 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPath, bool]]:
             )
 
             yield from (
-                (self.CloudPath(f"gs://{str(b)}"), True) for b in self.client.list_buckets()
+                (self.CloudPath(f"{self.cloud_prefix}{str(b)}"), True)
+                for b in self.client.list_buckets()
             )
             return

@@ -200,25 +203,30 @@ def _list_dir(self, cloud_path: GSPath, recursive=False) -> Iterable[Tuple[GSPath, bool]]:
                     # if we haven't surfaced this directory already
                     if parent not in yielded_dirs and str(parent) != ".":
                         yield (
-                            self.CloudPath(f"gs://{cloud_path.bucket}/{prefix}{parent}"),
+                            self.CloudPath(
+                                f"{self.cloud_prefix}{cloud_path.bucket}/{prefix}{parent}"
+                            ),
                             True,  # is a directory
                         )
                         yielded_dirs.add(parent)
-                yield (self.CloudPath(f"gs://{cloud_path.bucket}/{o.name}"), False)  # is a file
+                yield (
+                    self.CloudPath(f"{self.cloud_prefix}{cloud_path.bucket}/{o.name}"),
+                    False,
+                )  # is a file
         else:
             iterator = bucket.list_blobs(delimiter="/", prefix=prefix)
 
             # files must be iterated first for `.prefixes` to be populated:
             # see: https://github.com/googleapis/python-storage/issues/863
             for file in iterator:
                 yield (
-                    self.CloudPath(f"gs://{cloud_path.bucket}/{file.name}"),
+                    self.CloudPath(f"{self.cloud_prefix}{cloud_path.bucket}/{file.name}"),
                     False,  # is a file
                 )
 
             for directory in iterator.prefixes:
                 yield (
-                    self.CloudPath(f"gs://{cloud_path.bucket}/{directory}"),
+                    self.CloudPath(f"{self.cloud_prefix}{cloud_path.bucket}/{directory}"),
                     True,  # is a directory
                 )
 
16 changes: 11 additions & 5 deletions cloudpathlib/s3/s3client.py
@@ -25,6 +25,8 @@ class S3Client(Client):
     instances. See documentation for the [`__init__` method][cloudpathlib.s3.s3client.S3Client.__init__]
     for detailed authentication options."""
 
+    cloud_prefix: str = "s3://"
+
     def __init__(
         self,
         aws_access_key_id: Optional[str] = None,
@@ -217,7 +219,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Path, bool]]:
             )
 
             yield from (
-                (self.CloudPath(f"s3://{b['Name']}"), True)
+                (self.CloudPath(f"{self.cloud_prefix}{b['Name']}"), True)
                 for b in self.client.list_buckets().get("Buckets", [])
             )
             return
@@ -241,7 +243,7 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Path, bool]]:
                 canonical = result_prefix.get("Prefix").rstrip("/")  # keep a canonical form
                 if canonical not in yielded_dirs:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
+                        self.CloudPath(f"{self.cloud_prefix}{cloud_path.bucket}/{canonical}"),
                         True,
                     )
                     yielded_dirs.add(canonical)
@@ -254,7 +256,9 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Path, bool]]:
                     parent_canonical = prefix + str(parent).rstrip("/")
                     if parent_canonical not in yielded_dirs and str(parent) != ".":
                         yield (
-                            self.CloudPath(f"s3://{cloud_path.bucket}/{parent_canonical}"),
+                            self.CloudPath(
+                                f"{self.cloud_prefix}{cloud_path.bucket}/{parent_canonical}"
+                            ),
                             True,
                         )
                         yielded_dirs.add(parent_canonical)
@@ -267,15 +271,17 @@ def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[Tuple[S3Path, bool]]:
                 # s3 fake directories have 0 size and end with "/"
                 if result_key.get("Key").endswith("/") and result_key.get("Size") == 0:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{canonical}"),
+                        self.CloudPath(f"{self.cloud_prefix}{cloud_path.bucket}/{canonical}"),
                         True,
                     )
                     yielded_dirs.add(canonical)
 
                 # yield object as file
                 else:
                     yield (
-                        self.CloudPath(f"s3://{cloud_path.bucket}/{result_key.get('Key')}"),
+                        self.CloudPath(
+                            f"{self.cloud_prefix}{cloud_path.bucket}/{result_key.get('Key')}"
+                        ),
                         False,
                     )

45 changes: 40 additions & 5 deletions tests/conftest.py
@@ -17,7 +17,8 @@
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 
 from cloudpathlib import AzureBlobClient, AzureBlobPath, GSClient, GSPath, S3Client, S3Path
-from cloudpathlib.cloudpath import implementation_registry
+from cloudpathlib.client import register_client_class
+from cloudpathlib.cloudpath import implementation_registry, register_path_class
 from cloudpathlib.local import (
     local_azure_blob_implementation,
     LocalAzureBlobClient,
@@ -307,8 +308,7 @@ def s3_rig(request, monkeypatch, assets_dir):
     bucket.objects.filter(Prefix=test_dir).delete()
 
 
-@fixture()
-def custom_s3_rig(request, monkeypatch, assets_dir):
+def _custom_s3_rig_helper(request, monkeypatch, assets_dir, path_class, client_class):
     """
     Custom S3 rig used to test the integrations with non-AWS S3-compatible object storages like
     - MinIO (https://min.io/)
@@ -370,8 +370,8 @@ def _spin_up_bucket():
         )
 
     rig = CloudProviderTestRig(
-        path_class=S3Path,
-        client_class=S3Client,
+        path_class=path_class,
+        client_class=client_class,
         drive=drive,
         test_dir=test_dir,
         live_server=live_server,
@@ -393,6 +393,39 @@ def _spin_up_bucket():
     bucket.objects.filter(Prefix=test_dir).delete()
 
 
+@fixture()
+def custom_s3_rig(request, monkeypatch, assets_dir):
+    """
+    Custom S3 rig used to test the integrations with non-AWS S3-compatible object storages like
+    - MinIO (https://min.io/)
+    - CEPH (https://ceph.io/ceph-storage/object-storage/)
+    - others
+    """
+    yield from _custom_s3_rig_helper(request, monkeypatch, assets_dir, S3Path, S3Client)
+
+
+@register_path_class("mys3")
+class MyS3Path(S3Path):
+    cloud_prefix: str = "mys3://"
+
+
+@register_client_class("mys3")
+class MyS3Client(S3Client):
+    cloud_prefix: str = "mys3://"
+
+
+@fixture()
+def custom_scheme_s3_rig(request, monkeypatch, assets_dir):
+    """
+    Custom S3 rig used to test the integrations with non-AWS S3-compatible object storages like
+    - MinIO (https://min.io/)
+    - CEPH (https://ceph.io/ceph-storage/object-storage/)
+    - others
+    with the addition of a custom scheme being used.
+    """
+    yield from _custom_s3_rig_helper(request, monkeypatch, assets_dir, MyS3Path, MyS3Client)
+
+
 @fixture()
 def local_azure_rig(request, monkeypatch, assets_dir):
     drive = os.getenv("LIVE_AZURE_CONTAINER", DEFAULT_CONTAINER_NAME)
@@ -486,6 +519,7 @@ def local_s3_rig(request, monkeypatch, assets_dir):
         gs_rig,
         s3_rig,
         custom_s3_rig,
+        custom_scheme_s3_rig,
         local_azure_rig,
         local_s3_rig,
         local_gs_rig,
@@ -498,5 +532,6 @@ def local_s3_rig(request, monkeypatch, assets_dir):
     [
         s3_rig,
         custom_s3_rig,
+        custom_scheme_s3_rig,
     ],
 )
