Skip to content

Commit fc81205

Browse files
committed
Make bucket and profile private
Add _ prefix to bucket and profile properties in S3ResourcePath
1 parent 09b13aa commit fc81205

File tree

2 files changed

+39
-29
lines changed

2 files changed

+39
-29
lines changed

python/lsst/resources/s3.py

+31-21
Original file line numberDiff line numberDiff line change
@@ -192,19 +192,29 @@ def _transfer_config(self) -> TransferConfig:
192192
@property
193193
def client(self) -> boto3.client:
194194
"""Client object to address remote resource."""
195-
return getS3Client(self.profile)
195+
return getS3Client(self._profile)
196196

197197
@property
198-
def profile(self) -> str | None:
198+
def _profile(self) -> str | None:
199+
"""Profile name to use for looking up S3 credentials and endpoint."""
199200
return self._uri.username
200201

201202
@property
202-
def bucket(self) -> str:
203+
def _bucket(self) -> str:
204+
"""S3 bucket where the files are stored."""
205+
# Notionally the bucket is stored in the 'hostname' part of the URI.
206+
# However, Ceph S3 uses a "multi-tenant" syntax for bucket names in the
207+
# form 'tenant:bucket'. The part after the colon is parsed as the port
208+
# portion of the URI, and urllib throws an exception if you try to read
209+
# a non-integer port value. So manually split off this portion of the
210+
# URI.
203211
split = self._uri.netloc.split("@")
204212
num_components = len(split)
205213
if num_components == 2:
214+
# There is a profile@ portion of the URL, so take the second half.
206215
bucket = split[1]
207216
elif num_components == 1:
217+
# There is no profile@, so take the whole netloc.
208218
bucket = split[0]
209219
else:
210220
raise ValueError(f"Unexpected extra '@' in S3 URI: '{str(self)}'")
@@ -216,12 +226,12 @@ def bucket(self) -> str:
216226

217227
@classmethod
218228
def _mexists(cls, uris: Iterable[ResourcePath]) -> dict[ResourcePath, bool]:
219-
# Force client to be created before creating threads.
229+
# Force client to be created for each profile before creating threads.
220230
profiles = set[str | None]()
221231
for path in uris:
222232
if path.scheme == "s3":
223233
path = cast(S3ResourcePath, path)
224-
profiles.add(path.profile)
234+
profiles.add(path._profile)
225235
for profile in profiles:
226236
getS3Client(profile)
227237

@@ -232,16 +242,16 @@ def exists(self) -> bool:
232242
"""Check that the S3 resource exists."""
233243
if self.is_root:
234244
# Only check for the bucket since the path is irrelevant
235-
return bucketExists(self.bucket, self.client)
236-
exists, _ = s3CheckFileExists(self, bucket=self.bucket, client=self.client)
245+
return bucketExists(self._bucket, self.client)
246+
exists, _ = s3CheckFileExists(self, bucket=self._bucket, client=self.client)
237247
return exists
238248

239249
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
240250
def size(self) -> int:
241251
"""Return the size of the resource in bytes."""
242252
if self.dirLike:
243253
return 0
244-
exists, sz = s3CheckFileExists(self, bucket=self.bucket, client=self.client)
254+
exists, sz = s3CheckFileExists(self, bucket=self._bucket, client=self.client)
245255
if not exists:
246256
raise FileNotFoundError(f"Resource {self} does not exist")
247257
return sz
@@ -254,7 +264,7 @@ def remove(self) -> None:
254264
# for checking all the keys again, response is HTTP 204 OK
255265
# response all the time
256266
try:
257-
self.client.delete_object(Bucket=self.bucket, Key=self.relativeToPathRoot)
267+
self.client.delete_object(Bucket=self._bucket, Key=self.relativeToPathRoot)
258268
except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
259269
raise FileNotFoundError("No such resource: {self}") from err
260270

@@ -264,7 +274,7 @@ def read(self, size: int = -1) -> bytes:
264274
if size > 0:
265275
args["Range"] = f"bytes=0-{size-1}"
266276
try:
267-
response = self.client.get_object(Bucket=self.bucket, Key=self.relativeToPathRoot, **args)
277+
response = self.client.get_object(Bucket=self._bucket, Key=self.relativeToPathRoot, **args)
268278
except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
269279
raise FileNotFoundError(f"No such resource: {self}") from err
270280
except ClientError as err:
@@ -280,20 +290,20 @@ def write(self, data: bytes, overwrite: bool = True) -> None:
280290
if not overwrite and self.exists():
281291
raise FileExistsError(f"Remote resource {self} exists and overwrite has been disabled")
282292
with time_this(log, msg="Write to %s", args=(self,)):
283-
self.client.put_object(Bucket=self.bucket, Key=self.relativeToPathRoot, Body=data)
293+
self.client.put_object(Bucket=self._bucket, Key=self.relativeToPathRoot, Body=data)
284294

285295
@backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
286296
def mkdir(self) -> None:
287297
"""Write a directory key to S3."""
288-
if not bucketExists(self.bucket, self.client):
289-
raise ValueError(f"Bucket {self.bucket} does not exist for {self}!")
298+
if not bucketExists(self._bucket, self.client):
299+
raise ValueError(f"Bucket {self._bucket} does not exist for {self}!")
290300

291301
if not self.dirLike:
292302
raise NotADirectoryError(f"Can not create a 'directory' for file-like URI {self}")
293303

294304
# don't create S3 key when root is at the top-level of a bucket
295305
if self.path != "/":
296-
self.client.put_object(Bucket=self.bucket, Key=self.relativeToPathRoot)
306+
self.client.put_object(Bucket=self._bucket, Key=self.relativeToPathRoot)
297307

298308
@backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
299309
def _download_file(self, local_file: IO, progress: ProgressPercentage | None) -> None:
@@ -304,7 +314,7 @@ def _download_file(self, local_file: IO, progress: ProgressPercentage | None) ->
304314
"""
305315
try:
306316
self.client.download_fileobj(
307-
self.bucket,
317+
self._bucket,
308318
self.relativeToPathRoot,
309319
local_file,
310320
Callback=progress,
@@ -349,7 +359,7 @@ def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage |
349359
"""
350360
try:
351361
self.client.upload_file(
352-
local_file.ospath, self.bucket, self.relativeToPathRoot, Callback=progress
362+
local_file.ospath, self._bucket, self.relativeToPathRoot, Callback=progress
353363
)
354364
except self.client.exceptions.NoSuchBucket as err:
355365
raise NotADirectoryError(f"Target does not exist: {err}") from err
@@ -360,11 +370,11 @@ def _upload_file(self, local_file: ResourcePath, progress: ProgressPercentage |
360370
@backoff.on_exception(backoff.expo, all_retryable_errors, max_time=max_retry_time)
361371
def _copy_from(self, src: S3ResourcePath) -> None:
362372
copy_source = {
363-
"Bucket": src.bucket,
373+
"Bucket": src._bucket,
364374
"Key": src.relativeToPathRoot,
365375
}
366376
try:
367-
self.client.copy_object(CopySource=copy_source, Bucket=self.bucket, Key=self.relativeToPathRoot)
377+
self.client.copy_object(CopySource=copy_source, Bucket=self._bucket, Key=self.relativeToPathRoot)
368378
except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
369379
raise FileNotFoundError("No such resource to transfer: {self}") from err
370380
except ClientError as err:
@@ -494,7 +504,7 @@ def walk(
494504
filenames = []
495505
files_there = False
496506

497-
for page in s3_paginator.paginate(Bucket=self.bucket, Prefix=prefix, Delimiter="/"):
507+
for page in s3_paginator.paginate(Bucket=self._bucket, Prefix=prefix, Delimiter="/"):
498508
# All results are returned as full key names and we must
499509
# convert them back to the root form. The prefix is fixed
500510
# and delimited so that is a simple trim
@@ -532,7 +542,7 @@ def _openImpl(
532542
*,
533543
encoding: str | None = None,
534544
) -> Iterator[ResourceHandleProtocol]:
535-
with S3ResourceHandle(mode, log, self.client, self.bucket, self.relativeToPathRoot) as handle:
545+
with S3ResourceHandle(mode, log, self.client, self._bucket, self.relativeToPathRoot) as handle:
536546
if "b" in mode:
537547
yield handle
538548
else:
@@ -554,6 +564,6 @@ def generate_presigned_put_url(self, *, expiration_time_seconds: int) -> str:
554564
def _generate_presigned_url(self, method: str, expiration_time_seconds: int) -> str:
555565
return self.client.generate_presigned_url(
556566
method,
557-
Params={"Bucket": self.bucket, "Key": self.relativeToPathRoot},
567+
Params={"Bucket": self._bucket, "Key": self.relativeToPathRoot},
558568
ExpiresIn=expiration_time_seconds,
559569
)

tests/test_s3.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -272,20 +272,20 @@ def test_s3_endpoint_url(self):
272272

273273
def test_uri_syntax(self):
274274
path1 = ResourcePath("s3://profile@bucket/path")
275-
self.assertEqual(path1.bucket, "bucket")
276-
self.assertEqual(path1.profile, "profile")
275+
self.assertEqual(path1._bucket, "bucket")
276+
self.assertEqual(path1._profile, "profile")
277277
path2 = ResourcePath("s3://bucket2/path")
278-
self.assertEqual(path2.bucket, "bucket2")
279-
self.assertIsNone(path2.profile)
278+
self.assertEqual(path2._bucket, "bucket2")
279+
self.assertIsNone(path2._profile)
280280

281281
def test_ceph_uri_syntax(self):
282282
# The Ceph S3 'multi-tenant' syntax for buckets can include colons.
283283
path1 = ResourcePath("s3://profile@ceph:bucket/path")
284-
self.assertEqual(path1.bucket, "ceph:bucket")
285-
self.assertEqual(path1.profile, "profile")
284+
self.assertEqual(path1._bucket, "ceph:bucket")
285+
self.assertEqual(path1._profile, "profile")
286286
path2 = ResourcePath("s3://ceph:bucket2/path")
287-
self.assertEqual(path2.bucket, "ceph:bucket2")
288-
self.assertIsNone(path2.profile)
287+
self.assertEqual(path2._bucket, "ceph:bucket2")
288+
self.assertIsNone(path2._profile)
289289

290290

291291
if __name__ == "__main__":

0 commit comments

Comments
 (0)