Skip to content

Commit

Permalink
Delay S3 resource construction until object requested (#111)
Browse files Browse the repository at this point in the history
Constructing an S3 handle takes up memory and time. We retrieve the backend whenever we initialize a tile, but we want to delay the expensive work until the data is requested.

The performance issue is severe enough that we want to do a release, so this commit bumps the version to 4.0.1.

Test plan: Able to quickly load an experiment where all the tiles are located in the cloud.
  • Loading branch information
Tony Tung authored Jul 19, 2019
1 parent e1c9add commit ba15786
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 16 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setuptools.setup(
name="slicedimage",
version="4.0.0",
version="4.0.1",
description="Library to access sliced imaging data",
author="Tony Tung",
author_email="[email protected]",
Expand Down
34 changes: 19 additions & 15 deletions slicedimage/backends/_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,42 @@ class S3Backend(Backend):
CONFIG_UNSIGNED_REQUESTS_KEY = "unsigned-requests"

def __init__(self, baseurl, s3_config):
unsigned_requests = s3_config.get(S3Backend.CONFIG_UNSIGNED_REQUESTS_KEY, False)

if unsigned_requests:
resource_config = Config(signature_version=UNSIGNED)
else:
resource_config = None

parsed = urllib.parse.urlparse(baseurl)
assert parsed[0].lower() == "s3"
session = boto3.session.Session()
s3 = session.resource("s3", config=resource_config)
self._bucket = s3.Bucket(parsed[1])

self._bucket = parsed[1]
if parsed[2][0] == "/":
self._basepath = PurePosixPath(parsed[2][1:])
else:
self._basepath = PurePosixPath(parsed[2])
self._s3_config = s3_config

def read_contextmanager(self, name, checksum_sha256=None):
key = str(self._basepath / name)
print(key)
return _S3ContextManager(self._bucket.Object(key), checksum_sha256)
return _S3ContextManager(self._bucket, key, checksum_sha256, self._s3_config)


class _S3ContextManager:
def __init__(self, s3_obj, checksum_sha256):
self.s3_obj = s3_obj
def __init__(self, s3_bucket, s3_key, checksum_sha256, s3_config):
self.s3_bucket = s3_bucket
self.s3_key = s3_key
self.checksum_sha256 = checksum_sha256
self.s3_config = s3_config

def __enter__(self):
unsigned_requests = self.s3_config.get(S3Backend.CONFIG_UNSIGNED_REQUESTS_KEY, False)

if unsigned_requests:
resource_config = Config(signature_version=UNSIGNED)
else:
resource_config = None

session = boto3.session.Session()
s3 = session.resource("s3", config=resource_config)
bucket = s3.Bucket(self.s3_bucket)
s3_obj = bucket.Object(self.s3_key)
self.buffer = BytesIO()
self.s3_obj.download_fileobj(self.buffer)
s3_obj.download_fileobj(self.buffer)
self.buffer.seek(0)
verify_checksum(self.buffer, self.checksum_sha256)
return self.buffer.__enter__()
Expand Down
1 change: 1 addition & 0 deletions slicedimage/io/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ def _parse_collection(parse_method, baseurl, backend_config):
"""
def parse(name_relative_path_or_url_tuple):
name, relative_path_or_url = name_relative_path_or_url_tuple

partition = parse_method(relative_path_or_url, baseurl, backend_config)
partition._name_or_url = relative_path_or_url

Expand Down

0 comments on commit ba15786

Please sign in to comment.