Skip to content

Commit

Permalink
feat: use transfer_manager for downloading multiple blobs concurrently in StorageObject class.
Browse files Browse the repository at this point in the history
  • Loading branch information
jjjermiah committed Oct 3, 2024
1 parent 9c9a577 commit efe8dbc
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions snakemake_storage_plugin_gcs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from pathlib import Path
import google.cloud.exceptions
from google.cloud import storage
from google.cloud.storage import transfer_manager
from google.api_core import retry
from google_crc32c import Checksum

Expand Down Expand Up @@ -521,17 +522,18 @@ def _download_directory(self):
"""
Handle download of a storage folder (assists retrieve_blob)
"""
# Create the directory locally
self.local_path().mkdir(exist_ok=True)

for blob in self.directory_entries():
local_name = self.provider.local_prefix / self.bucket.name / blob.name

# Don't try to create "directory blob"
if os.path.exists(local_name) and os.path.isdir(local_name):
continue

download_blob(blob, local_name)
blob_names = [blob.name for blob in self.directory_entries()]
results = transfer_manager.download_many_to_path(
bucket=self.bucket,
blob_names=blob_names,
destination_directory=self.provider.local_prefix / self.bucket.name,
create_directories=True,
)
for name, result in zip(blob_names, results):
# The results list is either `None` or an exception for each blob in
# the input list, in order.
if isinstance(result, Exception):
print("Failed to download {} due to exception: {}".format(name, result))

@lazy_property
def bucket(self):
Expand Down

0 comments on commit efe8dbc

Please sign in to comment.