Skip to content

Commit

Permalink
Snakefile: Conditionally include --no-sign-request option
Browse files Browse the repository at this point in the history
Conditionally include `--no-sign-request` option for download rules
if the sequence/metadata S3 URLs point to the public Nextstrain S3
bucket (s3://nextstrain-data/).

This means AWS credentials are no longer necessary for the public
h5n1-cattle-outbreak genome builds.
  • Loading branch information
joverlee521 committed Aug 9, 2024
1 parent 4537422 commit d1ca49c
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ These use NCBI data including consensus genomes and SRA data assembled via the A
snakemake --cores 1 -pf --configfile config/h5n1-cattle-outbreak.yaml
```

This pipeline starts by downloading data from a public S3 bucket, however credentials may still be required to interact with AWS S3 buckets.
This pipeline starts by downloading data from a public S3 bucket.


**Genome builds**
Expand Down
9 changes: 6 additions & 3 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ for rule_file in config.get('custom_rules', []):
# (2) Filter the other segments by simply force-including the same strains as (1)
SAME_STRAINS = bool(config.get('same_strains_per_segment', False))

NEXTSTRAIN_PUBLIC_BUCKET = "s3://nextstrain-data/"
S3_SRC = config.get('s3_src', {})
LOCAL_INGEST = config.get('local_ingest', None)

Expand Down Expand Up @@ -90,20 +91,22 @@ rule download_sequences:
output:
sequences = f"data/{S3_SRC.get('name', None)}/sequences_{{segment}}.fasta",
params:
address=lambda w: S3_SRC.get('sequences', None).format(segment=w.segment)
address=lambda w: S3_SRC.get('sequences', None).format(segment=w.segment),
no_sign_request=lambda w: "--no-sign-request" if S3_SRC.get('sequences', "").startswith(NEXTSTRAIN_PUBLIC_BUCKET) else ""
shell:
"""
aws s3 cp {params.address:q} - | zstd -d > {output.sequences}
aws s3 cp {params.no_sign_request:q} {params.address:q} - | zstd -d > {output.sequences}
"""

rule download_metadata:
output:
metadata = f"data/{S3_SRC.get('name', None)}/metadata.tsv",
params:
address=S3_SRC.get('metadata', None),
no_sign_request=lambda w: "--no-sign-request" if S3_SRC.get('metadata', "").startswith(NEXTSTRAIN_PUBLIC_BUCKET) else ""
shell:
"""
aws s3 cp {params.address:q} - | zstd -d > {output.metadata}
aws s3 cp {params.no_sign_request:q} {params.address:q} - | zstd -d > {output.metadata}
"""


Expand Down

0 comments on commit d1ca49c

Please sign in to comment.