diff --git a/README.md b/README.md
index ba55bd7..4ec2f54 100755
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ These use NCBI data including consensus genomes and SRA data assembled via the A
 snakemake --cores 1 -pf --configfile config/h5n1-cattle-outbreak.yaml
 ```
 
-This pipeline starts by downloading data from a public S3 bucket, however credentials may still be required to interact with AWS S3 buckets.
+This pipeline starts by downloading data from a public S3 bucket.
 
 **Genome builds**
 
diff --git a/Snakefile b/Snakefile
index f931734..bcc91af 100755
--- a/Snakefile
+++ b/Snakefile
@@ -17,6 +17,7 @@ for rule_file in config.get('custom_rules', []):
 # (2) Filter the other segments by simply force-including the same strains as (1)
 SAME_STRAINS = bool(config.get('same_strains_per_segment', False))
 
+NEXTSTRAIN_PUBLIC_BUCKET = "s3://nextstrain-data/"
 S3_SRC = config.get('s3_src', {})
 LOCAL_INGEST = config.get('local_ingest', None)
 
@@ -90,10 +91,11 @@ rule download_sequences:
     output:
         sequences = f"data/{S3_SRC.get('name', None)}/sequences_{{segment}}.fasta",
     params:
-        address=lambda w: S3_SRC.get('sequences', None).format(segment=w.segment)
+        address=lambda w: S3_SRC.get('sequences', None).format(segment=w.segment),
+        no_sign_request=lambda w: "--no-sign-request" if S3_SRC.get('sequences', "").startswith(NEXTSTRAIN_PUBLIC_BUCKET) else ""
     shell:
         """
-        aws s3 cp {params.address:q} - | zstd -d > {output.sequences}
+        aws s3 cp {params.no_sign_request:q} {params.address:q} - | zstd -d > {output.sequences}
         """
 
 rule download_metadata:
@@ -101,9 +103,10 @@
         metadata = f"data/{S3_SRC.get('name', None)}/metadata.tsv",
     params:
         address=S3_SRC.get('metadata', None),
+        no_sign_request=lambda w: "--no-sign-request" if S3_SRC.get('metadata', "").startswith(NEXTSTRAIN_PUBLIC_BUCKET) else ""
     shell:
         """
-        aws s3 cp {params.address:q} - | zstd -d > {output.metadata}
+        aws s3 cp {params.no_sign_request:q} {params.address:q} - | zstd -d > {output.metadata}
         """
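
For illustration only (not part of the diff): a rough sketch of how the download_sequences shell command renders after this change when s3_src.sequences points at an object under the public bucket. The S3 object path and build name below are hypothetical placeholders; --no-sign-request is the AWS CLI flag that lets aws s3 cp read public objects without credentials.

    # Hypothetical rendering for segment "ha"; the real S3 path comes from the workflow config.
    aws s3 cp --no-sign-request "s3://nextstrain-data/<hypothetical-path>/sequences_ha.fasta.zst" - | zstd -d > data/<name>/sequences_ha.fasta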