diff --git a/bio/reference/ensembl-annotation/test/ensembl_annotation_https.smk b/bio/reference/ensembl-annotation/test/ensembl_annotation_https.smk new file mode 100644 index 00000000000..a2586e2283c --- /dev/null +++ b/bio/reference/ensembl-annotation/test/ensembl_annotation_https.smk @@ -0,0 +1,14 @@ +rule get_annotation_https_protocol_gz: + output: + "refs/annotation.gtf.gz", + params: + species="homo_sapiens", + release="105", + build="GRCh37", + flavor="", + protocol="https", + log: + "logs/get_annotation.log", + cache: "omit-software" # save space and time with between workflow caching (see docs) + wrapper: + "master/bio/reference/ensembl-annotation" diff --git a/bio/reference/ensembl-annotation/wrapper.py b/bio/reference/ensembl-annotation/wrapper.py index 2f1b78c2a6a..5a5b6729c73 100644 --- a/bio/reference/ensembl-annotation/wrapper.py +++ b/bio/reference/ensembl-annotation/wrapper.py @@ -19,6 +19,7 @@ out_fmt = Path(snakemake.output[0]).suffixes out_gz = (out_fmt.pop() and True) if out_fmt[-1] == ".gz" else False out_fmt = out_fmt.pop().lstrip(".") +protocol = snakemake.params.get("protocol", "ftp") branch = "" @@ -48,7 +49,7 @@ ) -url = "ftp://ftp.ensembl.org/pub/{branch}release-{release}/{out_fmt}/{species}/{species_cap}.{build}.{gtf_release}.{flavor}{suffix}".format( +url = "{protocol}://ftp.ensembl.org/pub/{branch}release-{release}/{out_fmt}/{species}/{species_cap}.{build}.{gtf_release}.{flavor}{suffix}".format( release=release, gtf_release=gtf_release, build=build, @@ -58,6 +59,7 @@ suffix=suffix, flavor=flavor, branch=branch, + protocol=protocol, ) diff --git a/bio/reference/ensembl-sequence/test/Snakefile b/bio/reference/ensembl-sequence/test/Snakefile index a8227f4cdd1..6de1b22c485 100644 --- a/bio/reference/ensembl-sequence/test/Snakefile +++ b/bio/reference/ensembl-sequence/test/Snakefile @@ -29,6 +29,7 @@ rule get_single_chromosome: wrapper: "master/bio/reference/ensembl-sequence" + rule get_multiple_chromosome: output: 
"refs/chr1_and_chr2.fasta", diff --git a/bio/reference/ensembl-sequence/test/ensembl_sequence_https.smk b/bio/reference/ensembl-sequence/test/ensembl_sequence_https.smk new file mode 100644 index 00000000000..f19ffc93062 --- /dev/null +++ b/bio/reference/ensembl-sequence/test/ensembl_sequence_https.smk @@ -0,0 +1,14 @@ +rule get_genome_http_protocol: + output: + "refs/genome.fasta", + params: + species="saccharomyces_cerevisiae", + datatype="dna", + build="R64-1-1", + release="98", + protocol="http", + log: + "logs/get_genome.log", + cache: "omit-software" # save space and time with between workflow caching (see docs) + wrapper: + "master/bio/reference/ensembl-sequence" diff --git a/bio/reference/ensembl-sequence/wrapper.py b/bio/reference/ensembl-sequence/wrapper.py index df9a6eef693..a13ccc612f6 100644 --- a/bio/reference/ensembl-sequence/wrapper.py +++ b/bio/reference/ensembl-sequence/wrapper.py @@ -11,6 +11,7 @@ species = snakemake.params.species.lower() release = int(snakemake.params.release) build = snakemake.params.build +protocol = snakemake.params.get("protocol", "ftp") branch = "" if release >= 81 and build == "GRCh37": @@ -51,7 +52,7 @@ ) spec = spec.format(build=build, release=release) -url_prefix = f"ftp://ftp.ensembl.org/pub/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}" +url_prefix = f"{protocol}://ftp.ensembl.org/pub/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}" success = False for suffix in suffixes: diff --git a/bio/reference/ensembl-variation/test/chrom_wise.smk b/bio/reference/ensembl-variation/test/chrom_wise.smk index 3faacc41854..780617891e2 100644 --- a/bio/reference/ensembl-variation/test/chrom_wise.smk +++ b/bio/reference/ensembl-variation/test/chrom_wise.smk @@ -1,13 +1,13 @@ rule get_variation: output: - vcf="refs/variation.vcf.gz" + vcf="refs/variation.vcf.gz", params: species="homo_sapiens", release="104", build="GRCh38", - type="all", # one of "all", "somatic", 
"structural_variation" + type="all", # one of "all", "somatic", "structural_variation" chromosome="21", log: - "logs/get_variation.log" + "logs/get_variation.log", wrapper: "master/bio/reference/ensembl-variation" diff --git a/bio/reference/ensembl-variation/test/ensembl_variation_https.smk b/bio/reference/ensembl-variation/test/ensembl_variation_https.smk new file mode 100644 index 00000000000..d51363eac9b --- /dev/null +++ b/bio/reference/ensembl-variation/test/ensembl_variation_https.smk @@ -0,0 +1,14 @@ +rule get_variation_https_protocol: + output: + vcf="refs/variation.vcf.gz", + params: + species="saccharomyces_cerevisiae", + release="98", + build="R64-1-1", + type="all", + protocol="https", + log: + "logs/get_variation.log", + cache: "omit-software" # save space and time with between workflow caching (see docs) + wrapper: + "master/bio/reference/ensembl-variation" diff --git a/bio/reference/ensembl-variation/test/grch37.smk b/bio/reference/ensembl-variation/test/grch37.smk index a44a10d5ee6..97b788cd6a4 100644 --- a/bio/reference/ensembl-variation/test/grch37.smk +++ b/bio/reference/ensembl-variation/test/grch37.smk @@ -1,14 +1,14 @@ rule get_variation_with_contig_lengths: input: - fai="refs/grch37.fasta.fai" + fai="refs/grch37.fasta.fai", output: - vcf="refs/variation.vcf.gz" + vcf="refs/variation.vcf.gz", params: species="homo_sapiens", release="100", build="GRCh37", - type="all" # one of "all", "somatic", "structural_variation" + type="all", # one of "all", "somatic", "structural_variation" log: - "logs/get_variation.log" + "logs/get_variation.log", wrapper: "master/bio/reference/ensembl-variation" diff --git a/bio/reference/ensembl-variation/test/old_release.smk b/bio/reference/ensembl-variation/test/old_release.smk index 23cafdb26cf..52dff157f67 100644 --- a/bio/reference/ensembl-variation/test/old_release.smk +++ b/bio/reference/ensembl-variation/test/old_release.smk @@ -1,16 +1,14 @@ rule get_variation: output: - vcf="refs/variation.vcf.gz" + 
vcf="refs/variation.vcf.gz", # optional: add fai to get VCF with annotated contig lengths (as required by GATK) # fai="refs/genome.fasta.fai" params: species="saccharomyces_cerevisiae", release="98", build="R64-1-1", - type="all" # one of "all", "somatic", "structural_variation" + type="all", # one of "all", "somatic", "structural_variation" log: - "logs/get_variation.log" + "logs/get_variation.log", wrapper: "master/bio/reference/ensembl-variation" - - diff --git a/bio/reference/ensembl-variation/test/with_fai.smk b/bio/reference/ensembl-variation/test/with_fai.smk index bc7746d998e..4952c31e10d 100644 --- a/bio/reference/ensembl-variation/test/with_fai.smk +++ b/bio/reference/ensembl-variation/test/with_fai.smk @@ -1,14 +1,14 @@ rule get_variation_with_contig_lengths: input: - fai="refs/genome.fasta.fai" + fai="refs/genome.fasta.fai", output: - vcf="refs/variation.vcf.gz" + vcf="refs/variation.vcf.gz", params: species="saccharomyces_cerevisiae", release="98", build="R64-1-1", - type="all" # one of "all", "somatic", "structural_variation" + type="all", # one of "all", "somatic", "structural_variation" log: - "logs/get_variation.log" + "logs/get_variation.log", wrapper: "master/bio/reference/ensembl-variation" diff --git a/bio/reference/ensembl-variation/wrapper.py b/bio/reference/ensembl-variation/wrapper.py index f42f7aa5287..952386e5fa8 100644 --- a/bio/reference/ensembl-variation/wrapper.py +++ b/bio/reference/ensembl-variation/wrapper.py @@ -15,6 +15,7 @@ build = snakemake.params.build type = snakemake.params.type chromosome = snakemake.params.get("chromosome", "") +protocol = snakemake.params.get("protocol", "ftp") branch = "" @@ -63,12 +64,13 @@ species_filename = species if release >= 91 else species.capitalize() urls = [ - "ftp://ftp.ensembl.org/pub/{branch}release-{release}/variation/vcf/{species}/{species_filename}{suffix}.vcf.gz".format( + 
"{protocol}://ftp.ensembl.org/pub/{branch}release-{release}/variation/vcf/{species}/{species_filename}{suffix}.vcf.gz".format( release=release, species=species, suffix=suffix, species_filename=species_filename, branch=branch, + protocol=protocol, ) for suffix in suffixes ] diff --git a/bio/vep/cache/test/vep_cache_https.smk b/bio/vep/cache/test/vep_cache_https.smk new file mode 100644 index 00000000000..8b25352965c --- /dev/null +++ b/bio/vep/cache/test/vep_cache_https.smk @@ -0,0 +1,14 @@ + +rule get_vep_cache_https_protocol: + output: + directory("resources/vep/cache"), + params: + species="saccharomyces_cerevisiae", + build="R64-1-1", + release="98", + protocol="https", + log: + "logs/vep/cache.log", + cache: "omit-software" # save space and time with between workflow caching (see docs) + wrapper: + "master/bio/vep/cache" diff --git a/bio/vep/cache/wrapper.py b/bio/vep/cache/wrapper.py index 543f1b26169..4c1d7857986 100644 --- a/bio/vep/cache/wrapper.py +++ b/bio/vep/cache/wrapper.py @@ -9,6 +9,7 @@ extra = snakemake.params.get("extra", "") +protocol = snakemake.params.get("protocol", "ftp") try: release = int(snakemake.params.release) @@ -24,7 +25,7 @@ ) log = snakemake.log_fmt_shell(stdout=True, stderr=True) shell( - "curl -L ftp://ftp.ensembl.org/pub/release-{snakemake.params.release}/" + "curl -L {protocol}://ftp.ensembl.org/pub/release-{snakemake.params.release}/" "variation/{vep_dir}/{cache_tarball} " "-o {tmpdir}/{cache_tarball} {log}" ) diff --git a/test.py b/test.py index af555e9d58e..84a4faf7c10 100644 --- a/test.py +++ b/test.py @@ -5329,6 +5329,22 @@ def test_ensembl_sequence_old_release(): ) +@skip_if_not_modified +def test_ensembl_sequence_https(): + run( + "bio/reference/ensembl-sequence", + [ + "snakemake", + "-s", + "ensembl_sequence_https.smk", + "--cores", + "1", + "--use-conda", + "-F", + ], + ) + + @skip_if_not_modified def test_ensembl_sequence_chromosome(): run( @@ -5378,6 +5394,23 @@ def test_ensembl_annotation_gtf_gz(): ) 
+@skip_if_not_modified
+def test_ensembl_annotation_gtf_https_gz():
+    run(
+        "bio/reference/ensembl-annotation",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "refs/annotation.gtf.gz",
+            "--use-conda",
+            "-F",
+            "-s",
+            "ensembl_annotation_https.smk",
+        ],
+    )
+
+
 @skip_if_not_modified
 def test_ensembl_variation():
     run(
@@ -5428,17 +5461,26 @@ def test_ensembl_variation_with_contig_lengths():
 
 
 @skip_if_not_modified
-def test_ega_fetch():
+def test_ensembl_variation_https():
     run(
-        "bio/ega/fetch",
+        "bio/reference/ensembl-variation",
         [
             "snakemake",
+            "-s",
+            "ensembl_variation_https.smk",
             "--cores",
             "1",
             "--use-conda",
             "-F",
-            "data/EGAF00007243774.cram"
-        ]
+        ],
+    )
+
+
+@skip_if_not_modified
+def test_ega_fetch():
+    run(
+        "bio/ega/fetch",
+        ["snakemake", "--cores", "1", "--use-conda", "-F", "data/EGAF00007243774.cram"],
     )
 
 
@@ -5801,6 +5843,23 @@ def test_vep_cache():
     )
 
 
+@skip_if_not_modified
+def test_vep_cache_https_protocol():
+    run(
+        "bio/vep/cache",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "resources/vep/cache",
+            "--use-conda",
+            "-F",
+            "-s",
+            "vep_cache_https.smk",
+        ],
+    )
+
+
 @skip_if_not_modified
 def test_vep_plugins():
     run(