Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: issue #366 and #2649 #2928

Merged
merged 9 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bio/reference/ensembl-annotation/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ authors:
- Johannes Köster
output:
- Ensembl GTF or GFF3 annotation file
params:
- url: Base URL from which to download the annotation file (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
2 changes: 2 additions & 0 deletions bio/reference/ensembl-annotation/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ rule get_annotation_gz:
# branch="plants", # optional: specify branch
log:
"logs/get_annotation.log",
params:
url="http://ftp.ensembl.org/pub",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-annotation"
13 changes: 2 additions & 11 deletions bio/reference/ensembl-annotation/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,8 @@
)


url = "ftp://ftp.ensembl.org/pub/{branch}release-{release}/{out_fmt}/{species}/{species_cap}.{build}.{gtf_release}.{flavor}{suffix}".format(
release=release,
gtf_release=gtf_release,
build=build,
species=species,
out_fmt=out_fmt,
species_cap=species.capitalize(),
suffix=suffix,
flavor=flavor,
branch=branch,
)
url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
url = f"{url}/{branch}release-{release}/{out_fmt}/{species}/{species.capitalize()}.{build}.{gtf_release}.{flavor}{suffix}"


try:
Expand Down
4 changes: 4 additions & 0 deletions bio/reference/ensembl-sequence/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ name: ensembl-sequence
description: Download sequences (e.g. genome) from ENSEMBL FTP servers, and store them in a single .fasta file.
authors:
- Johannes Köster
output:
- fasta file
params:
- url: Base URL from which to download the sequence data (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
2 changes: 2 additions & 0 deletions bio/reference/ensembl-sequence/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ rule get_single_chromosome:
# branch="plants", # optional: specify branch
log:
"logs/get_genome.log",
params:
url="http://ftp.ensembl.org/pub",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/reference/ensembl-sequence"
Expand Down
3 changes: 2 additions & 1 deletion bio/reference/ensembl-sequence/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@
"invalid datatype, to select a single chromosome the datatype must be dna"
)

url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
spec = spec.format(build=build, release=release)
url_prefix = f"ftp://ftp.ensembl.org/pub/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}"
url_prefix = f"{url}/{branch}release-{release}/fasta/{species}/{datatype}/{species.capitalize()}.{spec}"

success = False
for suffix in suffixes:
Expand Down
4 changes: 4 additions & 0 deletions bio/reference/ensembl-variation/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ name: ensembl-variation
description: Download known genomic variants from ENSEMBL FTP servers, and store them in a single .vcf.gz file.
authors:
- Johannes Köster
output:
- VCF file
params:
- url: Base URL from which to download the variation data (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
2 changes: 2 additions & 0 deletions bio/reference/ensembl-variation/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ rule get_variation:
type="all", # one of "all", "somatic", "structural_variation"
# chromosome="21", # optionally constrain to chromosome, only supported for homo_sapiens
# branch="plants", # optional: specify branch
params:
url="http://ftp.ensembl.org/pub",
log:
"logs/get_variation.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
Expand Down
10 changes: 3 additions & 7 deletions bio/reference/ensembl-variation/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,12 @@

species_filename = species if release >= 91 else species.capitalize()

url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
urls = [
"ftp://ftp.ensembl.org/pub/{branch}release-{release}/variation/vcf/{species}/{species_filename}{suffix}.vcf.gz".format(
release=release,
species=species,
suffix=suffix,
species_filename=species_filename,
branch=branch,
)
f"{url}/{branch}release-{release}/variation/vcf/{species}/{species_filename}{suffix}.vcf.gz"
for suffix in suffixes
]

names = [os.path.basename(url) for url in urls]

try:
Expand Down
7 changes: 7 additions & 0 deletions bio/vep/cache/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@ description: Download VEP cache for given species, build and release.
url: http://www.ensembl.org/info/docs/tools/vep/index.html
authors:
- Johannes Köster
output:
- directory to store the VEP cache
params:
- url: Base URL from which to download the cache data (optional; defaults to ``ftp://ftp.ensembl.org/pub``)
- species: species for which to download the cache data
- build: genome build for which to download the cache data
- release: release for which to download the cache data
15 changes: 15 additions & 0 deletions bio/vep/cache/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,18 @@ rule get_vep_cache:
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/vep/cache"


# Test rule: fetch a VEP cache from a server other than the wrapper's default
# by passing an explicit `url` param to the bio/vep/cache wrapper.
rule get_vep_cache_ebi:
output:
directory("resources/vep/cache_ebi"),
params:
# Base URL used instead of the wrapper's default ``ftp://ftp.ensembl.org/pub``.
url="ftp://ftp.ebi.ac.uk/ensemblgenomes/pub/plants",
species="cyanidioschyzon_merolae",
build="ASM9120v1",
# Passed as a string; the wrapper converts it with int() and raises
# ValueError if that conversion fails.
release="58",
log:
"logs/vep/cache_ebi.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"master/bio/vep/cache"
11 changes: 6 additions & 5 deletions bio/vep/cache/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,25 @@


extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)


try:
release = int(snakemake.params.release)
except ValueError:
raise ValueError("The parameter release is supposed to be an integer.")


with tempfile.TemporaryDirectory() as tmpdir:
# We download the cache tarball manually because vep_install does not consider proxy settings (in contrast to curl).
# See https://github.com/bcbio/bcbio-nextgen/issues/1080
vep_dir = "vep" if release >= 97 else "VEP"
cache_url = snakemake.params.get("url", "ftp://ftp.ensembl.org/pub")
cache_tarball = (
f"{snakemake.params.species}_vep_{release}_{snakemake.params.build}.tar.gz"
)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
vep_dir = "vep" if snakemake.params.get("url") or release >= 97 else "VEP"
shell(
"curl -L ftp://ftp.ensembl.org/pub/release-{snakemake.params.release}/"
"variation/{vep_dir}/{cache_tarball} "
"-o {tmpdir}/{cache_tarball} {log}"
"curl -L {cache_url}/release-{release}/variation/{vep_dir}/{cache_tarball} -o {tmpdir}/{cache_tarball} {log}"
)

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)
Expand Down
5 changes: 5 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5915,6 +5915,11 @@ def test_vep_cache():
["snakemake", "--cores", "1", "resources/vep/cache", "--use-conda", "-F"],
)

run(
"bio/vep/cache",
["snakemake", "--cores", "1", "resources/vep/cache_ebi", "--use-conda", "-F"],
)


@skip_if_not_modified
def test_vep_plugins():
Expand Down
Loading