-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add cache download sub command
- Loading branch information
Ryan Routsong
committed
Nov 29, 2023
1 parent
8427060
commit 931fe21
Showing
5 changed files
with
131 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"bcl2fastq": "docker://umccr/bcl2fastq:latest", | ||
"weave": "docker://rroutsong/weave_ngsqc:0.0.1", | ||
"kraken": "https://genome-idx.s3.amazonaws.com/kraken/k2_pluspfp_16gb_20231009.tar.gz", | ||
"kaiju": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_euk_2023-05-10.tgz", | ||
"fastq_screen": "filelist://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/genome_locations.txt" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,4 @@ requests | |
terminaltables | ||
pyyaml | ||
tabulate | ||
progressbar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: UTF-8 -*- | ||
# ~~~~~~~~~~~~~~~ | ||
# Miscellaneous utility functions for caching | ||
# pipeline resources | ||
# ~~~~~~~~~~~~~~~ | ||
import subprocess | ||
import json | ||
import urllib.request | ||
import progressbar | ||
from argparse import ArgumentTypeError | ||
from pathlib import Path | ||
from urllib.parse import urlparse | ||
|
||
from .config import remote_resource_confg | ||
from .utils import esc_colors | ||
|
||
|
||
def parse_uri(uri):
    """Split a URI into its ``(protocol, remainder)`` parts.

    The input is coerced with ``str`` before it is inspected, so non-string
    values (for example path-like objects) are handled uniformly. Only the
    first ``://`` is treated as the separator, so a URI that embeds a second
    ``://`` keeps its remainder intact.

    Args:
        uri: URI-like value, e.g. ``"docker://org/image:tag"``.

    Returns:
        (tuple | None): ``(protocol, remainder)``, or ``None`` when the
        value contains no ``://`` separator.
    """
    protocol, separator, remainder = str(uri).partition('://')
    if not separator:
        return None
    return protocol, remainder


def info_download(msg):
    """Print ``msg`` wrapped in the ``OKGREEN`` / ``ENDC`` escape codes."""
    print(esc_colors.OKGREEN + msg + esc_colors.ENDC)
|
||
|
||
class DownloadProgressBar():
    """Callable progress reporter for ``urllib.request.urlretrieve``.

    An instance is passed as ``reporthook``; each call receives the number
    of blocks transferred so far, the block size in bytes and the total
    size of the file.
    """

    def __init__(self):
        # Created lazily on the first callback, once the total size is known.
        self.pbar = None

    def __call__(self, block_num, block_size, total_size):
        """Advance the bar to ``block_num * block_size`` bytes.

        Args:
            block_num (int): Count of blocks transferred so far.
            block_size (int): Size of one block in bytes.
            total_size (int): Total size of the download; per the urllib
                documentation this may be ``-1`` when the server does not
                report a size.
        """
        if total_size < 0:
            # Unknown size: there is no meaningful maximum to draw against,
            # so skip the bar instead of handing a negative maxval to it.
            return

        if self.pbar is None:
            self.pbar = progressbar.ProgressBar(maxval=total_size)
            self.pbar.start()

        downloaded = block_num * block_size
        if downloaded < total_size:
            self.pbar.update(downloaded)
        else:
            # The last block usually overshoots the total; close the bar
            # rather than updating past its maximum.
            self.pbar.finish()
|
||
|
||
def valid_dir(path):
    """Validate a directory argument for argument parsing.

    An existing directory is accepted as-is. A path that does not exist is
    created, including missing parents. A path that exists but is not a
    directory is rejected.

    Args:
        path: Candidate directory path.

    Returns:
        (str): Absolute path to vetted output path

    Raises:
        ArgumentTypeError: If the path exists but is not a directory, or if
            it does not exist and cannot be created (the underlying
            ``OSError`` is attached as the cause).
    """
    candidate = Path(path)

    if candidate.is_dir():
        return str(candidate.absolute())

    if candidate.exists():
        raise ArgumentTypeError(f"readable_dir:{path} is not a valid path")

    try:
        # exist_ok covers the case where the directory appears between the
        # checks above and this call.
        candidate.mkdir(mode=0o777, parents=True, exist_ok=True)
    except OSError as err:
        raise ArgumentTypeError(f"dir:{path} doesn't exist and can't be created") from err

    return str(candidate.absolute())
|
||
|
||
|
||
def download(output_dir, local=False):
    """Download every resource listed in the remote resource config.

    The JSON file at ``remote_resource_confg`` maps resource names to URIs;
    each entry is split into protocol and remainder and handed to
    ``handle_download`` one after another.

    Args:
        output_dir: Directory passed through to ``handle_download``.
        local (bool): Currently unused by this function; downloads always
            run serially in the current process.

    Returns:
        (bool): True once every resource has been processed. Failures are
        reported through exceptions, not a False return.

    Raises:
        ValueError: If a configured URI has no ``protocol://`` prefix.
    """
    print(esc_colors.WARNING + 'Warning: cache download only implemented in serial local mode currently' + esc_colors.ENDC)
    # TODO: slurm implementation
    with open(remote_resource_confg) as config_handle:
        resources_to_download = json.load(config_handle)

    for resource, uri in resources_to_download.items():
        parsed = parse_uri(uri)
        if parsed is None:
            # parse_uri signals a missing "://" with None; fail with a
            # message naming the offending entry instead of an unpack error.
            raise ValueError(f"Resource {resource} has a malformed URI: {uri}")
        protocol, url = parsed
        handle_download(output_dir, resource, protocol, url)

    print(esc_colors.OKGREEN + 'All resources downloaded!' + esc_colors.ENDC)

    return True
|
||
|
||
def handle_download(output_dir, resource, protocol, url):
    """Fetch a single resource into ``output_dir`` according to its protocol.

    Supported protocols:
        * ``http`` / ``https`` / ``ftp``: the file is retrieved with
          ``urllib.request.urlretrieve`` and saved under its full file name.
        * ``docker``: the image is pulled with ``singularity pull`` into
          ``<name>_<tag>.sif`` inside ``output_dir``.
        * ``filelist``: the remote text file is read line by line and every
          listed URI is downloaded recursively.

    Args:
        output_dir: Directory that receives the downloaded artifacts.
        resource (str): Resource name, used in progress messages.
        protocol (str): Scheme part of the resource URI.
        url (str): Remainder of the resource URI after ``://``.

    Raises:
        ValueError: If the protocol is unsupported, or a file-list entry
            has no ``protocol://`` prefix.
    """
    uri = protocol + "://" + url
    if protocol in ('http', 'https', 'ftp'):
        info_download(f"Getting web resource {resource}...")
        # Keep the complete file name: ``.stem`` would strip the final
        # extension (``x.tar.gz`` -> ``x.tar``). Fall back to the resource
        # name when the URL path ends in a slash and has no file name.
        fnurl = Path(urlparse(uri).path).name or resource
        urllib.request.urlretrieve(uri, filename=Path(output_dir, fnurl), reporthook=DownloadProgressBar())
    elif protocol == 'docker':
        # Equality, not ``in ('docker')``: the latter is a substring test
        # against a plain string and also matches e.g. "dock" or "".
        info_download(f"Getting docker resource {resource}...")
        docker_tag = url.split('/')[-1]
        docker_name, _, docker_v = docker_tag.partition(':')
        # An untagged image reference falls back to the "latest" tag.
        docker_v = docker_v or 'latest'
        subprocess.check_call(['singularity', 'pull', '-F', f"{docker_name}_{docker_v}.sif", uri], cwd=output_dir)
    elif protocol == 'filelist':
        info_download(f"Getting meta-resource {resource}...")
        # NOTE(review): "filelist://" carries no transport of its own;
        # https is assumed for fetching the listing. Confirm for new hosts.
        with urllib.request.urlopen("https://" + url) as response:
            listing = response.read().decode('utf-8')
        # Materialise the entries so the total count is known up front;
        # blank lines and '#' comment lines are skipped.
        file_list = [
            entry.strip()
            for entry in listing.splitlines()
            if entry.strip() and not entry.lstrip().startswith('#')
        ]
        for i, _file_uri in enumerate(file_list, start=1):
            parsed = parse_uri(_file_uri)
            if parsed is None:
                raise ValueError(f"Unsupported entry in {resource} file list: {_file_uri}")
            this_protocol, this_url = parsed
            print(f"\t Getting resource {str(i)} of {str(len(file_list))}")
            handle_download(output_dir, resource, this_protocol, this_url)
    else:
        raise ValueError(f"Unsupported resource protocol: {protocol}")
    return
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters