def version_from_init(init_file):
    """Return the ``__version__`` string declared in a package ``__init__.py``.

    The file is scanned line by line (it is never imported or executed) and
    the first line that starts with ``__version__`` and contains an ``=`` is
    parsed.

    Parameters
    ----------
    init_file : str
        Path to the ``__init__.py`` file to inspect.

    Returns
    -------
    str
        The version with surrounding quotes, whitespace and any trailing
        ``#`` comment removed, or ``"Unknown"`` when the file declares no
        ``__version__``.

    Raises
    ------
    OSError
        If ``init_file`` cannot be opened; the error is deliberately not
        swallowed so that a wrong path is visible to the caller.
    """
    with open(init_file, "r") as handle:
        for line in handle:
            if not line.startswith("__version__"):
                continue
            _, separator, value = line.partition("=")
            if not separator:
                # The name is mentioned but nothing is assigned on this line.
                continue
            # Drop a trailing inline comment before cleaning up the value.
            value = value.split("#", 1)[0]
            # Accept both quote styles and any line ending; embedded spaces
            # are removed as before.
            return value.strip().strip("\"'").replace(" ", "")
    # No assignment found: previously this fell through to an unbound local.
    return "Unknown"
The assumption + # here is that the datadir contains newer (user updated) data + for r, _, f in itertools.chain.from_iterable(data_locations): + if r.endswith('/constellations') or r.endswith('/constellations/definitions'): + constellation_files = [] # only collect the constellations from the last directory found + for fn in f: + if r.endswith('/pango_designation') and fn == "alias_key.json": + alias_file = os.path.join(r, fn) + # the __init__.py file for pango_designation is on the same level as alias_key.json + pango_designation.__version__ = version_from_init(os.path.join(r, '__init__.py')) + elif r.endswith('/constellations') and fn == '__init__.py': + constellations.__version__ = version_from_init(os.path.join(r, fn)) + elif (r.endswith('/constellations') or r.endswith('/constellations/definitions')) and fn.endswith('.json'): + constellation_files.append(os.path.join(r, fn)) + + + if args.datadir: + data_dir = os.path.join(cwd, args.datadir) + version = "Unknown" + for r,d,f in os.walk(data_dir): + for fn in f: + if r.endswith('pangoLEARN') and fn == "__init__.py": + # print("Found __init__.py") + version = version_from_init(os.path.join(r, fn)) + # print("pangoLEARN version",version) + pangoLEARN.__version__ = version + + else: + pangoLEARN_dir = pangoLEARN.__path__[0] + data_dir = os.path.join(pangoLEARN_dir,"data") + + # print(f"Looking in {data_dir} for data files...") if args.update: update({'pangolin': __version__, 'pangolearn': pangoLEARN.__version__, @@ -110,14 +165,8 @@ def main(sysargs = sys.argv[1:]): if args.update_data: update({'pangolearn': pangoLEARN.__version__, 'constellations': constellations.__version__, - 'pango-designation': pango_designation.__version__}) + 'pango-designation': pango_designation.__version__}, args.datadir) - alias_file = None - pango_designation_dir = pango_designation.__path__[0] - for r, d, f in os.walk(pango_designation_dir): - for fn in f: - if fn == "alias_key.json": - alias_file = os.path.join(r, fn) if not 
alias_file: sys.stderr.write(cyan('Could not find alias file: please update pango-designation with \n') + "pip install git+https://github.com/cov-lineages/pango-designation.git") @@ -269,6 +318,7 @@ def main(sysargs = sys.argv[1:]): "trim_end":29674, # where to pad after using datafunk "qc_fail":qc_fail, "alias_file": alias_file, + "constellation_files": constellation_files, "verbose":args.verbose, "pangoLEARN_version":pangoLEARN.__version__, "pangolin_version":__version__, @@ -281,27 +331,6 @@ def main(sysargs = sys.argv[1:]): dependency_checks.set_up_verbosity(config) - # find the data - if args.datadir: - data_dir = os.path.join(cwd, args.datadir) - version = "Unknown" - for r,d,f in os.walk(data_dir): - for fn in f: - if fn == "__init__.py": - print("Found __init__.py") - with open(os.path.join(r, fn),"r") as fr: - for l in fr: - if l.startswith("__version__"): - l = l.rstrip("\n") - version = l.split('=')[1] - version = version.replace('"',"").replace(" ","") - print("pangoLEARN version",version) - config["pangoLEARN_version"] = version - - else: - pangoLEARN_dir = pangoLEARN.__path__[0] - data_dir = os.path.join(pangoLEARN_dir,"data") - # print(f"Looking in {data_dir} for data files...") trained_model = "" header_file = "" designated_hash="" @@ -384,7 +413,7 @@ def main(sysargs = sys.argv[1:]): return 1 -def update(version_dictionary): +def update(version_dictionary, data_dir=None): """ Using the github releases API check for the latest current release of the set of depdencies provided e.g., pangolin, scorpio, pangolearn and @@ -412,6 +441,10 @@ def update(version_dictionary): pango_designation data module} """ + package_names = {'pangolearn': 'pangoLEARN', + 'pango-designation': 'pango_designation' + } + # flag if any element is update if everything is the latest release # we want to just continue running for dependency, version in version_dictionary.items(): @@ -434,6 +467,7 @@ def update(version_dictionary): sys.exit(-1) latest_release = 
json.load(latest_release) + latest_release_tarball = latest_release[0]['tarball_url'] latest_release = LooseVersion(latest_release[0]['tag_name']) #print(dependency, version, latest_release) @@ -457,11 +491,25 @@ def update(version_dictionary): version = LooseVersion(version) if version < latest_release: - subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', - f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"], - check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + if data_dir is not None: + # this path only gets followed when the user has --update_data and they + # have also specified a --datadir + with TemporaryDirectory() as tempdir: + dependency_package = package_names.get(dependency, dependency) + tarball_path = os.path.join(tempdir, 'tarball.tgz') + open(tarball_path, 'wb').write(request.urlopen(latest_release_tarball).read()) + tf = tarfile.open(tarball_path) + extracted_dir = tf.next().name + tf.extractall(path=tempdir) + tf.close() + destination_directory = os.path.join(data_dir, dependency_package) + shutil.move(os.path.join(tempdir, extracted_dir, dependency_package), destination_directory) + else: + subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', + f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) print(f"{dependency} updated to {latest_release}", file=sys.stderr) elif version > latest_release: print(f"{dependency} ({version}) is newer than latest stable " diff --git a/pangolin/scripts/pangolearn.smk b/pangolin/scripts/pangolearn.smk index 4f3279a..d7df12a 100644 --- a/pangolin/scripts/pangolearn.smk +++ b/pangolin/scripts/pangolearn.smk @@ -163,6 +163,8 @@ rule add_failed_seqs: rule scorpio: input: fasta = rules.align_to_reference.output.fasta, + params: + constellation_files = " ".join(config["constellation_files"]) output: report = 
os.path.join(config["tempdir"],"VOC_report.scorpio.csv") threads: @@ -176,6 +178,7 @@ rule scorpio: -o {output.report:q} \ -t {workflow.cores} \ --output-counts \ + --constellations {params.constellation_files} \ --pangolin \ --list-incompatible \ --long &> {log:q}