Skip to content

Commit

Permalink
change how constellation files parse addressing fixing PR #282 and ad… (
Browse files Browse the repository at this point in the history
#286)

Change how constellation files parse addressing fixing PR #282 and addressing issue #283
  • Loading branch information
rmcolq authored Aug 3, 2021
1 parent 7bc70bb commit 531d531
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 36 deletions.
2 changes: 1 addition & 1 deletion pangolin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
_program = "pangolin"
__version__ = "3.1.7"
__version__ = "3.1.8"


__all__ = ["pangolearn",
Expand Down
118 changes: 83 additions & 35 deletions pangolin/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
from pangolin import __version__
import argparse
import os.path
import itertools
import snakemake
import shutil
import sys
import tarfile
from urllib import request
from distutils.version import LooseVersion
import subprocess
import json
from tempfile import gettempdir
from tempfile import TemporaryDirectory, TemporaryFile, gettempdir, tempdir
import tempfile
import pprint
import json
Expand Down Expand Up @@ -65,6 +68,16 @@
thisdir = os.path.abspath(os.path.dirname(__file__))
cwd = os.getcwd()

def version_from_init(init_file):
with open(init_file, "r") as fr:
for l in fr:
if l.startswith("__version__"):
l = l.rstrip("\n")
version = l.split('=')[1]
version = version.replace('"',"").replace(" ","")
break
return version

def main(sysargs = sys.argv[1:]):

parser = argparse.ArgumentParser(prog = _program,
Expand Down Expand Up @@ -99,6 +112,48 @@ def main(sysargs = sys.argv[1:]):
args = parser.parse_args(sysargs)
args = parser.parse_args()

# find the data
alias_file = None
pango_designation_dir = pango_designation.__path__[0]
constellations_dir = constellations.__path__[0]
constellation_files = []
data_locations = [os.walk(pango_designation_dir), os.walk(constellations_dir)]
if args.datadir is not None:
data_locations.append(os.walk(args.datadir))
# the logic of this is to search the "built-in" pango_designation and constellations
# paths first and then if as custom datadir is passed, follow up with those, so that
# any files found in the datadir supercede the "built-in" modules. The assumption
# here is that the datadir contains newer (user updated) data
for r, _, f in itertools.chain.from_iterable(data_locations):
if r.endswith('/constellations') or r.endswith('/constellations/definitions'):
constellation_files = [] # only collect the constellations from the last directory found
for fn in f:
if r.endswith('/pango_designation') and fn == "alias_key.json":
alias_file = os.path.join(r, fn)
# the __init__.py file for pango_designation is on the same level as alias_key.json
pango_designation.__version__ = version_from_init(os.path.join(r, '__init__.py'))
elif r.endswith('/constellations') and fn == '__init__.py':
constellations.__version__ = version_from_init(os.path.join(r, fn))
elif (r.endswith('/constellations') or r.endswith('/constellations/definitions')) and fn.endswith('.json'):
constellation_files.append(os.path.join(r, fn))


if args.datadir:
data_dir = os.path.join(cwd, args.datadir)
version = "Unknown"
for r,d,f in os.walk(data_dir):
for fn in f:
if r.endswith('pangoLEARN') and fn == "__init__.py":
# print("Found __init__.py")
version = version_from_init(os.path.join(r, fn))
# print("pangoLEARN version",version)
pangoLEARN.__version__ = version

else:
pangoLEARN_dir = pangoLEARN.__path__[0]
data_dir = os.path.join(pangoLEARN_dir,"data")

# print(f"Looking in {data_dir} for data files...")
if args.update:
update({'pangolin': __version__,
'pangolearn': pangoLEARN.__version__,
Expand All @@ -110,14 +165,8 @@ def main(sysargs = sys.argv[1:]):
if args.update_data:
update({'pangolearn': pangoLEARN.__version__,
'constellations': constellations.__version__,
'pango-designation': pango_designation.__version__})
'pango-designation': pango_designation.__version__}, args.datadir)

alias_file = None
pango_designation_dir = pango_designation.__path__[0]
for r, d, f in os.walk(pango_designation_dir):
for fn in f:
if fn == "alias_key.json":
alias_file = os.path.join(r, fn)
if not alias_file:
sys.stderr.write(cyan('Could not find alias file: please update pango-designation with \n') +
"pip install git+https://github.com/cov-lineages/pango-designation.git")
Expand Down Expand Up @@ -269,6 +318,7 @@ def main(sysargs = sys.argv[1:]):
"trim_end":29674, # where to pad after using datafunk
"qc_fail":qc_fail,
"alias_file": alias_file,
"constellation_files": constellation_files,
"verbose":args.verbose,
"pangoLEARN_version":pangoLEARN.__version__,
"pangolin_version":__version__,
Expand All @@ -281,27 +331,6 @@ def main(sysargs = sys.argv[1:]):

dependency_checks.set_up_verbosity(config)

# find the data
if args.datadir:
data_dir = os.path.join(cwd, args.datadir)
version = "Unknown"
for r,d,f in os.walk(data_dir):
for fn in f:
if fn == "__init__.py":
print("Found __init__.py")
with open(os.path.join(r, fn),"r") as fr:
for l in fr:
if l.startswith("__version__"):
l = l.rstrip("\n")
version = l.split('=')[1]
version = version.replace('"',"").replace(" ","")
print("pangoLEARN version",version)
config["pangoLEARN_version"] = version

else:
pangoLEARN_dir = pangoLEARN.__path__[0]
data_dir = os.path.join(pangoLEARN_dir,"data")
# print(f"Looking in {data_dir} for data files...")
trained_model = ""
header_file = ""
designated_hash=""
Expand Down Expand Up @@ -384,7 +413,7 @@ def main(sysargs = sys.argv[1:]):
return 1


def update(version_dictionary):
def update(version_dictionary, data_dir=None):
"""
Using the github releases API check for the latest current release
of the set of depdencies provided e.g., pangolin, scorpio, pangolearn and
Expand Down Expand Up @@ -412,6 +441,10 @@ def update(version_dictionary):
pango_designation data module}
"""
package_names = {'pangolearn': 'pangoLEARN',
'pango-designation': 'pango_designation'
}

# flag if any element is update if everything is the latest release
# we want to just continue running
for dependency, version in version_dictionary.items():
Expand All @@ -434,6 +467,7 @@ def update(version_dictionary):
sys.exit(-1)

latest_release = json.load(latest_release)
latest_release_tarball = latest_release[0]['tarball_url']
latest_release = LooseVersion(latest_release[0]['tag_name'])

#print(dependency, version, latest_release)
Expand All @@ -457,11 +491,25 @@ def update(version_dictionary):
version = LooseVersion(version)

if version < latest_release:
subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade',
f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
if data_dir is not None:
# this path only gets followed when the user has --update_data and they
# have also specified a --datadir
with TemporaryDirectory() as tempdir:
dependency_package = package_names.get(dependency, dependency)
tarball_path = os.path.join(tempdir, 'tarball.tgz')
open(tarball_path, 'wb').write(request.urlopen(latest_release_tarball).read())
tf = tarfile.open(tarball_path)
extracted_dir = tf.next().name
tf.extractall(path=tempdir)
tf.close()
destination_directory = os.path.join(data_dir, dependency_package)
shutil.move(os.path.join(tempdir, extracted_dir, dependency_package), destination_directory)
else:
subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade',
f"git+https://github.com/cov-lineages/{dependency}.git@{latest_release}"],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
print(f"{dependency} updated to {latest_release}", file=sys.stderr)
elif version > latest_release:
print(f"{dependency} ({version}) is newer than latest stable "
Expand Down
3 changes: 3 additions & 0 deletions pangolin/scripts/pangolearn.smk
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ rule add_failed_seqs:
rule scorpio:
input:
fasta = rules.align_to_reference.output.fasta,
params:
constellation_files = " ".join(config["constellation_files"])
output:
report = os.path.join(config["tempdir"],"VOC_report.scorpio.csv")
threads:
Expand All @@ -176,6 +178,7 @@ rule scorpio:
-o {output.report:q} \
-t {workflow.cores} \
--output-counts \
--constellations {params.constellation_files} \
--pangolin \
--list-incompatible \
--long &> {log:q}
Expand Down

0 comments on commit 531d531

Please sign in to comment.