-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist_projects_ready.py
executable file
·95 lines (78 loc) · 3.19 KB
/
list_projects_ready.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
"""Runs in the output directory, and prints a list of projects
that are ready now.
Will check for .done flags per-cell in pbpipeline,
or for the global pbpipeline/aborted flag, therefore must be run after
the flags are set in driver.py.
Then reads sc_data.yaml and all the .info.yaml files for the cells.
"""
import os, sys, re
from smrtino import glob, load_yaml
import logging as L
def find_projects_from_yaml(filename):
    """Get a list of all the projects from all the barcodes, given an
       info.yaml filename.

       If the loaded data has a 'barcodes' list, every entry that is a string
       starting with "@" is treated as a reference to another YAML file and is
       replaced by the result of load_yaml() on the rest of the string, resolved
       relative to filename. One 'ws_project' value is returned per barcode.
       Without a 'barcodes' key, the single top-level 'ws_project' is returned.

       Raises KeyError if the top-level record (or any one of the barcodes) has
       no 'ws_project'. The result is built eagerly, so the error is raised
       here rather than part-way through the caller consuming a lazy iterator;
       a file where only some barcodes have a project yields no projects at all.
    """
    ydata = load_yaml(filename)

    if 'barcodes' not in ydata:
        return [ydata['ws_project']]

    # Resolve the @ lines
    barcodes = [
        load_yaml(bc[1:], relative_to=filename)
        if isinstance(bc, str) and bc.startswith("@")
        else bc
        for bc in ydata['barcodes']
    ]

    # What should I do here if some barcodes have a project and others do not?
    # It is an error for now: the list comprehension raises KeyError before
    # anything is returned.
    return [bc['ws_project'] for bc in barcodes]
def glob_sc_data():
    """Find the sc_data file(s) in the current directory.

       The exact name sc_data.yaml is tried first; only if that gives no match
       is the pattern sc_data.*.yaml tried. The first non-empty glob() result
       is returned as-is.

       Raises FileNotFoundError if neither pattern matches anything.
    """
    for pattern in ("sc_data.yaml", "sc_data.*.yaml"):
        matches = glob(pattern)
        if matches:
            return matches

    raise FileNotFoundError("No match for 'sc_data.yaml' or 'sc_data.*.yaml'")
def list_the_projects():
    """Return the projects found on all cells that are flagged as finished.

       The returned list may contain duplicates and is in no particular order.
       An empty list is returned if pbpipeline/aborted exists.
       Info files with no 'ws_project' are skipped with a warning.
    """
    # The run-level abort flag trumps everything else.
    if glob('pbpipeline/aborted'):
        return []

    # 1) Look in pbpipeline to see what cells are done. The cell name is
    #    whatever sits between the "/" and the final "." of the flag file.
    #    A cell should not really be both aborted and done, but .aborted wins.
    flag_files = glob(r"pbpipeline/[0-9]_???.done") + glob(r"pbpipeline/[0-9]_???.ready")
    cells_done = []
    for flag in flag_files:
        for slot in re.findall(r"(?<=/).+(?=\.)", flag):
            if os.path.exists(f"pbpipeline/{slot}.aborted"):
                continue
            cells_done.append(slot)

    # 2) Load up sc_data.yaml. As of SMRTino 3.7 there may be multiple files.
    #    The info.yaml files could be globbed directly, but going via sc_data
    #    gives assurance that everything is there as it should be.
    info_files = set()
    for sc_data_file in glob_sc_data():
        L.debug(f"Reading from {sc_data_file}")
        sc_data = load_yaml(sc_data_file)

        if sc_data and 'cells' in sc_data:
            for cell_name, cell_info in sc_data['cells'].items():
                # Only cells whose slot has a done/ready flag are candidates
                if cell_info['slot'] in cells_done:
                    info_files.add(f"{cell_name}.info.yaml")
        else:
            L.warning(f"No 'cells' key in {sc_data_file}")

    # 3) Get the projects (this used to be in Snakefile.report)
    L.debug(f"Will look into {len(info_files)} cells")
    projects = []
    for info_file in info_files:
        try:
            # The extend() stays inside the try because the lookup may only
            # raise while the returned iterable is being consumed.
            projects.extend(find_projects_from_yaml(info_file))
        except KeyError:
            L.warning(f"No ws_project in {info_file} - indicates no 5-digit project name in readset XML")

    # And that is that
    return projects
def main():
    """Print the sorted, de-duplicated list of ready projects, one per line.

       Logging goes to stderr. Setting the VERBOSE environment variable to
       anything other than an empty string or "0" turns on debug messages;
       otherwise only warnings and above are shown.
    """
    verbose = os.environ.get("VERBOSE") not in (None, "", "0")
    log_level = L.DEBUG if verbose else L.WARNING
    L.basicConfig(level=log_level, stream=sys.stderr)

    unique_projects = set(list_the_projects())
    for p in sorted(unique_projects):
        print(p)


if __name__ == '__main__':
    main()