-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathreprocess.py
135 lines (113 loc) · 5.15 KB
/
reprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
from pathlib import Path
import subprocess, os, re, sys
import json
from multiprocessing import Pool, Manager
import shutil
from argparse import ArgumentParser
from jsonenc import JSONEnc
import scale
def json_to_dict(jsonfile):
    """Load *jsonfile* and return its parsed contents as a dict.

    Raises:
        FileNotFoundError: if *jsonfile* is not an existing regular file.
            (Replaces the original `assert`, which is stripped under -O.)
    """
    jsonfile = Path(jsonfile)
    if not jsonfile.is_file():
        raise FileNotFoundError(
            "JSON File does not exist or cannot be loaded: {}".format(jsonfile))
    # Context manager closes the handle; the original leaked it via
    # json.load(open(jsonfile)).
    with jsonfile.open() as fh:
        return json.load(fh)
def gen_reproc_list(jsonfile):
    """Return the list of directory paths whose previous XDS processing
    succeeded, read from the two-level results JSON at *jsonfile*."""
    return [
        entry['path']
        for runs in json_to_dict(jsonfile).values()
        for entry in runs.values()
        if entry['processing_successful']
    ]
def update_xds_INP(p, args):
    """Rewrite XDS.INP in directory *p* for a CORRECT-only rerun.

    Sets ``JOB= CORRECT`` and overrides the space group number and unit
    cell constants with the values from *args*.  The file is modified in
    place.

    Bug fixed: the original passed ``re.DOTALL`` as ``re.sub``'s fourth
    positional argument, which is *count* (=16), not *flags*.  Had it
    actually been applied as a flag, ``.*`` would have swallowed everything
    to the end of the file; the intended behaviour is the default
    (no DOTALL), replacing each keyword's value to end-of-line only.
    """
    inp = Path(p) / 'XDS.INP'
    with inp.open('r') as source:
        data = '\n'.join(line.rstrip() for line in source)
    data = re.sub(r'JOB=.*', 'JOB= CORRECT', data)
    data = re.sub(r'SPACE_GROUP_NUMBER=.*',
                  'SPACE_GROUP_NUMBER={}'.format(args.spacegroup), data)
    data = re.sub(r'UNIT_CELL_CONSTANTS=.*',
                  'UNIT_CELL_CONSTANTS={}'.format(args.unitcell), data)
    inp.write_text(data)
def rerun_xds(args, xdsdir):
    """Rerun ``xds_par`` inside *xdsdir*.

    Backs up the current (P1) results, rewrites XDS.INP with the space
    group / unit cell from *args*, then runs xds_par with its stdout
    captured in ``XDS.log`` inside the directory.
    """
    p = Path(xdsdir)
    # Preserve the previous (P1) results before CORRECT overwrites them.
    copy_xds_results(p, 'P1')
    update_xds_INP(p, args)
    with (p / 'XDS.log').open('w') as log:
        # Fixed: constant command run without a shell (shell=True with a
        # string was unnecessary and less safe); behaviour is unchanged.
        subprocess.call(['xds_par'], stdout=log, cwd=p)
def get_accepted_reflections(xdsdir):
    """Parse CORRECT.LP in *xdsdir* and return the number of accepted
    observations (including systematic absences) as an int, or None if
    no such line is found."""
    marker = 'NUMBER OF ACCEPTED OBSERVATIONS (INCLUDING SYSTEMATIC ABSENCES'
    with (Path(xdsdir) / 'CORRECT.LP').open() as fh:
        for text in fh:
            if marker in text:
                # First run of digits on the matching line is the count.
                return int(re.search(r'\d+', text).group(0))
def get_spacegroup(xdsdir):
    """Return the space group number recorded in XDS_ASCII.HKL (as a
    string, exactly as written), or None if the header line is missing."""
    with (Path(xdsdir) / 'XDS_ASCII.HKL').open() as fh:
        hits = (ln.split()[-1] for ln in fh if '!SPACE_GROUP_NUMBER=' in ln)
        return next(hits, None)
def get_unitcell(xdsdir):
    """Return the six unit-cell constants from XDS_ASCII.HKL as one
    space-separated string (a b c alpha beta gamma), or None if the
    header line is missing."""
    with (Path(xdsdir) / 'XDS_ASCII.HKL').open() as fh:
        for ln in fh:
            if '!UNIT_CELL_CONSTANTS=' in ln:
                f = ln.split()
                # Last six whitespace-separated tokens are the constants.
                return '{0} {1} {2} {3} {4} {5}'.format(
                    f[-6], f[-5], f[-4], f[-3], f[-2], f[-1])
    return None
def reprocess(args):
    """Rerun XDS in every previously-successful directory and record the
    new statistics in the managed results dict (module global ``mdict``).

    Bug fixed: the original harvested CORRECT.LP / XDS_ASCII.HKL right
    after ``apply_async``, i.e. before (or while) the rerun was still
    writing them, so it recorded stale or partial results.  All jobs are
    now dispatched first and the pool fully drained before any output
    file is parsed.
    """
    dirs = [Path(x) for x in gen_reproc_list(args.input)]
    pool = Pool()
    for xdsdir in dirs:
        pool.apply_async(rerun_xds, args=(args, xdsdir))
    pool.close()
    pool.join()  # every rerun must finish before results are read
    for xdsdir in dirs:
        # Convoluted read-modify-write is required: in-place mutation of a
        # dict nested inside a Manager().dict() does not propagate
        # (known Python limitation).
        parent = mdict[xdsdir.parent.name]
        entry = parent[xdsdir.name]
        entry['accepted_reflections'] = get_accepted_reflections(xdsdir)
        entry['space_group'] = get_spacegroup(xdsdir)
        entry['unit_cell'] = get_unitcell(xdsdir)
        parent[xdsdir.name] = entry
        mdict[xdsdir.parent.name] = parent
def copy_xds_results(xdsdir, suffix='old'):
    """Back up the XDS result files in *xdsdir*.

    Each of XDS.INP, XDS_ASCII.HKL and XDS.log that exists is copied to a
    sibling whose name is tagged with *suffix* (e.g. XDS_old.INP).
    """
    backups = (
        ('XDS.INP', 'XDS_{}.INP'),
        ('XDS_ASCII.HKL', 'XDS_ASCII_{}.HKL'),
        ('XDS.log', 'XDS_{}.log'),
    )
    for original, template in backups:
        src = Path(xdsdir) / original
        if src.is_file():
            shutil.copy(src, Path(xdsdir) / template.format(suffix))
def write_to_json(args, mdict):
    """Serialize a plain-dict snapshot of the managed results dict to
    results_reprocessed.json next to the input file, using the project's
    JSONEnc encoder."""
    target = Path(args.input.parent) / 'results_{b}.json'.format(b='reprocessed')
    payload = json.dumps(mdict.copy(), indent=2, sort_keys=True, cls=JSONEnc)
    target.write_text(payload)
def main():
    """CLI entry point: reprocess the XDS runs listed in a results.json
    file in a chosen space group / unit cell, optionally rescaling and
    running xscale_isocluster afterwards.

    Fixes: ``Path(p, exists=True)`` raised TypeError (pathlib accepts no
    ``exists`` keyword); ``parse_args()`` was called twice with the first
    result discarded; the input file handle was leaked; typo in the
    ``--input`` help text ("restuls").
    """
    parser = ArgumentParser(description=
    """
    Reprocess XDS data in chosen space group and unit cell.
    results.json file required
    """)
    # Existence of the file is checked where it is read (json_to_dict).
    parser.add_argument('-i', '--input', type=lambda p: Path(p).absolute(),
                        help='Path of results.json file')
    parser.add_argument('-s', '--spacegroup', type=int, help='Space Group Number')
    parser.add_argument('-u', '--unitcell', type=str, help='Unit Cell')
    parser.add_argument('--rescale', action='store_true',
                        help="Rescale after reprocessing")
    parser.add_argument('--isocluster', action='store_true',
                        help="Run xscale_isocluster on reprocessed set")
    parser.add_argument('--assert_P1', action='store_true', help="Assert P1")
    args = parser.parse_args()  # fixed: was parsed twice, first result dropped
    global mdict
    mdict = Manager().dict()
    with open(args.input) as fh:  # fixed: close the handle
        mdict.update(json.load(fh))
    reprocess(args)
    write_to_json(args, mdict)
    if args.rescale:
        xsdir = scale.generate_xscale_directory(args.input.parent)
        scale.generate_xscaleINP(mdict, args, xsdir)
        scale.run_xscale(xsdir)
    if args.isocluster:
        xsdir = scale.get_xscale_directory(args.input.parent)
        scale.copy_xscale_results(xsdir, 'old')
        scale.run_isocluster(xsdir)
        result = scale.filter_isocluster(scale.sort_isocluster(xsdir))
        scale.gen_sorted_xscaleINP(result, args, xsdir)
        scale.rerun_xscale(xsdir)


if __name__ == '__main__':
    main()