-
Notifications
You must be signed in to change notification settings - Fork 5
/
mtz.py
129 lines (116 loc) · 4.82 KB
/
mtz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from collections import OrderedDict
import sys
from dimple.utils import put_error, comment, silently_run
from dimple.cell import Cell, match_symmetry
DEFAULT_FREE_COLS = ['FreeR_flag', 'FREE', 'RFREE', 'FREER']
class MtzMeta(Cell):
d_eps = 0.00051 # dmax precision (so low b/c it is read from mtzdump)
def __init__(self, cell, symmetry, sg_number, dmin, dmax, columns,
filename):
#assert isinstance(columns[0], tuple)
Cell.__init__(self, cell, symmetry)
self.sg_number = sg_number
self.dmin = dmin
self.dmax = dmax
assert dmin >= dmax # yes, min > max here
self.columns = OrderedDict(columns)
self.filename = filename
def info(self):
return '\n'.join([
'symmetry: %s (SG no. %d)' % (self.symmetry, self.sg_number),
'cell: (%s)' % self.parameters_as_str(),
'resolution range: %s - %s' % (self.dmin, self.dmax),
'columns: ' + ' '.join('%s:%s' % kv for kv in self.columns.items())
]) # noqa: E123 - bracket indentation
def check_col_type(self, label, expected_type):
if label not in self.columns:
put_error("Column '%s' not found in %s" % (label, self.filename))
sys.exit(1)
col_type = self.columns[label]
if col_type != expected_type:
put_error("Column '%s' in %s has type '%s' (expected '%s')" %
(label, self.filename, col_type, expected_type))
return False
return True
def _run_mtzdump(hklin, keys):
retcode, out, _ = silently_run(['mtzdump', 'HKLIN', hklin],
stdin_text='\n'.join(keys + ['END']))
if retcode != 0:
raise RuntimeError('mtzdump of %s failed' % hklin)
return out
def read_metadata(hklin):
'for now using mtzdump, directly calling libccp4/mtzlib would be better'
lines = _run_mtzdump(hklin, ['HEAD']).splitlines()
cell = None
for n, line in enumerate(lines):
if not line.startswith(b' * '):
continue
if line.startswith(b' * Dataset ID, project/crystal/dataset names, ce'):
try:
cell = tuple(float(x) for x in lines[n + 5].split())
except ValueError:
pass
elif line.startswith(b' * Cell Dimensions :') and cell is None:
try:
cell = tuple(float(x) for x in lines[n + 2].split())
except ValueError:
pass
elif line.startswith(b' * Space group = '):
symmetry = line.split(b"'")[1].strip().decode()
sg_number = int(line.split()[-1].rstrip(b')'))
elif line.startswith(b' * Resolution Range'):
res_line = lines[n+2]
# 0.00125 0.48742 ( 28.339 - 1.432 A )
lower_resol = float(res_line[28:40])
upper_resol = float(res_line[41:53])
elif line.startswith(b' * Column Labels'):
column_names = lines[n+2].decode().split()
elif line.startswith(b' * Column Types'):
column_types = lines[n+2].decode().split()
columns = zip(column_names, column_types)
return MtzMeta(cell, symmetry=symmetry, sg_number=sg_number,
dmin=lower_resol, dmax=upper_resol, columns=columns,
filename=hklin)
def check_freerflags_column(free_mtz, expected_symmetry, column):
rfree_meta = read_metadata(free_mtz)
if expected_symmetry and not match_symmetry(rfree_meta, expected_symmetry):
comment('\nWARNING: R-free flag reference file is %s not %s.' %
(rfree_meta.symmetry, expected_symmetry.symmetry))
if column is not None:
if not rfree_meta.check_col_type(column, 'I'):
sys.exit(1)
return column
for name in DEFAULT_FREE_COLS:
if name in rfree_meta.columns:
rfree_meta.check_col_type(name, 'I')
return name
put_error('free-R column not found in %s' % free_mtz)
sys.exit(1)
def get_num_missing(hklin, col):
# for now using mtzdump
out = _run_mtzdump(hklin, ['NREF 0'])
try:
start = out.index(b'\n Col Sort')
end = out.index(b'\n No. of reflections')
lines = out[start+1:end].splitlines()[3:-1]
for line in lines:
sp = line.decode().split()
if sp[-1] == col:
return int(sp[4])
except ValueError:
pass
# for testing only
def main():
if len(sys.argv) < 2:
sys.stderr.write('No filenames.\n')
sys.exit(1)
if sys.argv[1] == '-m' and len(sys.argv) >= 3:
col = sys.argv[2]
for arg in sys.argv[3:]:
print('%s %s' % (arg, get_num_missing(arg, col)))
else:
for filename in sys.argv[1:]:
print('File: %s' % filename)
print(read_metadata(filename))
if __name__ == '__main__':
main()