-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclusteratoms.py
executable file
·86 lines (78 loc) · 3.04 KB
/
clusteratoms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/python
# Code by Peter Kasson, 2014
"""Agglomerative clustering of atoms in a structure."""
import re
import sys
import gflags
import numpy
from scipy.cluster import hierarchy
import ndx
import PDB
def clusteratoms(PDBfilename, atomnamesel, nclusters, writendx=None, resname='',
                 radial_dist=False):
    """Cluster atoms into k groups by nearest neighbor.

    Atoms are selected from the first entry of the parsed PDB record and
    grouped with scipy's hierarchy.fclusterdata (default single linkage)
    using the 'maxclust' criterion, i.e. at most nclusters flat clusters.

    Args:
      PDBfilename: input file
      atomnamesel: regular expression for atom names to select (applied with
        re.match, so it is anchored at the start of the name)
      nclusters: number of clusters to make
      writendx: optional filename of Gromacs index to write
      resname: regular expression for residue names to select (re.match);
        the default '' matches every residue
      radial_dist: if True, cluster on each atom's distance from the centroid
        of the selected atoms instead of on its x, y, z coordinates
    Rets:
      clusterdata: array where each row is atomid, resid, clusternum.
    Raises:
      ValueError: if no atom matches the selection.
    """
    # Compile once instead of re-resolving the patterns for every atom.
    atom_pattern = re.compile(atomnamesel)
    res_pattern = re.compile(resname)

    atomlines = []
    # The context manager closes the file even if parsing raises; the records
    # are consumed inside it in case the parser reads lazily.
    with open(PDBfilename, 'r') as infile:
        pdbrecord = PDB.readPDB(infile)
        idxctr = 0
        for line in pdbrecord[0]:
            if line.__class__ in [PDB.ATOM, PDB.HETATM]:
                if (atom_pattern.match(line.name)
                        and res_pattern.match(line.resName)):
                    atomlines.append([line.serial, line.resSeq, idxctr,
                                      line.x, line.y, line.z])
                # NOTE(review): assumes the counter advances for every atom
                # record (selected or not), so column 2 is the atom's 0-based
                # position among ATOM/HETATM records -- confirm this is what
                # ndx.write_index expects.
                idxctr += 1

    if not atomlines:
        raise ValueError(
            'No atoms in %s match atom selection %r and residue selection %r'
            % (PDBfilename, atomnamesel, resname))

    # Columns: serial, resSeq, atom index, x, y, z.
    atomarr = numpy.array(atomlines)
    print(atomarr.shape)
    coords = atomarr[:, 3:6]

    # alternate approach:
    # specify maxdist, then:
    # clusteridx = hierarchy.fclusterdata(atomarr[:, 3:6], maxdist,
    #                                     criterion='distance')
    if radial_dist:
        # One feature per atom: distance to the centroid of the selection.
        ctr_coord = numpy.mean(coords, 0)
        features = numpy.linalg.norm(coords - ctr_coord, axis=1).reshape(-1, 1)
    else:
        features = coords
    clusteridx = hierarchy.fclusterdata(features, nclusters,
                                        criterion='maxclust')

    clusterdata = numpy.zeros([len(clusteridx), 3])
    clusterdata[:, :2] = atomarr[:, :2]
    clusterdata[:, 2] = clusteridx

    # option to write a Gromacs index
    if writendx:
        # fclusterdata labels clusters starting at 1, while the group names
        # are numbered from 0: group 'Cluster<i>' holds cluster label i + 1.
        indexdict = {
            'IndexNames': ['Cluster%d' % i for i in range(nclusters)],
            'IndexGroups': [atomarr[clusteridx == i + 1, 2]
                            for i in range(nclusters)],
        }
        ndx.write_index(indexdict, writendx)
    return clusterdata
if __name__ == '__main__':
    # Command-line entry point: cluster the selected atoms of a PDB file and
    # write one "atomid<TAB>resid<TAB>clusternum" row per atom to --outfile.
    FLAGS = gflags.FLAGS
    gflags.DEFINE_string('infile', '',
                         'Input PDB')
    gflags.DEFINE_string('atomsel', '(.+)',
                         'Regexp to select atom names')
    gflags.DEFINE_string('ressel', '',
                         'Optional regex to select residue names')
    gflags.DEFINE_string('outfile', 'out.dat',
                         'Output file name')
    gflags.DEFINE_string('ndxfile', None,
                         'Optional output index')
    gflags.DEFINE_integer('numclusters', 2,
                          'Number of clusters to make')
    # Parse the command line; flag values are then available on FLAGS.
    FLAGS(sys.argv)
    # Forward --ressel so the residue filter actually takes effect; its
    # default '' matches every residue, so runs without the flag are unchanged.
    clusters = clusteratoms(FLAGS.infile, FLAGS.atomsel, FLAGS.numclusters,
                            FLAGS.ndxfile, resname=FLAGS.ressel)
    numpy.savetxt(FLAGS.outfile, clusters, fmt='%d\t%d\t%d')