Skip to content

Commit cbb8448

Browse files
committed
lazy build RA, SR
1 parent c983609 commit cbb8448

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

package/MDAnalysis/core/topology.py

Lines changed: 41 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,6 @@
6464
from ..exceptions import NoDataError
6565

6666

67-
# TODO Notes:
68-
# Could make downshift tables lazily built! This would
69-
# a) Make these not get built when not used
70-
# b) Optimise moving multiple atoms between residues as only built once
71-
# afterwards
72-
73-
# Could optimise moves by only updating the two parent tables rather than
74-
# rebuilding everything!
75-
76-
7767
def make_downshift_arrays(upshift, nparents):
7868
"""From an upwards translation table, create the opposite direction
7969
@@ -99,7 +89,7 @@ def make_downshift_arrays(upshift, nparents):
9989
To find the residue to atom mappings for a given atom to residue mapping:
10090
10191
>>> atom2res = np.array([0, 1, 0, 2, 2, 0, 2])
102-
>>> make_downshift_arrays(atom2res)
92+
>>> make_downshift_arrays(atom2res, 3)
10393
array([array([0, 2, 5]), array([1]), array([3, 4, 6]), None], dtype=object)
10494
10595
Entry 0 corresponds to residue 0 and says that this contains atoms 0, 2 & 5
@@ -115,7 +105,7 @@ def make_downshift_arrays(upshift, nparents):
115105
if not len(upshift):
116106
return np.array([], dtype=object)
117107

118-
upshift = np.array(upshift)
108+
# mergesort for a stable ordered array for the same value.
119109
order = np.argsort(upshift, kind="mergesort")
120110

121111
upshift_sorted = upshift[order]
@@ -199,7 +189,7 @@ def __init__(self,
199189
self._AR = np.asarray(atom_resindex, dtype=np.intp).copy()
200190
if not len(self._AR) == n_atoms:
201191
raise ValueError("atom_resindex must be len n_atoms")
202-
self._RA = make_downshift_arrays(self._AR, n_residues)
192+
self._RA = None
203193

204194
# built residue-to-segment mapping, and vice-versa
205195
if residue_segindex is None:
@@ -208,13 +198,27 @@ def __init__(self,
208198
self._RS = np.asarray(residue_segindex, dtype=np.intp).copy()
209199
if not len(self._RS) == n_residues:
210200
raise ValueError("residue_segindex must be len n_residues")
211-
self._SR = make_downshift_arrays(self._RS, n_segments)
201+
self._SR = None
212202

213203
def copy(self):
214204
"""Return a deepcopy of this Transtable"""
215205
return self.__class__(self.n_atoms, self.n_residues, self.n_segments,
216206
atom_resindex=self._AR, residue_segindex=self._RS)
217207

208+
@property
209+
def RA(self):
210+
if self._RA is None:
211+
self._RA = make_downshift_arrays(self._AR,
212+
self.n_residues)
213+
return self._RA
214+
215+
@property
216+
def SR(self):
217+
if self._SR is None:
218+
self._SR = make_downshift_arrays(self._RS,
219+
self.n_segments)
220+
return self._SR
221+
218222
@property
219223
def size(self):
220224
"""The shape of the table, ``(n_atoms, n_residues, n_segments)``.
@@ -253,11 +257,12 @@ def residues2atoms_1d(self, rix):
253257
indices of atoms present in residues, collectively
254258
255259
"""
260+
RA = self.RA
256261
try:
257-
return np.concatenate(self._RA[rix])
262+
return np.concatenate(RA[rix])
258263
except ValueError: # rix is not iterable or empty
259264
# don't accidentally return a view!
260-
return self._RA[rix].astype(np.intp, copy=True)
265+
return RA[rix].astype(np.intp, copy=True)
261266

262267
def residues2atoms_2d(self, rix):
263268
"""Get atom indices represented by each residue index.
@@ -275,10 +280,11 @@ def residues2atoms_2d(self, rix):
275280
in that residue
276281
277282
"""
283+
RA = self.RA
278284
try:
279-
return [self._RA[r].copy() for r in rix]
285+
return [RA[r].copy() for r in rix]
280286
except TypeError:
281-
return [self._RA[rix].copy()] # why would this be singular for 2d?
287+
return [RA[rix].copy()] # why would this be singular for 2d?
282288

283289
def residues2segments(self, rix):
284290
"""Get segment indices for each residue.
@@ -310,11 +316,12 @@ def segments2residues_1d(self, six):
310316
sorted indices of residues present in segments, collectively
311317
312318
"""
319+
SR = self.SR
313320
try:
314-
return np.concatenate(self._SR[six])
321+
return np.concatenate(SR[six])
315322
except ValueError: # six is not iterable or empty
316323
# don't accidentally return a view!
317-
return self._SR[six].astype(np.intp, copy=True)
324+
return SR[six].astype(np.intp, copy=True)
318325

319326
def segments2residues_2d(self, six):
320327
"""Get residue indices represented by each segment index.
@@ -332,10 +339,11 @@ def segments2residues_2d(self, six):
332339
present in that segment
333340
334341
"""
342+
SR = self.SR
335343
try:
336-
return [self._SR[s].copy() for s in six]
344+
return [SR[s].copy() for s in six]
337345
except TypeError:
338-
return [self._SR[six].copy()]
346+
return [SR[six].copy()]
339347

340348
# Compound moves, does 2 translations
341349
def atoms2segments(self, aix):
@@ -396,43 +404,34 @@ def segments2atoms_2d(self, six):
396404
def move_atom(self, aix, rix):
397405
"""Move aix to be in rix"""
398406
self._AR[aix] = rix
399-
self._RA = make_downshift_arrays(self._AR, self.n_residues)
407+
self._RA = None
400408

401409
def move_residue(self, rix, six):
402410
"""Move rix to be in six"""
403411
self._RS[rix] = six
404-
self._SR = make_downshift_arrays(self._RS, self.n_segments)
412+
self._SR = None
405413

406414
def add_Residue(self, segidx):
407415
# segidx - index of parent
408416
self.n_residues += 1
409-
self._RA = make_downshift_arrays(self._AR, self.n_residues)
417+
self._RA = None
410418
self._RS = np.concatenate([self._RS, np.array([segidx])])
411-
self._SR = make_downshift_arrays(self._RS, self.n_segments)
419+
self._SR = None
420+
412421

413422
return self.n_residues - 1
414423

415424
def add_Segment(self):
416425
self.n_segments += 1
417-
# self._RS remains the same, no residues point to the new segment yet
418-
self._SR = make_downshift_arrays(self._RS, self.n_segments)
419-
426+
self._SR = None
420427
return self.n_segments - 1
421428

422429
def __getstate__(self):
423-
return (self.n_atoms, self.n_residues, self.n_segments,
424-
self._AR, self._RS)
425-
426-
def __setstate__(self, args):
427-
# rebuild _RA and _SR instead of serializing them.
428-
n_atoms = args[0]
429-
n_residues = args[1]
430-
n_segments = args[2]
431-
_AR = args[3]
432-
_RS = args[4]
433-
return self.__init__(n_atoms, n_residues, n_segments,
434-
atom_resindex=_AR, residue_segindex=_RS)
435-
430+
# don't serialize _RA and _SR for performance.
431+
attrs = self.__dict__
432+
attrs['_RA'] = None
433+
attrs['_SR'] = None
434+
return attrs
436435

437436

438437
class Topology(object):

0 commit comments

Comments
 (0)