-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsnapshot.py
561 lines (445 loc) · 18.5 KB
/
snapshot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
from copy import copy
import numpy as np
from .fortranio import FortranFile
from .snapview import SnapshotView
class SnapshotIOException(Exception):
    """Base class for exceptions in the snapshot module."""
    # No custom behaviour is needed; Exception already stores and renders
    # the message, so the redundant __init__ forwarding has been removed.
    pass
class SnapshotHeader(object):
"""
A class for a Gadget-like header.
Accessing header data
---------------------
The header information from a header, hdr, can be accessed as,
>>> hdr.header_entry_name
where 'hdr_entry_name' can be any of the strings acting as keys of the
schema dictionary for this header type. All valid keys are contained within
the list hdr.fields.
All (entry_name, entry_value) pairs may be iterated through using
>>> for (name, data) in hdr.iterfields():
>>> # Do something
Acccessing metadata
-------------------
The Snapshot file name with which this header is associated may be accessed
as
>>> hdr.fname
'some_file_name'
"""
def __init__(self, fname, header_schema):
super(SnapshotHeader, self).__init__()
self._fname = fname
# Use copy so that reference schema is not altered.
self._schema = copy(header_schema)
self._fields = []
self.verify_schema()
self.init_fields()
@property
def fields(self):
return self._fields
@property
def fname(self):
return self._fname
@fname.setter
def fname(self, fname):
self._fname = fname
def init_fields(self):
"""Reset all header attributes to zero-like values."""
for (name, fmt) in self._schema.items():
dtype, size = fmt
data = np.zeros(size, dtype=dtype)
if size == 1:
data = data[0]
setattr(self, name, data)
def iterfields(self):
for name in self.fields:
yield (name, getattr(self, name))
def load(self):
"""Load the snapshot header from the current file."""
with FortranFile(self.fname, 'rb') as ffile:
self._load(ffile)
def to_array(self):
"""Return a structured array representing the header data."""
dtype = [(k, dt, size) for k, (dt, size) in self._schema.items()]
values = tuple(getattr(self, name) for name in self.fields)
return np.array(values, dtype=dtype)
def save(self, fname=None):
"""
Write the snapshot header to the current file, overwriting the file.
A different file name to write to may optionally be provided. This
does not modify the header's fname attribute, so later calling
load() will re-load data from the original file.
The method will raise a SnapshotIOException if the current header is
not valid. See verify().
"""
if fname is None:
fname = self.fname
if self.verify() != []:
raise SnapshotIOException("Current header state invalid")
with FortranFile(fname, 'wb') as ffile:
self._save(ffile)
def verify(self):
"""
Return a list of header attributes which do not conform to the schema.
An empty list indicates that the header is valid.
"""
malformed = []
for (name, fmt) in self._schema.items():
dtype, size = fmt
data = getattr(self, name)
try:
count = len(data)
except TypeError:
count = 1
if count != size:
malformed.append(name)
else:
try:
converted = np.asarray(data).view(dtype=dtype)
except ValueError:
malformed.append(name)
return malformed
def verify_schema(self):
"""
Verify the header formatter, and update it if necessary.
When an element type is not supplied, it is assumed to be a 4-byte
float.
When an element length is also not supplied, it is assumed to be one.
Completes the header schema if possible, else raises a
SnapshotIOException exception.
"""
self._ptypes = 0
for (name, fmt) in self._schema.items():
# So that these are defined even for an invalid formatter
dtype, size = ('f4', 1)
if len(fmt) == 2:
dtype, size = fmt
elif len(fmt) == 1:
dtype, size = (fmt[0], 1)
else:
message = "Schema for header element '%s' is invalid" % name
raise SnapshotIOException(message)
try:
dtype = np.dtype(dtype)
except TypeError:
# Given dtype does not correspond to a numpy dtype.
message = "Data type for header element '%s' is invalid." % name
raise SnapshotIOException(message)
try:
size = int(size)
except TypeError:
message = "Data size for header element '%s' is invalid." % name
raise SnapshotIOException(message)
if (dtype.itemsize * size) % 4 != 0:
message = "Data bytes for header element '%s' not a multiple of 4" % name
raise SnapshotIOException(message)
self._schema[name] = (dtype, size)
self._ptypes = max(size, self._ptypes)
self._fields = self._schema.keys()
def _load(self, ffile):
raw_header = ffile.read_record('b1')
offset = 0
for (name, fmt) in self._schema.items():
dtype, size = fmt
bytewords = dtype.itemsize * size
# Must be non-scalar ndarray, hence wrap in np.array()
raw_data = np.array(raw_header[offset:offset + bytewords])
try:
data = raw_data.view(dtype=dtype)
except ValueError:
raise SnapshotIOException('Could not reinterpret')
if size == 1:
data = data[0]
offset += bytewords
setattr(self, name, data)
def _save(self, ffile):
array = self.to_array()
ffile.write_ndarray(array)
class SnapshotBase(object):
    """
    A base class for a single Gadget-like simulation snapshot.

    This class defines general attributes, properties and methods for
    snapshot classes. All snapshot types derive from this class.
    This class is not intended to be used directly. If implementing a
    subclass, it is most likely it should be a subclass of GadgetSnapshot,
    not this class. Subclasses will likely need to implement the
    _load_block() and _parse_block() methods.

    Accessing Arrays
    ----------------
    An array may be accessed from an instantiated SnapshotBase object, s, as,

        >>> array = s.block_name

    'block_name' can be any of the strings acting as keys of the schema
    dictionary for this snapshot type. A list is returned, with one item for
    each particle type associated with this snapshot. If a particle type is
    not valid for this block, its entry in the list is None. Otherwise, it is
    a numpy.ndarray. For valid-but-empty particle data in a block, an empty
    numpy.ndarray is present. All valid keys are contained within the list
    s.fields.

    All (block_name, block_data) pairs may be iterated through using

        >>> for (name, data) in s.iterfields():
        >>>     # Do something

    Particle Type Aliases
    ---------------------
    If provided, particle type indices may be aliased to attributes. For
    example, if gas particles have particle type 0, and 'pos' is a valid
    field, then

        >>> s.pos[0] is s.gas.pos
        True

    However, note that s.gas is a SnapshotView, which is a read-only object.
    In order to modify the dataset one must, in general, operate on s.pos[0]
    or similar.

    In the case that no index-to-name mapping is provided, s.gas or similar
    will raise an AttributeError. The dictionary of index-to-name mappings
    may be accessed as s.ptype_aliases. It will be None if no mapping is
    present, it is not required to map all valid particle indices, and it
    cannot be assigned to.

    Accessing metadata
    ------------------
    The file name and header are both properties of the snapshot, accessed as

        >>> s.fname
        'some_file_name'
        >>> s.header

    For the latter, see the SnapshotHeader class.
    The indices of all valid particle types for this snapshot are stored in
    the list s.ptype_indices.
    """

    def __init__(self, fname, header_schema=None, blocks_schema=None,
                 ptype_aliases=None, **kwargs):
        """
        Initializes a Gadget-like snapshot.

        header_schema defines the schema for loading the file header.
        blocks_schema defines the schema for loading the various field data.
        ptype_aliases is an optional string-to-index mapping for the particle
        types contained in the snapshot.
        """
        if header_schema is None:
            raise TypeError("header_schema is required")
        if blocks_schema is None:
            raise TypeError("blocks_schema is required")
        super(SnapshotBase, self).__init__(**kwargs)
        self._fname = fname
        self._aliases = ptype_aliases
        self.header = SnapshotHeader(fname, header_schema)
        self._fields = []
        # Use copy so that the caller's reference schema is not altered.
        self._schema = copy(blocks_schema)
        self._ptypes = 0
        self.verify_schema()
        self.init_fields()

    def __getattr__(self, name):
        # Read _aliases via __dict__ rather than attribute access: if
        # __getattr__ fires before __init__ has run (e.g. during unpickling
        # or copy), a normal lookup of self._aliases would recurse here
        # infinitely instead of raising AttributeError.
        aliases = self.__dict__.get('_aliases')
        if aliases and name in aliases:
            idx = aliases[name]
            return self._ptype_view(idx)
        msg = "'%s' object has no attribute %s" % (type(self).__name__, name)
        raise AttributeError(msg)

    @property
    def fields(self):
        """List of valid block names for this snapshot."""
        return self._fields

    @property
    def fname(self):
        """The snapshot file name; setting it also updates the header."""
        return self._fname

    @fname.setter
    def fname(self, fname):
        self.header.fname = fname
        self._fname = fname

    @property
    def ptype_aliases(self):
        """Optional name-to-index particle type mapping (read-only)."""
        return self._aliases

    @property
    def ptype_indices(self):
        """
        A list of the Gadget-like particle type indices in this snapshot.

        Contains all valid particle types, some of which may not have any
        associated data in the snapshot.
        """
        return range(self._ptypes)

    @ptype_indices.setter
    def ptype_indices(self, value):
        """
        Set the valid Gadget-like particle type indices for this snapshot.

        Must be an iterable containing all required particle types. Gaps are
        allowed; both [0, 1, 2, 3] and [0, 3] result in identical behaviour.
        """
        # ptype_indices is range(self._ptypes), so the count must be one
        # greater than the largest index for that index to be included.
        self._ptypes = max(value) + 1

    def init_fields(self):
        """Reset all data attributes to zero-like values."""
        for (name, fmt) in self._schema.items():
            dtype, ndims, ptypes, _ = fmt
            pdata = self._null_block(dtype, ndims, ptypes)
            setattr(self, name, pdata)

    def iterfields(self):
        """Yield (block_name, block_data) pairs for all blocks."""
        for name in self.fields:
            yield (name, getattr(self, name))

    def load(self):
        """Load in snapshot data from the current file."""
        with FortranFile(self.fname, 'rb') as ffile:
            self.header._load(ffile)
            self._load(ffile)

    def save(self, fname=None):
        """
        Write header and snapshot to the current file, overwriting the file.

        A different file name to write to may optionally be provided. This
        does not modify the header's or the snapshot's fname attribute, so
        later calling load() will re-load data from the original file.

        The method will raise a SnapshotIOException if any field is not
        valid. See verify().
        """
        if fname is None:
            fname = self.fname
        if self.header.verify() != []:
            raise SnapshotIOException("Current header state invalid")
        if self.verify() != []:
            raise SnapshotIOException("A field does not match the schema")
        self.update_header()
        with FortranFile(fname, 'wb') as ffile:
            self.header._save(ffile)
            self._save(ffile)

    def update_header(self):
        """
        Update the header based on the current snapshot state.

        This method has no effect, but is called when saving a snapshot to
        file. It should be overridden by subclasses.
        """
        pass

    def verify(self):
        """
        Return a list of fields which do not conform to the schema.

        An empty list indicates that all fields are valid.
        """
        malformed = []
        for name in self.fields:
            # If a is an empty numpy array, nothing will be written, so we
            # do not need to filter out empty arrays.
            dtype, ndims, _, _ = self._schema[name]
            arrays = [a for a in getattr(self, name) if a is not None]
            for a in arrays:
                if a.dtype != dtype or (a.ndim > 1 and a.shape[-1] != ndims):
                    malformed.append(name)
                    # Don't want duplicates; one problem is sufficient.
                    break
        return malformed

    def verify_schema(self):
        """Verify the current schema."""
        self._verify_schema()

    def _block_exists(self, name, ptypes):
        """
        Return True if specified particle types exist for specified block.

        Must be overridden by subclasses.
        """
        raise NotImplementedError("Subclasses must override _block_exists")

    def _get_flag(self, flag):
        """Resolve a schema flag: a string names a header attribute."""
        if isinstance(flag, str):
            return getattr(self.header, flag)
        else:
            return flag

    def _load(self, ffile):
        """
        Load data for each block in the schema from the open FortranFile
        ffile.

        Only blocks with flags resolving to True are loaded from the file.
        """
        for (name, fmt) in self._schema.items():
            dtype, ndims, ptypes, flag = fmt
            if self._block_exists(name, ptypes) and self._get_flag(flag):
                block_data = self._load_block(ffile, name, dtype)
                pdata = self._parse_block(block_data, name, dtype, ndims,
                                          ptypes)
            else:
                pdata = self._null_block(dtype, ndims, ptypes)
            setattr(self, name, pdata)

    def _load_block(self, ffile, name, dtype):
        """
        Return the next block from the open FortranFile ffile as an ndarray.

        This is called before parsing each block's raw data, and may need to
        be overridden by subclasses.
        """
        return ffile.read_record(dtype)

    def _null_array(self, dtype):
        """Return an empty numpy array of element type dtype."""
        return np.empty(0, dtype=dtype)

    def _null_block(self, dtype, ndims, ptypes):
        """
        Return a block of zero-like data, or None where ptype not appropriate.
        """
        pdata = []
        for p in self.ptype_indices:
            if p not in ptypes:
                parray = None
            else:
                parray = self._null_array(dtype)
                if ndims > 1:
                    # Keep the dimensionality even for empty data.
                    parray.shape = (0, ndims)
            pdata.append(parray)
        return pdata

    def _parse_block(self, block_data, name, dtype, ndims, ptypes):
        """
        Return a list of data for each particle type in the block.

        Interpret the raw data within block_data according to the schema,
        and apply the specified particle type and dimensionality operations.

        Must be overridden by subclasses.
        """
        raise NotImplementedError("Subclasses must override _parse_block")

    def _ptype_view(self, index):
        """Return a read-only SnapshotView of one particle type."""
        ptype_data = ((name, field[index])
                      for name, field in self.iterfields())
        view = SnapshotView(self, ptype_data)
        return view

    def _save(self, ffile):
        """Write every block's non-None arrays to the open FortranFile."""
        for name in self.fields:
            # If a is an empty numpy array, nothing will be written, so we
            # do not need to filter out empty arrays.
            arrays = [a for a in getattr(self, name) if a is not None]
            ffile.write_ndarrays(arrays)

    def _verify_schema(self):
        """
        Verifies the block formatter, and updates it if necessary.

        When a block's data type is not supplied, it is assumed to be 4-byte
        floats.
        When a block's N-dimension value is also not supplied, it is assumed
        to be 1.
        When a block's particle type is also not supplied, it is assumed to
        apply to all particle types.
        All valid particle types must appear in at least one of the block
        schemas, though a particle type of 0 is always assumed.

        Completes the block schema if possible, else raises a
        SnapshotIOException.
        """
        max_ptype = -1
        for (name, fmt) in self._schema.items():
            # So that these are defined even for an invalid formatter.
            dtype, ndims, ptypes, flag = ('f4', 1, [None, ], True)
            if len(fmt) == 4:
                dtype, ndims, ptypes, flag = fmt
            elif len(fmt) == 3:
                dtype, ndims, ptypes = fmt
            elif len(fmt) == 2:
                dtype, ndims = fmt
            elif len(fmt) == 1:
                dtype, = fmt
            else:
                message = "Formatter for block '%s' is invalid" % name
                raise SnapshotIOException(message)

            try:
                dtype = np.dtype(dtype)
            except TypeError:
                # Given dtype does not correspond to a numpy dtype.
                message = "Data type for block '%s' is invalid." % name
                raise SnapshotIOException(message)

            try:
                ndims = int(ndims)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings and
                # TypeError for non-convertible types; both are invalid.
                message = "N-dimensions size for block '%s' is invalid." % name
                raise SnapshotIOException(message)

            # [None] is the "not supplied" placeholder; it carries no valid
            # particle index, so skip it when finding the largest ptype.
            if ptypes != [None]:
                max_ptype = max(max_ptype, max(ptypes))
            self._schema[name] = (dtype, ndims, ptypes, flag)

        if max_ptype == -1:
            message = 'At least one block schema must have specified ptypes'
            raise SnapshotIOException(message)

        # For any block which had no ptypes set, assume it is valid for all
        # ptypes. Rebuild the full 4-tuple; only the ptypes element changes.
        self._ptypes = max_ptype + 1
        for (name, fmt) in self._schema.items():
            dtype, ndims, ptypes, flag = fmt
            if ptypes == [None]:
                self._schema[name] = (dtype, ndims,
                                      list(self.ptype_indices), flag)
        # Materialize as a list so _fields is stable under later schema
        # mutation (and is a list on both Python 2 and 3).
        self._fields = list(self._schema.keys())