DataAggregator.py
#!/usr/bin/env python
"""Short docstring
Long Docstring
"""
import csv
import numpy as np
__author__ = "Michael Pitcher"
__copyright__ = "Copyright 2017"
__credits__ = ["Michael Pitcher"]
__license__ = ""
__version__ = ""
__email__ = "[email protected]"
__status__ = "Development"


def aggregate_data(location, ids):
    """Average each compartment over all runs per (node_id, timestep) and write aggregated.csv."""
    node_data = {}        # node_id -> timestep -> compartment -> list of per-run values
    compartments = None   # compartment column names, taken from the first file read
    timesteps = []        # timesteps in the order they are first encountered
    for run_id in ids:
        if location:
            filename = location + '/' + str(run_id) + '.csv'
        else:
            filename = str(run_id) + '.csv'
        with open(filename, 'r') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for row in csv_reader:
                # Determine the compartment columns once: every column except the key columns
                if not compartments:
                    compartments = sorted(list(csv_reader.fieldnames))
                    compartments.remove('node_id')
                    compartments.remove('timestep')
                if row['node_id'] not in node_data:
                    node_data[row['node_id']] = {}
                if row['timestep'] not in node_data[row['node_id']]:
                    node_data[row['node_id']][row['timestep']] = {}
                if row['timestep'] not in timesteps:
                    timesteps.append(row['timestep'])
                # Collect this run's value for each compartment
                for c in compartments:
                    if c not in node_data[row['node_id']][row['timestep']]:
                        node_data[row['node_id']][row['timestep']][c] = []
                    node_data[row['node_id']][row['timestep']][c].append(float(row[c]))
    # Reduce each list of per-run values to its mean
    aggregated_node_data = {}
    for n in node_data:
        aggregated_node_data[n] = {}
        for t in node_data[n]:
            aggregated_node_data[n][t] = {}
            for c in node_data[n][t]:
                aggregated_node_data[n][t][c] = np.mean(node_data[n][t][c])
    # Write the aggregated means to a single CSV next to the input files
    if location:
        output_path = location + '/aggregated.csv'
    else:
        output_path = 'aggregated.csv'
    fieldnames = ['timestep', 'node_id'] + compartments
    # newline='' lets the csv module manage line endings (Python 3)
    with open(output_path, 'w', newline='') as output_csv:
        writer = csv.DictWriter(output_csv, fieldnames=fieldnames)
        writer.writeheader()
        # One row per (timestep, node_id), in the order timesteps were first seen
        for t in timesteps:
            for n in aggregated_node_data:
                row = {'timestep': t, 'node_id': n}
                for c in aggregated_node_data[n][t]:
                    row[c] = aggregated_node_data[n][t][c]
                writer.writerow(row)
        # Alternative ordering: one block of rows per node, timesteps sorted within each node
        # for n in aggregated_node_data:
        #     timesteps = sorted(aggregated_node_data[n].keys())
        #     for t in timesteps:
        #         row = {'timestep': t, 'node_id': n}
        #         for c in aggregated_node_data[n][t]:
        #             row[c] = aggregated_node_data[n][t][c]
        #         writer.writerow(row)
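

if __name__ == '__main__':
    # Example invocation (illustrative values): averages runs 1-5 whose per-run
    # CSVs are assumed to live in an 'output' directory as 1.csv .. 5.csv, each
    # containing 'node_id', 'timestep' and compartment columns.
    aggregate_data('output', range(1, 6))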