-
Notifications
You must be signed in to change notification settings - Fork 13
Simple benchmarking notes
Lalaland edited this page Oct 25, 2022
·
6 revisions
Time, memory, and disk usage to perform the following operation (a histogram of how many events with a non-None value each patient has) on the 1% extract:
# Histogram: number of events with a non-None value -> number of patients
# that have exactly that many such events.
lengths = collections.defaultdict(int)
for patient in patients:
    # Each `is not None` test is a bool, so the sum counts the events that
    # actually carry a value.
    num_with_value = sum(event.value is not None for event in patient.events)
    lengths[num_with_value] += 1
Approach | Time | Memory | Disk |
---|---|---|---|
C++ in memory | 0.46 seconds | 0.2 GB | 4.6 GB |
Python in memory | 4.58 seconds | 26 GB | 5.5 GB |
Python in C++ database | 136 seconds | 3.9 GB | 4.6 GB |
Python in pickle database | 69 seconds | 5.8 GB | 5.5 GB |
#include "absl/container/flat_hash_map.h"
#include "database.hh"
boost::filesystem::path extract =
"/local-scratch/nigam/projects/ethanid/piton/target/";
int main() {
PatientDatabase database(extract, true);
absl::flat_hash_map<uint32_t, uint32_t> length_counts;
auto iter = database.iterator();
for (uint32_t patient_id = 0; patient_id < database.size(); patient_id++) {
const Patient& p = iter.get_patient(patient_id);
int count = 0;
for (const auto& event : p.events) {
count += event.value_type != ValueType::NONE;
}
length_counts[count] += 1;
}
}
import piton.datasets
import collections
import sys
import time

# In-memory benchmark: load every patient into a dict first, then time only
# the histogram computation over the already-loaded objects.
source = "/local-scratch/nigam/projects/ethanid/piton/target"
data = piton.datasets.PatientDatabase(source, True)

patients = {}
try:
    for patient in data:
        patients[patient.patient_id] = patient
except Exception as exc:
    # Best-effort load: keep whatever was read so far, but report the failure
    # instead of hiding it, and let KeyboardInterrupt / SystemExit propagate.
    # NOTE(review): the original swallowed every exception here; presumably
    # iteration can fail partway through the extract -- confirm the cause.
    print(
        "warning: patient load stopped early after",
        len(patients),
        "patients:",
        repr(exc),
        file=sys.stderr,
    )

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
lengths = collections.defaultdict(int)
for patient in patients.values():
    # Counts the events that carry a value (value is not None).
    num_with_value = sum(event.value is not None for event in patient.events)
    lengths[num_with_value] += 1
end = time.perf_counter()

# Elapsed seconds for the timed section only (loading is excluded).
print(end - start)
import piton.datasets
import collections
# Same histogram, computed while iterating the PatientDatabase directly
# rather than over patients held in a Python dict.
# NOTE(review): presumably the "Python in C++ database" row of the table --
# confirm.
source = "/local-scratch/nigam/projects/ethanid/piton/target"
data = piton.datasets.PatientDatabase(source, True)

lengths = collections.defaultdict(int)
for patient in data:
    # Counts the events that carry a value (value is not None).
    num_with_value = sum(event.value is not None for event in patient.events)
    lengths[num_with_value] += 1
import constdb
import collections
import pickle
# Same histogram, computed over patients stored as pickled blobs in a constdb
# file; each patient is unpickled on demand inside the loop.
# NOTE(review): presumably the "Python in pickle database" row of the table --
# confirm.
lengths = collections.defaultdict(int)
with constdb.MmapReader("patient_database") as reader:
    for key in reader.keys():
        # pickle.loads is only safe on trusted data; this reads a locally
        # named database file.
        patient = pickle.loads(reader.get(key))
        # Counts the events that carry a value (value is not None).
        num_with_value = sum(event.value is not None for event in patient.events)
        lengths[num_with_value] += 1