-
Notifications
You must be signed in to change notification settings - Fork 0
/
inspect_h5py.py
65 lines (46 loc) · 1.66 KB
/
inspect_h5py.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Software Name : mislabeled-benchmark
# SPDX-FileCopyrightText: Copyright (c) Orange Innovation
# SPDX-License-Identifier: MIT
#
# This software is distributed under the MIT license,
# see the "LICENSE.md" file for more details
# or https://github.com/Orange-OpenSource/mislabeled-benchmark/blob/master/LICENSE.md
# %%
import json
import os
import h5py
import numpy as np
from datasets import datasets_ranked_by_time
# %%
# Root directory scanned below: every sub-directory is treated as a detector
# and every "<dataset>.json" file inside it as that detector's results.
base_path = os.path.join(os.path.expanduser("~"), "output/detect/noise")
# %%
# For every JSON result file that has no sibling "<dataset>.hdf5" yet, move
# the "trust_scores" entry of each record into a new HDF5 file (one dataset
# per record, named by the record index, under the "trust_scores" group) and
# rewrite the JSON file without those entries.
detectors = []
datasets = []
for detector in os.listdir(base_path):
    detector_dir = os.path.join(base_path, detector)
    # Stray files next to the detector directories cannot be listed; skip them.
    if not os.path.isdir(detector_dir):
        continue
    detectors.append(detector)

    for filename in os.listdir(detector_dir):
        # splitext copes with names that contain no dot or several dots,
        # where a bare split(".") would fail to unpack.
        dataset, ext = os.path.splitext(filename)
        if ext != ".json":
            continue
        datasets.append(dataset)

        h5py_path = os.path.join(detector_dir, f"{dataset}.hdf5")
        json_path = os.path.join(detector_dir, f"{dataset}.json")
        if os.path.isfile(h5py_path):
            # Already converted: the JSON no longer carries trust scores.
            continue
        print("missing h5py", dataset, detector)

        with open(json_path) as input_file:
            d = json.load(input_file)

        results = []
        # The context manager closes (and flushes) the HDF5 file before the
        # JSON is overwritten, so the scores are on disk before they are
        # dropped from their only other location.
        with h5py.File(h5py_path, "w") as h5_file:
            ts_store = h5_file.create_group("trust_scores")
            for i, res in enumerate(d):
                ts = np.array(res.pop("trust_scores"))
                ts_store.create_dataset(str(i), data=ts)
                results.append(res)

        with open(json_path, mode="w") as output_file:
            json.dump(results, output_file)

# Deduplicated, sorted summary of what was found on disk.
detectors = np.unique(detectors)
datasets = np.unique(datasets)
print(detectors)
print(datasets)
# %%