-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetrics.py
123 lines (98 loc) · 3.84 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import itertools
import time
import numpy as np
import sklearn
from .util import chunks
import matplotlib.pyplot as plt
def calculate_auroc(a_k, test_data, predictions, plot=False):
    """Compute an area-under-curve score on the second half of each observation.

    For every observation ``n`` the row ``a_k[predictions[n]]`` is looked up
    and its columns from ``test_data.shape[1] // 2`` onward are treated as
    predicted scores for the matching columns of ``test_data[n]``. Scores are
    binned into 30 equal-width bins on [0, 1]; the cumulative fractions of
    observed ones and observed zeroes across the bins form the curve whose
    area (trapezoidal rule) is returned.

    Args:
        a_k: 2-D array whose rows are selected by ``predictions``
            (presumably per-cluster parameters in [0, 1] -- confirm with
            the caller).
        test_data: 2-D array of observations; only entries equal to 0 or 1
            in the second half of the columns are counted.
        predictions: Sequence of integer row indices into ``a_k``, one per
            row of ``test_data``.
        plot: When true, draw the curve with matplotlib and call
            ``plt.show()``.

    Returns:
        The value of ``sklearn.metrics.auc(cdf_ones, cdf_zeroes)``.

    Note:
        If the evaluated half contains no ones (or no zeroes) the
        corresponding fractions are 0/0 and the result is NaN.
    """
    # Imported explicitly: a bare ``import sklearn`` does not guarantee that
    # the ``sklearn.metrics`` submodule has been loaded.
    from sklearn.metrics import auc

    assert len(a_k.shape) == 2
    threshold_count = 31
    thresholds = np.linspace(start=0, stop=1, num=threshold_count)
    digits = int(test_data.shape[1] / 2)
    assert test_data.shape[0] == len(predictions)

    # Gather the predicted scores for all observations at once.
    cluster_rows = np.asarray(predictions, dtype=np.intp)
    predicted_bottom = a_k[cluster_rows, digits:]
    observed_bottom = test_data[:, digits:]

    ones_mask = observed_bottom == 1
    zeroes_mask = observed_bottom == 0
    total_ones = np.count_nonzero(ones_mask)
    total_zeroes = np.count_nonzero(zeroes_mask)

    # np.histogram uses half-open bins except for the last one, which is
    # closed, so scores exactly equal to 1.0 are counted instead of dropped.
    Tones_obs, _ = np.histogram(predicted_bottom[ones_mask], bins=thresholds)
    Tzeroes_obs, _ = np.histogram(predicted_bottom[zeroes_mask], bins=thresholds)

    pdf_ones = Tones_obs / total_ones
    pdf_zeroes = Tzeroes_obs / total_zeroes
    cdf_ones = np.cumsum(pdf_ones)
    cdf_zeroes = np.cumsum(pdf_zeroes)

    if plot:
        ax = plt.gca()
        plt.ylim((0.0, 1.0))
        plt.xlim((0.0, 1.0))
        plt.xlabel("Cumulative Proportion of Ones")
        plt.ylabel("Cumulative Proportion of Zeroes")
        # Diagonal reference line across the current axis limits.
        plt.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")
        plt.plot(cdf_ones, cdf_zeroes, marker="x", markersize=6, linewidth=3, color="blue")
        plt.fill_between(cdf_ones, 0, cdf_zeroes, alpha=0.2, color='blue')
        plt.title("Receiver Operating Characteristic")
        plt.grid(True, which="both")
        plt.show()

    return auc(cdf_ones, cdf_zeroes)
def naive_align_labels(reference_params, estimated_params):
    """Find the row permutation of ``estimated_params`` closest to the reference.

    Every permutation of the ``K`` rows is tried (K! candidates), and the one
    with the smallest mean squared error against ``reference_params`` is
    kept. Ties are resolved in favour of the permutation generated first.

    Args:
        reference_params: 2-D array of shape (K, D).
        estimated_params: 2-D array whose rows are re-ordered; it is indexed
            with permutations of ``range(K)``.

    Returns:
        A tuple ``(best_permutation, smallest_MSE)`` where
        ``best_permutation`` is a tuple of row indices into
        ``estimated_params`` and ``smallest_MSE`` is the squared error summed
        over all entries and divided by ``K * D``.
    """
    K = reference_params.shape[0]
    D = reference_params.shape[1]
    best_permutation = None
    # Start from +inf rather than a magic constant so that a best permutation
    # is always reported, however large the errors are.
    smallest_MSE = np.inf
    for permutation in itertools.permutations(range(K)):
        reordered = estimated_params[list(permutation), :]
        MSE = np.sum(np.power(reference_params - reordered, 2)) / (K * D)
        if MSE < smallest_MSE:
            best_permutation = permutation
            smallest_MSE = MSE
    return best_permutation, smallest_MSE
def fast_align_labels(reference_params, estimated_params):
    """Greedily match each reference row to its nearest unused estimated row.

    Reference rows are processed in order; each one takes the not-yet-assigned
    row of ``estimated_params`` with the smallest squared Euclidean distance
    (the first such row on ties). The result is always a permutation of
    ``range(K)``, including when ``estimated_params`` contains duplicate rows.

    Args:
        reference_params: 2-D array of shape (K, D).
        estimated_params: 2-D array with (at least) K rows of width D.

    Returns:
        A tuple ``(alignments, MSE)`` where ``alignments`` is a list of K row
        indices into ``estimated_params`` and ``MSE`` is the squared error
        between ``reference_params`` and the re-ordered estimates, summed over
        all entries and divided by ``K * D``.
    """
    K = reference_params.shape[0]
    D = reference_params.shape[1]
    available = np.ones(K, dtype=bool)
    alignments = []
    for reference_row in reference_params:
        # Track original indices directly instead of re-finding the chosen
        # row by value, which picks the wrong index when rows are duplicated.
        candidates = np.flatnonzero(available)
        distances = np.sum(
            np.power(reference_row - estimated_params[candidates], 2), axis=1
        )
        chosen = int(candidates[np.argmin(distances)])
        alignments.append(chosen)
        available[chosen] = False
    MSE = np.sum(np.power(reference_params - estimated_params[alignments, :], 2)) / (K * D)
    return alignments, MSE
def find_clusters(a_k, data_obs, alignments=None, partial=False):
    """Assign each observation to the nearest row of ``a_k``.

    Distance is the squared Euclidean distance over the first ``digits``
    columns, where ``digits`` is the full observation width, or half of it
    when ``partial`` is true.

    Args:
        a_k: 2-D array with fewer rows than columns (both are asserted).
        data_obs: Either a 2-D array with one observation per row or a single
            1-D observation.
        alignments: Optional row order applied to ``a_k`` before matching;
            returned labels index into the re-ordered rows. Defaults to the
            identity order.
        partial: When true, only the first half of the columns is compared.

    Returns:
        A list with one label (the ``np.argmin`` result) per observation; a
        1-D ``data_obs`` yields a single-element list.
    """
    assert len(a_k.shape) == 2
    assert a_k.shape[0] < a_k.shape[1]

    # shape[-1] is the observation width for both 1-D and 2-D input; reading
    # shape[1] would raise on a 1-D observation before it could be handled.
    digits = int(data_obs.shape[-1])
    if partial:
        digits //= 2

    if alignments is None:
        alignments = np.arange(a_k.shape[0])
    realigned_ak = a_k[alignments, :]
    centers = realigned_ak[:, :digits]

    # Treat a single 1-D observation as a one-row batch.
    observations = data_obs if len(data_obs.shape) > 1 else data_obs[np.newaxis, :]
    return [
        np.argmin(np.power(row[:digits] - centers, 2).sum(axis=1))
        for row in observations
    ]
def average_of_estimates(input, size=10):
    """Average parameter estimates group by group after aligning their rows.

    ``input`` is split with ``chunks(input, size)`` (presumably into
    consecutive groups of ``size`` estimates -- confirm against ``.util``).
    Within each group the first estimate is the reference; every other
    estimate has its rows re-ordered by ``fast_align_labels`` against that
    reference before the element-wise mean of the group is taken.

    Args:
        input: Sequence of 2-D estimate arrays; its length must be an exact
            multiple of ``size`` (asserted).
        size: Number of estimates per group.

    Returns:
        A list holding one averaged array per group.
    """
    assert (float(len(input)) / float(size)).is_integer()

    def _stack_aligned(group):
        # The first member of the group is the anchor the others are aligned to.
        anchor = group[0]
        members = [anchor]
        members.extend(
            candidate[fast_align_labels(anchor, candidate)[0], :]
            for candidate in group[1:]
        )
        return np.stack(members, axis=0)

    return [np.mean(_stack_aligned(group), axis=0) for group in chunks(input, size)]