-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbin_metrics.py
162 lines (128 loc) · 5.55 KB
/
bin_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -*- coding: utf-8 -*-
"""Bin Metrics.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1TSET7SJWo94UruMIGOm2TAJYcuSGjLJS
# Bin Metrics
"""
import numpy as np
import plotly.graph_objects as go
from scipy import integrate
"""
bin_plot: Plots the actual and predicted number of modes for different segments of the spectra
known as bins.
args:
predicted modes: list,
actual modes: list,
[default 20] number of bins: int
[default 1000000] number of modes: int
[default True] integral: plot cumulative number of modes detected
returns: None
"""
def bin_plot(predicted_modes, modes, num_bins=20, num_modes=1000000, integral=True):
    """Plot actual vs. predicted mode counts per bin with Plotly.

    The range [0, num_modes) is split into ``num_bins`` equal-width,
    half-open bins. A value ``v`` belongs to bin ``s`` when
    ``s * num_modes / num_bins <= v < (s + 1) * num_modes / num_bins``;
    values outside every bin are ignored.

    Args:
        predicted_modes: iterable of predicted mode positions.
        modes: iterable of actual mode positions.
        num_bins: number of bins (default 20).
        num_modes: upper end of the binned range (default 1000000).
        integral: when True (default) plot the cumulative trapezoid
            integral of the per-bin counts; when False plot the raw
            per-bin counts.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    def _count_per_bin(values):
        # Materialise once so one-shot iterables (generators) can be
        # scanned again for every bin.
        values = list(values)
        counts = []
        for s in range(num_bins):
            # Bin edges are loop-invariant for the inner scan: compute once.
            lower = s * num_modes / num_bins
            upper = (s + 1) * num_modes / num_bins
            counts.append(sum(1 for v in values if lower <= v < upper))
        return counts

    predicted_counts = _count_per_bin(predicted_modes)
    actual_counts = _count_per_bin(modes)
    x_bins = list(range(num_bins))

    if integral:
        # `cumtrapz` was renamed to `cumulative_trapezoid` and the old name
        # has been removed from recent SciPy releases; prefer the new name
        # and fall back to the old one on older installations.
        cumulative = getattr(integrate, 'cumulative_trapezoid', None)
        if cumulative is None:
            cumulative = integrate.cumtrapz
        # NOTE(review): this is the trapezoid-rule integral of the counts
        # over the bin index, not a running sum of counts (np.cumsum) --
        # confirm which one is intended.
        y_actual = cumulative(actual_counts, x_bins, initial=0)
        y_predicted = cumulative(predicted_counts, x_bins, initial=0)
        yaxis_title = 'Cumulative Number of Modes'
    else:
        y_actual = actual_counts
        y_predicted = predicted_counts
        yaxis_title = 'Number of Modes'

    fig = go.Figure()
    # actual mode distribution by bin
    fig.add_trace(go.Scatter(x=x_bins, y=y_actual,
                             mode='lines+markers',
                             name='Actual values'))
    # predicted mode distribution by bin
    fig.add_trace(go.Scatter(x=x_bins, y=y_predicted,
                             mode='lines+markers',
                             name='Predicted values'))
    fig.update_layout(title='Number of Modes by Bins of Normalized Hertz',
                      xaxis_title='Bin Number',
                      yaxis_title=yaxis_title)
    fig.show()
# examples
#bin_plot(predicted_modes, modes)
#bin_plot(predicted_modes, modes, num_bins = 15, integral = False)
"""
bin_counts: Counts the actual and predicted number of modes for different segments of the spectra
known as bins.
args:
predicted modes: list,
actual modes: list,
[default 20] number of bins: int
[default 1000000] number of modes: int
returns: predicted and actual mode counts for each bin: dict{str:list}
"""
def bin_counts(predicted_modes, modes, num_bins=20, num_modes=1000000):
    """Count predicted and actual modes falling into each bin.

    The range [0, num_modes) is split into ``num_bins`` equal-width,
    half-open bins. A value ``v`` belongs to bin ``s`` when
    ``s * num_modes / num_bins <= v < (s + 1) * num_modes / num_bins``;
    values outside every bin are not counted.

    Args:
        predicted_modes: iterable of predicted mode positions.
        modes: iterable of actual mode positions.
        num_bins: number of bins (default 20).
        num_modes: upper end of the binned range (default 1000000).

    Returns:
        dict with two lists of length ``num_bins``:
        ``'actual'`` (counts from ``modes``) and ``'predicted'``
        (counts from ``predicted_modes``).
    """
    # Materialise once: a generator/iterator would otherwise be exhausted
    # while scanning the first bin, leaving every later bin at zero.
    predicted_values = list(predicted_modes)
    actual_values = list(modes)

    predicted_counts = []
    actual_counts = []
    for s in range(num_bins):
        # Bin edges do not depend on the value being tested: compute once.
        lower = s * num_modes / num_bins
        upper = (s + 1) * num_modes / num_bins
        predicted_counts.append(
            sum(1 for v in predicted_values if lower <= v < upper))
        actual_counts.append(
            sum(1 for v in actual_values if lower <= v < upper))

    return {'actual': actual_counts, 'predicted': predicted_counts}
# examples
#bin_counts(predicted_modes, modes)
#bin_counts(predicted_modes, modes, num_bins = 15)
"""
bin_metrics: Calculates performance metrics based on the bins.
args:
predicted modes: list,
actual modes: list,
[default 20] number of bins: int
[default 1000000] number of modes: int
returns:
dict: average number of modes missed by each bin, total number of modes missed, direction of error, and the slopes of linear fits to the predicted and actual bin counts
"""
def bin_metrics(predicted_modes, modes, num_bins=20, num_modes=1000000):
    """Compute bin-level error metrics between predicted and actual modes.

    The range [0, num_modes) is split into ``num_bins`` equal-width,
    half-open bins; predicted and actual modes are counted per bin and
    the per-bin counts are compared. A short summary is printed.

    Args:
        predicted_modes: iterable of predicted mode positions.
        modes: iterable of actual mode positions.
        num_bins: number of bins (default 20); must be at least 1.
        num_modes: upper end of the binned range (default 1000000).

    Returns:
        dict with keys:
        ``'average_missed'``: mean absolute count difference per bin;
        ``'total_missed'``: sum of absolute count differences;
        ``'error_direction'``: mean signed difference (predicted - actual)
        per bin, positive when over-predicting;
        ``'predicted_slope'`` / ``'actual_slope'``: slope of a degree-1
        ``np.polyfit`` of the predicted / actual counts against bin index.

    Raises:
        ValueError: if ``num_bins`` is smaller than 1.
    """
    if num_bins < 1:
        # Averages divide by num_bins and the linear fit needs data points.
        raise ValueError("num_bins must be at least 1, got %r" % (num_bins,))

    def _count_per_bin(values):
        # Materialise once so one-shot iterables (generators) can be
        # scanned again for every bin.
        values = list(values)
        counts = []
        for s in range(num_bins):
            lower = s * num_modes / num_bins
            upper = (s + 1) * num_modes / num_bins
            counts.append(sum(1 for v in values if lower <= v < upper))
        return counts

    predicted_counts = _count_per_bin(predicted_modes)
    actual_counts = _count_per_bin(modes)

    # Signed per-bin error: positive means the bin was over-predicted.
    signed_errors = [p - a for p, a in zip(predicted_counts, actual_counts)]
    total_missed = sum(abs(err) for err in signed_errors)
    pred_direction = sum(signed_errors)
    avg_missed = total_missed / num_bins
    avg_direction = pred_direction / num_bins

    # Linear trend of the counts across bins; index 0 holds the slope.
    x_bins = list(range(num_bins))
    poly = np.polyfit(x_bins, predicted_counts, 1)
    actual_poly = np.polyfit(x_bins, actual_counts, 1)

    # put diff metrics in dictionary
    result_dict = {'average_missed': avg_missed,
                   'total_missed': total_missed,
                   'error_direction': avg_direction,
                   'predicted_slope': poly[0],
                   'actual_slope': actual_poly[0]}

    print("Each bin on average misses %f modes" % avg_missed)
    print("This model missed a total of %d modes" % total_missed)
    if pred_direction > 0:
        print("On average, each bin tends to overpredict the number of modes by %f" % avg_direction)
    elif pred_direction < 0:
        # Report the magnitude: "underpredict by -1.0" is a double negative.
        print("On average, each bin tends to underpredict the number of modes by %f" % abs(avg_direction))
    else:
        print("On average, over- and underpredictions across bins cancel out")
    return result_dict
# examples
#bin_metrics(predicted_modes, modes)
#bin_metrics(predicted_modes, modes, 30)