forked from GavinKerrigan/conf_matrix_and_calibration
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetrics.py
72 lines (56 loc) · 2.36 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
import numpy as np
from torch import nn
from imax_calib.evaluations import calibration_metrics as cal_metrics # Imax paper
import calibration as cal # Kumar et al, Verified Uncertainty Calibration
# Implements various metrics.
def get_acc(y_pred, y_true):
    """ Computes the accuracy of predictions.

    Args:
        y_pred: Either a 1D array of predicted labels, shape (n_samples, ), or a
            2D matrix of scores (e.g. probabilities), shape (n_samples, n_classes).
            For 2D input the predicted label is the argmax over axis 1.
        y_true: True labels, shape (n_samples, )

    Returns:
        Fraction of samples whose predicted label equals the true label.

    Raises:
        ValueError: If y_pred is neither 1D nor 2D.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)

    if y_pred.ndim == 1:
        labels = y_pred
    elif y_pred.ndim == 2:
        # Reduce the score matrix to hard label predictions.
        labels = np.argmax(y_pred, axis=1)
    else:
        raise ValueError(f'y_pred must be 1D or 2D, got ndim={y_pred.ndim}')

    # Compare element-wise; y_true must NOT be passed as np.mean's axis argument.
    return np.mean(labels == y_true)
def get_cw_ECE(probs, y_true, mode='mass', threshold_mode='class', num_bins=15):
    """ Estimates the class-wise ECE by binning.

    Args:
        probs: shape (n_samples, n_classes)
        y_true: shape (n_samples, )
        mode: Either 'mass' or 'width' -- determines binning scheme
        threshold_mode: Either 'class' or None -- determines if thresholding is used in estimation
        num_bins: Number of bins used in estimation

    Returns:
        The 'cw_mECE' (mode='mass') or 'cw_dECE' (mode='width') entry of the
        result returned by cal_metrics.compute_top_1_and_CW_ECEs.

    Raises:
        ValueError: If mode is neither 'mass' nor 'width'.
    """
    # Map the public binning-scheme name onto the approximator name used by imax_calib.
    approximators = {'mass': 'mECE', 'width': 'dECE'}
    if mode not in approximators:
        raise ValueError(f"mode must be 'mass' or 'width', got {mode!r}")
    _mode = approximators[mode]

    evals = cal_metrics.compute_top_1_and_CW_ECEs(probs, y_true, list_approximators=[_mode],
                                                  num_bins=num_bins, threshold_mode=threshold_mode)
    return evals[f'cw_{_mode}']
def get_ECE(probs, y_true, mode='mass', num_bins=15):
    """ Estimates the top-label ECE by binning.

    Args:
        probs: shape (n_samples, n_classes)
        y_true: shape (n_samples, )
        mode: Either 'mass' or 'width' -- determines binning scheme
        num_bins: Number of bins used in estimation

    Returns:
        The 'top_1_mECE' (mode='mass') or 'top_1_dECE' (mode='width') entry of the
        result returned by cal_metrics.compute_top_1_and_CW_ECEs.

    Raises:
        ValueError: If mode is neither 'mass' nor 'width'.
    """
    # Map the public binning-scheme name onto the approximator name used by imax_calib.
    approximators = {'mass': 'mECE', 'width': 'dECE'}
    if mode not in approximators:
        raise ValueError(f"mode must be 'mass' or 'width', got {mode!r}")
    _mode = approximators[mode]

    evals = cal_metrics.compute_top_1_and_CW_ECEs(probs, y_true, list_approximators=[_mode], num_bins=num_bins)
    return evals[f'top_1_{_mode}']
def get_MCE(probs, y_true):
    """ Estimates the class-wise ECE. Not recommended for use.

    Thin wrapper around cal.get_calibration_error, called in 'marginal' mode
    with p=1 and debiasing switched off; its result is returned unchanged.
    """
    # Fixed estimator settings forwarded to the calibration library.
    estimator_kwargs = dict(p=1, debias=False, mode='marginal')
    return cal.get_calibration_error(probs, y_true, **estimator_kwargs)
def get_NLL(probs, y_true):
    """ Computes the negative log likelihood.

    Args:
        probs: Array of class probabilities, indexed as (sample, class).
        y_true: Array of integer class labels, one per sample.

    Returns:
        The mean negative log likelihood as a Python float, computed with
        torch.nn.NLLLoss on the element-wise log of the clipped probabilities.
    """
    probs = np.asarray(probs)
    if not np.issubdtype(probs.dtype, np.floating):
        probs = probs.astype(np.float64)

    # Clip away exact zeros before taking the log. 1e-100 is kept as the floor
    # for float64, but it underflows to 0 in narrower dtypes such as float32,
    # so never go below the smallest positive normal number of the dtype.
    floor = max(1e-100, float(np.finfo(probs.dtype).tiny))
    logprobs = torch.from_numpy(np.log(np.clip(probs, floor, 1)))

    # NLLLoss requires int64 class indices; label arrays are often int32.
    targets = torch.from_numpy(np.asarray(y_true, dtype=np.int64))

    return nn.NLLLoss()(logprobs, targets).item()