-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrun.py
98 lines (71 loc) · 2.87 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
import logging
import argparse
import numpy
import torch
import pyro
import pyro.infer
import pyro.infer.mcmc
import pyro.distributions as dist
def get_arguments():
    """Parse and return the command-line arguments for the calibration run.

    Flags:
        --logging-level       'WARNING' (default) or 'DEBUG'
        --input-array         path to the .npy score array (required)
        --num-samples         MCMC samples to draw (default 150)
        --num-warmup-samples  warmup steps before sampling (default 100)
    """
    arg_parser = argparse.ArgumentParser(description='Bayesian Calibration arguments')
    arg_parser.add_argument('--logging-level', choices=['WARNING', 'DEBUG'], default='WARNING')
    arg_parser.add_argument('--input-array', required=True, type=str)
    arg_parser.add_argument('--num-samples', default=150, type=int)
    arg_parser.add_argument('--num-warmup-samples', default=100, type=int)
    return arg_parser.parse_args()
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)); works on scalars and numpy arrays."""
    exp_neg = numpy.exp(-x)
    return 1. / (1. + exp_neg)
def model_single_score(data, config):
    """Generative model for per-annotation scores.

    Latent sites:
        model-mean-{a} ~ Uniform(0, 3): prior mean for model a's score
        model-{a}      ~ Normal(model-mean-{a}, 1): latent score of model a
        turker-mean-{t} ~ Normal(0, 1): per-annotator bias, zero by default
    Likelihood:
        scores ~ Normal(model-{a} + turker-mean-{t}, 1), element-wise over
        the rows of ``data``.

    Each row of ``data`` is (model_idx, turker_idx, score); ``config``
    supplies 'n_models' and 'n_turkers'.
    """
    # One latent score per model. The per-model mean is sampled inside the
    # same expression, preserving the original site ordering:
    # model-mean-0, model-0, model-mean-1, model-1, ...
    model_scores = [
        pyro.sample(
            "model-{}".format(idx),
            dist.Normal(pyro.sample("model-mean-{}".format(idx), dist.Uniform(0., 3.)), 1.),
        )
        for idx in range(config['n_models'])
    ]
    # Annotator biases, centred at zero.
    turker_biases = [
        pyro.sample("turker-mean-{}".format(idx), dist.Normal(0., 1.))
        for idx in range(config['n_turkers'])
    ]
    # Likelihood mean for each observed (model, annotator) pair.
    likelihood_means = torch.stack(
        [model_scores[int(row[0])] + turker_biases[int(row[1])] for row in data]
    )
    return pyro.sample("scores", dist.Normal(likelihood_means, 1.))
def infer(data, config):
    """Run NUTS-based MCMC on the single-score model conditioned on the
    observed scores, and return the posterior mean and stddev of the
    per-model latent score sites.

    Parameters:
        data: sequence of (model_idx, turker_idx, score) rows; column 2
            supplies the observed scores.
        config: dict providing 'num-samples', 'warmup-steps' and 'n_models'.

    Returns:
        (mean, stddev) of the empirical marginal over sites
        "model-0" ... "model-{n_models-1}".

    NOTE(review): uses the legacy ``pyro.infer.mcmc`` API
    (``MCMC(...).run(...)`` + ``EmpiricalMarginal``) — confirm against the
    installed pyro version.
    """
    # Condition the generative model on the observed scores at the "scores" site.
    observed_single_scores = torch.Tensor([tup[2] for tup in data])
    single_score_condition = pyro.condition(model_single_score, data={'scores': observed_single_scores})
    # NUTS kernel with dual-averaging step-size adaptation; 0.1 is the initial step size.
    nuts_kernel = pyro.infer.mcmc.NUTS(single_score_condition, adapt_step_size=True, step_size=0.1)
    # .run(data, config) forwards these arguments to the model on every step.
    mcmc_run = pyro.infer.mcmc.MCMC(nuts_kernel, num_samples=config['num-samples'], warmup_steps=config['warmup-steps']).run(data, config)
    # Marginalize the posterior over the latent per-model score sites only.
    score_marginal = pyro.infer.EmpiricalMarginal(mcmc_run, sites=["model-{}".format(mi) for mi in range(config['n_models'])])
    return score_marginal.mean, score_marginal.stddev
def prepare_data(args):
    """Load the score array and derive the run configuration.

    The .npy input has rows (model_idx, turker_idx, score); model and
    annotator indices are assumed to start at 0.

    Parameters:
        args: parsed CLI namespace with ``input_array``, ``logging_level``,
            ``num_samples`` and ``num_warmup_samples``.

    Returns:
        (config, data): the config dict consumed by infer()/the model, and
        the loaded numpy array.
    """
    data = numpy.load(args.input_array)
    # we assume models and annotator indexing from 0
    # Cast to int: numpy.load commonly yields float rows, and these counts
    # are later passed to range(), which rejects floats.
    n_turkers = int(max(a[1] for a in data)) + 1
    n_models = int(max(a[0] for a in data)) + 1
    config = {
        'logging-level': args.logging_level,
        'num-samples': args.num_samples,
        'warmup-steps': args.num_warmup_samples,
        'n_models': n_models,
        'n_turkers': n_turkers,
    }
    return config, data
def main():
    """Entry point: parse CLI args, load the data, run inference, print results."""
    cli_args = get_arguments()
    run_config, score_data = prepare_data(cli_args)
    posterior_mean, posterior_std = infer(score_data, run_config)
    print('Empirical mean: {}\n\n'.format(posterior_mean))
    print('Empirical std: {}\n\n'.format(posterior_std))


if __name__ == '__main__':
    main()