-
Notifications
You must be signed in to change notification settings - Fork 0
/
belief.py
77 lines (67 loc) · 2.35 KB
/
belief.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
import geo
import human
def gaussian(mu, var):
return lambda x: np.exp(-((x - mu)**2)/(2*var**2))/(np.sqrt(2 * np.pi) * var)
def entropy(beliefs):
"""
the shannon entropy of a list of nums
"""
if not np.isclose(sum(beliefs), 1):
raise Exception("entropy: beliefs don't sum to 1!")
return -sum([b*np.log(b) for b in beliefs])
# A likelihood is:
#
# P(goal | u_r, u_h, state)
#
# Example:
# >>> beta = 1.0
# >>> alpha = 0.5
# >>> l = boltzmann(beta)
# ...
# >>> p_u_h_given = l(alpha, state, u_h, u_r, goal)
def boltzmann(beta):
"""
boltzmann is the boltzmann distribution observation
model, where the human acts approximately optimal
according to a cost function, here given by
distance to go to goal.
"""
def likelihood(alpha, state, u_h, past_u_r, goal):
return np.exp(beta*
(geo.norm((state + human.optimal(alpha, state, goal)) - goal)
- geo.norm((state + u_h) - goal))
)
return likelihood
def lazy(threshold):
"""
lazy is an observation model based on the expectation
that the human will not offer corrective controls if the
robot is acting correctly, and will offer overly corrective
controls if the robot is acting incorrectly.
"""
def lazylike(alpha, state, u_h, past_u_r, goal):
if human.looks_almost_optimal(alpha, state, goal, past_u_r, threshold):
if np.alltrue(np.allclose(u_h, np.zeros(u_h.shape))):
return 1.0 - 1e-10
else:
return 0.0 + 1e-10
else:
return gaussian(0, np.pi/4)(
geo.angle_between(human.pull_back(alpha, state, goal, past_u_r), u_h)
)
return lazylike
def update(alpha, state, u_h, past_u_r, goals, beliefs, likelihood):
"""
update performs a bayesian step update on
beliefs given the observation: u_h.
use to generate the belief update/what the belief
update would be if you were in one state, saw u_h
and had the passed in goals and beliefs.
"""
beliefs = np.copy(beliefs)
beliefs = [b*likelihood(alpha, state, u_h, past_u_r, g)
for (g, b) in zip(goals, beliefs)]
# normalize!
beliefs = beliefs/np.sum(beliefs)
return beliefs