-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharm.py
110 lines (82 loc) · 2.91 KB
/
arm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
This file contains the Arm interface and its implementations.
Users (typically Bandits) interact with Arms through the pull() method, which:
- returns a reward value
- advances the state of the arm's parameters (if any)
"""
import numpy as np
class Arm:
    """Interface for a single bandit arm.

    An arm owns whatever internal state it needs and hands back a reward
    each time it is pulled. Every member here raises NotImplementedError,
    including the constructor, so subclasses must supply their own
    ``__init__`` (without delegating to this one), ``name`` and ``pull``.
    """

    def __init__(self, **kwargs):
        # Deliberately unusable: the base class cannot be instantiated.
        raise NotImplementedError()

    @property
    def name(self):
        """Identifier of the arm; must be provided by subclasses."""
        raise NotImplementedError()

    def pull(self):
        """Pull the arm.

        Implementations return a double representing the reward and
        advance the arm's internal state (if any).
        """
        raise NotImplementedError()
class WhiteNoiseArm(Arm):
    """Arm whose reward comes from a caller-supplied sampler.

    ``rng`` is a zero-argument callable; each pull simply returns whatever
    that callable produces. The arm keeps no other state.
    """

    def __init__(self, name, rng):
        # Arm.__init__ raises NotImplementedError, so it is intentionally
        # not invoked here.
        self.__name, self._rng = name, rng

    @property
    def name(self):
        """Name given to this arm at construction time."""
        return self.__name

    def pull(self):
        """Return one sample obtained by calling the stored sampler."""
        sample = self._rng()
        return sample
class BernoulliArm(WhiteNoiseArm):
    """Generates iid observations from a Bernoulli white noise.

    Each pull is a single-trial draw from ``np.random.binomial`` with
    success probability ``prob``.
    """

    def __init__(self, prob):
        def draw():
            # Closes over the constructor argument, not ``self.prob``:
            # reassigning the attribute later does not affect sampling.
            return np.random.binomial(n=1, p=prob)

        WhiteNoiseArm.__init__(self, name='bernoulli_arm', rng=draw)
        self.prob = prob
class GaussianArm(WhiteNoiseArm):
    """Generates iid observations from a Gaussian white noise.

    Each pull is a draw from ``np.random.normal`` with location ``mu`` and
    scale ``sigma``.
    """

    def __init__(self, mu, sigma):
        def draw():
            # Closes over the constructor arguments, not the attributes:
            # reassigning ``self.mu``/``self.sigma`` later does not affect
            # sampling.
            return np.random.normal(loc=mu, scale=sigma)

        WhiteNoiseArm.__init__(self, name='gaussian_arm', rng=draw)
        self.mu = mu
        self.sigma = sigma
class LinearInterpolationArm(Arm):
    """Arm whose mean moves linearly between a cycle of target means.

    ``means[i]`` is the arm mean at the first iteration of period ``i``.
    Over the ``periods[i]`` iterations of that period the mean moves
    linearly toward ``means[(i + 1) % len(means)]``; after the last period
    the cycle starts again. ``iteration`` is the starting position and is
    incremented by one on every pull.

    ``noise_func`` is called with the interpolated mean and its result is
    the reward. When omitted, the reward is drawn from
    ``np.random.normal(loc=mean)``. Extra keyword arguments are accepted
    and ignored.

    Raises ValueError if ``periods`` does not have as many entries as
    ``means``.
    """

    def __init__(self, means, periods, iteration, noise_func=None, **kwargs):
        def default_noise(mean):
            # Gaussian noise centred on the interpolated mean; the scale is
            # left at numpy's default.
            return np.random.normal(loc=mean)

        self.__name = "lin_interp_arm"
        self.num_periods = len(means)
        self.means = means
        self.iteration = iteration
        self.periods = periods
        self.noise_func = default_noise if noise_func is None else noise_func

        # One period length is required per mean.
        if np.size(periods) != self.num_periods:
            raise ValueError("periods not correct size")

    @property
    def name(self):
        """Fixed identifier of this arm type."""
        return self.__name

    def pull(self):
        """Return a reward for the current iteration and advance by one."""
        # Position inside the repeating cycle of all periods.
        cycle_length = np.sum(self.periods)
        remaining = self.iteration % cycle_length

        # Walk through the periods, subtracting each length, until the
        # running value turns negative. ``current`` is then the period that
        # contains the iteration, and ``-remaining`` is the number of
        # iterations left until that period ends.
        current = -1
        while remaining >= 0:
            current += 1
            remaining -= self.periods[current]

        # The period after the last one wraps around to the first.
        following = (current + 1) % self.num_periods

        # Share of the current period still ahead: 1 at its first
        # iteration, approaching 0 toward its end.
        weight = np.abs(remaining) / self.periods[current]
        arm_mean = (
            weight * self.means[current] +
            (1.0 - weight) * self.means[following]
        )

        # The counter is advanced only after the noise function returns.
        reward = self.noise_func(arm_mean)
        self.iteration += 1
        return reward