-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsampler.py
128 lines (101 loc) · 3.51 KB
/
sampler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import abc as _abc
import numpy as _np
from sklearn.utils import check_random_state
class AbstractSampler(metaclass=_abc.ABCMeta):
    """Common interface of the samplers defined in this module.

    Concrete subclasses implement ``sample``, ``get_n_iters`` and
    ``get_n_samples``; each of them receives the data arrays as
    positional arguments.
    """

    def _check_Xs(self, *arrays):
        """Raise ``ValueError`` unless all *arrays* agree in ``shape[0]``.

        Only neighbouring arrays are compared, so a call with zero or one
        array passes without any ``shape`` attribute being read.
        """
        for previous, current in zip(arrays, arrays[1:]):
            if current.shape[0] != previous.shape[0]:
                raise ValueError('arrays must have the same number of samples')

    @_abc.abstractmethod
    def sample(self, *arrays):
        """Return a generator over samples."""

    @_abc.abstractmethod
    def get_n_iters(self, *arrays):
        """Return the number of iterations produced by *sample*."""

    @_abc.abstractmethod
    def get_n_samples(self, *arrays):
        """Return the number of samples produced at each iteration."""
class SubSampler(AbstractSampler):
    """Sampler that yields random row subsets drawn without replacement.

    Parameters
    ----------
    n_iters : int
        Number of subsamples yielded by ``sample``.
    subsample_size : int or float, default 1.0
        A Python ``int`` or any NumPy integer is taken as an absolute
        number of rows.  Any other value is treated as a fraction of the
        number of rows and the product is truncated with ``int()``.
    random_state : optional
        Passed through ``check_random_state``; the result is stored and
        used to draw the permutations.
    """

    def __init__(self, n_iters, subsample_size=1.0, random_state=None):
        self.random_state = check_random_state(random_state)
        self.subsample_size = subsample_size
        self.n_iters = n_iters

    def sample(self, *arrays):
        """Yield ``n_iters`` subsamples of *arrays*.

        Each iteration takes the first ``ss_n`` entries of a fresh random
        permutation of the row indices and applies them to every array.
        A single input array is yielded bare; several arrays are yielded
        as a tuple indexed with the same rows.  A requested size larger
        than the number of rows is truncated by the slice.
        """
        ss_n = self._get_ssn(*arrays)
        n = arrays[0].shape[0]
        for _ in range(self.n_iters):
            idx = self.random_state.permutation(n)[:ss_n]
            if len(arrays) == 1:
                yield arrays[0][idx]
            else:
                yield tuple(dset[idx] for dset in arrays)

    def _get_ssn(self, *arrays):
        """Return the subsample size as an absolute row count.

        Raises ``ValueError`` (via ``_check_Xs``) when the arrays differ in
        their number of rows.
        """
        self._check_Xs(*arrays)
        size = self.subsample_size
        # ``_np.integer`` covers every NumPy integer width, signed or not.
        # Listing only int8/16/32 made an ``np.int64`` size fall through to
        # the fractional branch and get multiplied by the row count.
        if isinstance(size, (int, _np.integer)):
            return int(size)
        n = arrays[0].shape[0]
        return int(size * n)

    def get_n_iters(self, *arrays):
        """Return the configured number of iterations."""
        return self.n_iters

    def get_n_samples(self, *arrays):
        """Return the subsample size computed by ``_get_ssn``."""
        return self._get_ssn(*arrays)
class BootstrapSampler(AbstractSampler):
    """Sampler that redraws rows with replacement.

    Every resample has as many rows as the input arrays.  ``n_iters``
    sets how many resamples ``sample`` yields; ``random_state`` is passed
    through ``check_random_state`` and the result draws the row indices.
    """

    def __init__(self, n_iters, random_state=None):
        self.n_iters = n_iters
        self.random_state = check_random_state(random_state)

    def get_n_samples(self, *arrays):
        """Return the row count of the first array after validating all."""
        self._check_Xs(*arrays)
        first = arrays[0]
        return first.shape[0]

    def get_n_iters(self, *arrays):
        """Return the configured number of iterations."""
        return self.n_iters

    def sample(self, *arrays):
        """Yield ``n_iters`` resamples of *arrays*.

        A single input array is yielded bare; several arrays are yielded
        as a tuple, all indexed with the same drawn rows.
        """
        self._check_Xs(*arrays)
        n_rows = arrays[0].shape[0]
        single = len(arrays) == 1
        for _ in range(self.n_iters):
            # n_rows draws from [0, n_rows): indices may repeat.
            drawn = self.random_state.randint(n_rows, size=n_rows)
            if single:
                yield arrays[0][drawn]
            else:
                yield tuple(data[drawn] for data in arrays)
class JackknifeSampler(AbstractSampler):
    """Sampler that yields the arrays with one entry left out at a time.

    Parameters
    ----------
    indices : sequence or None, default None
        Entries to leave out, one per iteration.  ``None`` means every
        row index of the input in turn.  Tuples and lists are converted
        to a NumPy array; any other object is stored unchanged.
    """

    def __init__(self, indices=None):
        if isinstance(indices, (tuple, list)):
            indices = _np.array(indices)
        self.indices = indices

    def get_n_iters(self, *arrays):
        """Return the number of leave-one-out iterations.

        This is the row count of the first array when no indices were
        given, otherwise the number of stored indices.
        """
        self._check_Xs(*arrays)
        if self.indices is None:
            return arrays[0].shape[0]
        # len() equals shape[0] for arrays and also works for sequences
        # that __init__ stores unconverted (e.g. ``range``).
        return len(self.indices)

    def sample(self, *arrays):
        """Yield *arrays* once per index, each time without that entry.

        A single input array is yielded bare; several arrays are yielded
        as a tuple, all indexed with the same remaining rows.
        """
        self._check_Xs(*arrays)
        n = arrays[0].shape[0]
        indices = self.indices
        if indices is None:
            indices = _np.arange(n)
        mask = _np.ones(n, dtype=bool)
        ar_idx = _np.arange(n)
        for i in indices:
            # Clear the entry, take the remaining rows, then restore the
            # mask so it can be reused by the next iteration.
            mask[i] = False
            idx = ar_idx[mask]
            mask[i] = True
            if len(arrays) == 1:
                yield arrays[0][idx]
            else:
                yield tuple(dset[idx] for dset in arrays)

    def get_n_samples(self, *arrays):
        """Return the row count of the first array minus one."""
        self._check_Xs(*arrays)
        return arrays[0].shape[0] - 1