# support_code.py
import numpy as np
from scipy.optimize import minimize
from scipy.linalg import cho_factor, cho_solve


def params_unwrap(param_vec, shapes, sizes):
"""Helper routine for minimize_list"""
args = []
pos = 0
for i in range(len(shapes)):
sz = sizes[i]
args.append(param_vec[pos:pos+sz].reshape(shapes[i]))
pos += sz
    return args


def params_wrap(param_list):
"""Helper routine for minimize_list"""
param_list = [np.array(x) for x in param_list]
shapes = [x.shape for x in param_list]
sizes = [x.size for x in param_list]
param_vec = np.zeros(sum(sizes))
pos = 0
for param in param_list:
sz = param.size
param_vec[pos:pos+sz] = param.ravel()
pos += sz
unwrap = lambda pvec: params_unwrap(pvec, shapes, sizes)
return param_vec, unwrap
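

# Example (illustrative sketch, not part of the original support code):
# round-trip a list of arrays through params_wrap/params_unwrap. The shapes
# used here are arbitrary synthetic choices.
def _demo_params_wrap():
    ww = np.random.randn(3, 2)
    bb = np.array(0.5)
    vec, unwrap = params_wrap([ww, bb])
    ww2, bb2 = unwrap(vec)
    assert np.allclose(ww, ww2) and np.allclose(bb, bb2)

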
def minimize_list(cost, init_list, args):
"""Optimize a list of arrays (wrapper of scipy.optimize.minimize)
The input function "cost" should take a list of parameters,
followed by any extra arguments:
cost(init_list, *args)
should return the cost of the initial condition, and a list in the same
format as init_list giving gradients of the cost wrt the parameters.
The options to the optimizer have been hard-coded. You may wish
to change disp to True to get more diagnostics. You may want to
    decrease maxiter while debugging. However, please report all results
in Q2-5 using maxiter=500.
"""
opt = {'maxiter': 500, 'disp': False}
init, unwrap = params_wrap(init_list)
def wrap_cost(vec, *args):
E, params_bar = cost(unwrap(vec), *args)
vec_bar, _ = params_wrap(params_bar)
return E, vec_bar
res = minimize(wrap_cost, init, args, 'L-BFGS-B', jac=True, options=opt)
return unwrap(res.x)
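

# Example (illustrative sketch, not part of the original support code):
# minimize a simple quadratic with minimize_list. The cost function returns
# its value and a gradient list in the same format as the parameter list,
# which is the contract minimize_list expects.
def _demo_minimize_list():
    def quad_cost(params, centre):
        xx, = params
        E = np.sum((xx - centre)**2)
        return E, [2*(xx - centre)]
    centre = np.array([1.0, -2.0, 3.0])
    xx_opt, = minimize_list(quad_cost, [np.zeros(3)], (centre,))
    # xx_opt should be close to centre

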
def linreg_cost(params, X, yy, alpha):
"""Regularized least squares cost function and gradients
Can be optimized with minimize_list -- see fit_linreg_gradopt for a
demonstration.
Inputs:
params: tuple (ww, bb): weights ww (D,), bias bb scalar
X: N,D design matrix of input features
yy: N, real-valued targets
alpha: regularization constant
Outputs: (E, [ww_bar, bb_bar]), cost and gradients
"""
# Unpack parameters from list
ww, bb = params
# forward computation of error
ff = np.dot(X, ww) + bb
res = ff - yy
E = np.dot(res, res) + alpha*np.dot(ww, ww)
# reverse computation of gradients
ff_bar = 2*res
bb_bar = np.sum(ff_bar)
ww_bar = np.dot(X.T, ff_bar) + 2*alpha*ww
    return E, [ww_bar, bb_bar]


def fit_linreg_gradopt(X, yy, alpha):
"""
fit a regularized linear regression model with gradient opt
ww, bb = fit_linreg_gradopt(X, yy, alpha)
Find weights and bias by using a gradient-based optimizer
(minimize_list) to improve the regularized least squares cost:
np.sum(((np.dot(X,ww) + bb) - yy)**2) + alpha*np.dot(ww,ww)
Inputs:
X N,D design matrix of input features
yy N, real-valued targets
alpha scalar regularization constant
Outputs:
ww D, fitted weights
bb scalar fitted bias
"""
D = X.shape[1]
args = (X, yy, alpha)
init = (np.zeros(D), np.array(0))
ww, bb = minimize_list(linreg_cost, init, args)
return ww, bb
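

# Example (illustrative sketch on synthetic data, not part of the original
# support code): fit a regularized linear regression and form predictions.
def _demo_fit_linreg_gradopt():
    N, D = 100, 5
    X = np.random.randn(N, D)
    yy = np.dot(X, np.random.randn(D)) + 0.3 + 0.01*np.random.randn(N)
    ww, bb = fit_linreg_gradopt(X, yy, alpha=10.0)
    preds = np.dot(X, ww) + bb  # N, predictions at the training inputs

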
def logreg_cost(params, X, yy, alpha):
"""Regularized logistic regression cost function and gradients
Can be optimized with minimize_list -- see fit_linreg_gradopt for a
demonstration of fitting a similar function.
Inputs:
params: tuple (ww, bb): weights ww (D,), bias bb scalar
X: N,D design matrix of input features
        yy: N, binary targets (converted internally to +/- 1)
alpha: regularization constant
Outputs: (E, [ww_bar, bb_bar]), cost and gradients
"""
# Unpack parameters from list
ww, bb = params
# Force targets to be +/- 1
yy = 2*(yy==1) - 1
# forward computation of error
aa = yy*(np.dot(X, ww) + bb)
sigma = 1/(1 + np.exp(-aa))
E = -np.sum(np.log(sigma)) + alpha*np.dot(ww, ww)
# reverse computation of gradients
aa_bar = sigma - 1
bb_bar = np.dot(aa_bar, yy)
ww_bar = np.dot(X.T, yy*aa_bar) + 2*alpha*ww
return E, (ww_bar, bb_bar)
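

# Example (illustrative sketch, not part of the original support code; the
# helper name is hypothetical): fit regularized logistic regression by
# passing logreg_cost to minimize_list, mirroring fit_linreg_gradopt above.
def _demo_fit_logreg_gradopt(X, yy, alpha):
    D = X.shape[1]
    args = (X, yy, alpha)
    init = (np.zeros(D), np.array(0))
    ww, bb = minimize_list(logreg_cost, init, args)
    return ww, bb

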
def nn_cost(params, X, yy=None, alpha=None):
"""NN_COST simple neural network cost function and gradients, or predictions
E, params_bar = nn_cost([ww, bb, V, bk], X, yy, alpha)
pred = nn_cost([ww, bb, V, bk], X)
Cost function E can be minimized with minimize_list
Inputs:
params (ww, bb, V, bk), where:
--------------------------------
ww K, hidden-output weights
bb scalar output bias
V K,D hidden-input weights
bk K, hidden biases
--------------------------------
X N,D input design matrix
yy N, regression targets
alpha scalar regularization for weights
Outputs:
E sum of squares error
params_bar gradients wrt params, same format as params
OR
pred N, predictions if only params and X are given as inputs
"""
# Unpack parameters from list
ww, bb, V, bk = params
# Forwards computation of cost
A = np.dot(X, V.T) + bk[None,:] # N,K
P = 1 / (1 + np.exp(-A)) # N,K
F = np.dot(P, ww) + bb # N,
if yy is None:
# user wants prediction rather than training signal:
return F
res = F - yy # N,
    E = np.dot(res, res) + alpha*(np.sum(V*V) + np.dot(ww,ww)) # scalar
# Reverse computation of gradients
F_bar = 2*res # N,
ww_bar = np.dot(P.T, F_bar) + 2*alpha*ww # K,
bb_bar = np.sum(F_bar) # scalar
P_bar = np.dot(F_bar[:,None], ww[None,:]) # N,K
A_bar = P_bar * P * (1 - P) # N,K
V_bar = np.dot(A_bar.T, X) + 2*alpha*V # K,D
bk_bar = np.sum(A_bar, 0)
return E, (ww_bar, bb_bar, V_bar, bk_bar)
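

# Example (illustrative sketch on synthetic data, not part of the original
# support code): train the small neural network by passing nn_cost to
# minimize_list, then predict by calling nn_cost without targets.
def _demo_nn_cost():
    N, D, K = 50, 4, 10
    X = np.random.randn(N, D)
    yy = np.random.randn(N)
    init = (0.1*np.random.randn(K), np.array(0),
            0.1*np.random.randn(K, D), 0.1*np.random.randn(K))
    ww, bb, V, bk = minimize_list(nn_cost, init, (X, yy, 10.0))
    preds = nn_cost([ww, bb, V, bk], X)  # N, predictions

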
def rbf_fn(X1, X2):
    """Helper routine for gp_post_par"""
    # Unit-bandwidth RBF: exp(-||x1 - x2||^2), expanded as
    # exp(2*x1.x2 - ||x1||^2 - ||x2||^2) to avoid forming pairwise differences.
    return np.exp((np.dot(X1, (2*X2.T)) - np.sum(X1*X1, 1)[:, None]) - np.sum(X2*X2, 1)[None, :])


def gauss_kernel_fn(X1, X2, ell, sigma_f):
    """Helper routine for gp_post_par"""
    # Gaussian kernel: sigma_f^2 * exp(-||x1 - x2||^2 / (2*ell^2))
    return sigma_f**2 * rbf_fn(X1/(np.sqrt(2)*ell), X2/(np.sqrt(2)*ell))


def gp_post_par(X_rest, X_obs, yy, sigma_y=0.05, ell=5.0, sigma_f=0.1):
"""GP_POST_PAR means and covariances of a posterior Gaussian process
rest_cond_mu, rest_cond_cov = gp_post_par(X_rest, X_obs, yy)
rest_cond_mu, rest_cond_cov = gp_post_par(X_rest, X_obs, yy, sigma_y, ell, sigma_f)
Calculate the means and covariances at all test locations of the posterior Gaussian
process conditioned on the observations yy at observed locations X_obs.
Inputs:
X_rest GP test locations
X_obs locations of observations
yy observed values
sigma_y observation noise standard deviation
ell kernel function length scale
sigma_f kernel function standard deviation
Outputs:
rest_cond_mu mean at each location in X_rest
rest_cond_cov covariance matrix between function values at all test locations
"""
X_rest = X_rest[:, None]
X_obs = X_obs[:, None]
K_rest = gauss_kernel_fn(X_rest, X_rest, ell, sigma_f)
K_rest_obs = gauss_kernel_fn(X_rest, X_obs, ell, sigma_f)
K_obs = gauss_kernel_fn(X_obs, X_obs, ell, sigma_f)
M = K_obs + sigma_y**2 * np.eye(yy.size)
M_cho, M_low = cho_factor(M)
rest_cond_mu = np.dot(K_rest_obs, cho_solve((M_cho, M_low), yy))
rest_cond_cov = K_rest - np.dot(K_rest_obs, cho_solve((M_cho, M_low), K_rest_obs.T))
return rest_cond_mu, rest_cond_cov
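

# Example (illustrative sketch on synthetic 1D data, not part of the original
# support code): posterior mean and pointwise standard deviations at a grid
# of test locations, given a few noisy observations.
def _demo_gp_post_par():
    X_obs = np.array([10.0, 25.0, 40.0])
    yy = np.array([0.10, -0.05, 0.15])
    X_rest = np.arange(0, 50, 1.0)
    rest_cond_mu, rest_cond_cov = gp_post_par(X_rest, X_obs, yy)
    rest_cond_std = np.sqrt(np.diag(rest_cond_cov))  # pointwise uncertainty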