Skip to content

Commit a04f008

Browse files
author
Kieran Elmes
committed Apr 2, 2019
add wip glmnet-style update
1 parent 3e3ddce commit a04f008

File tree

1 file changed

+55
-4
lines changed

1 file changed

+55
-4
lines changed
 

‎test_lasso.c

+55-4
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
//#define N 30
1616
//#define P 21110
1717
#define P 100
18-
#define HALT_BETA_DIFF 50
18+
#define HALT_BETA_DIFF 0
1919

2020
static int VERBOSE;
2121

@@ -122,6 +122,48 @@ double *read_y_csv(char *fn, int n) {
122122
return Y;
123123
}
124124

125+
// n.b.: for glmnet gamma should be lambda * [alpha=1] = lambda
126+
// Soft-thresholding operator: S(z, gamma) = sign(z) * max(|z| - gamma, 0).
//
// z:     value to shrink towards zero.
// gamma: threshold width; expected to be non-negative.
//
// Returns +0.0 whenever |z| <= gamma, otherwise z moved towards zero by
// gamma. A NaN input propagates to the result.
double soft_threshold(double z, double gamma) {
    const double abs_z = fabs(z);

    // Use <= so the boundary case |z| == gamma yields +0.0; a strict < lets a
    // negative z fall through and come back as -0.0 (printed as "-0.000000").
    if (abs_z <= gamma)
        return 0.0;

    const double shrunk = abs_z - gamma;
    return signbit(z) ? -shrunk : shrunk;
}
135+
136+
//TODO: applies when the x variables are standardized to have unit variance, is this the case?
137+
//TODO: glmnet also standardizes Y before computing its lambda sequence.
138+
double update_beta_glmnet(int **X, double *Y, int n, int p, double lambda, double *beta, int k, double dBMax, double intercept) {
139+
double derivative = 0.0;
140+
double sumk = 0.0;
141+
double sumn = 0.0;
142+
double sump;
143+
double new_beta;
144+
145+
for (int i = 0; i < n; i++) {
146+
sump = 0.0;
147+
for (int j = 0; j < p; j++) {
148+
if (j != k)
149+
sump += X[i][j]?beta[j]:0.0;
150+
}
151+
//sumn += (Y[i] - sump)*(double)X[i][k];
152+
sumn += X[i][k]?(Y[i] - intercept - sump):0.0;
153+
sumk += X[i][k] * X[i][k];
154+
}
155+
156+
new_beta = soft_threshold(sumn/n, lambda);
157+
// soft thresholding of n, lambda*[alpha=1]
158+
159+
if (fabs(beta[k] - new_beta) > dBMax)
160+
dBMax = fabs(beta[k] - new_beta);
161+
beta[k] = new_beta;
162+
if (VERBOSE)
163+
printf("beta_%d is now %f\n", k, beta[k]);
164+
return dBMax;
165+
}
166+
125167
double update_beta_cyclic(int **X, double *Y, int n, int p, double lambda, double *beta, int k, double dBMax, double intercept) {
126168
double derivative = 0.0;
127169
double sumk = 0.0;
@@ -235,10 +277,11 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
235277
double *beta = malloc(p*sizeof(double)); // probably too big in most cases.
236278
memset(beta, 0, p*sizeof(double));
237279

238-
int max_iter = 100;
280+
int max_iter = 5;
239281

240282
double error = 0, prev_error;
241283
double intercept = 0.0;
284+
double iter_lambda;
242285
int use_cyclic = 0, use_greedy = 0;
243286

244287
if (strcmp(method,"cyclic") == 0) {
@@ -260,7 +303,9 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
260303
double dBMax = 0.0; // largest beta diff this cycle
261304

262305
// update intercept
263-
intercept = update_intercept_cyclic(intercept, X, Y, beta, n, p);
306+
//intercept = update_intercept_cyclic(intercept, X, Y, beta, n, p);
307+
//iter_lambda = lambda*(max_iter-iter)/max_iter;
308+
//printf("using lambda = %f\n", iter_lambda);
264309

265310
for (int k = 0; k < p; k++) {
266311
// update the predictor \Beta_k
@@ -296,7 +341,7 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
296341
int **X2_from_X(int **X, int n, int p) {
297342
int **X2 = malloc(n*sizeof(int*));
298343
for (int row = 0; row < n; row++) {
299-
X2[row] = malloc(((p*p)/2 + p/2)*sizeof(int));
344+
X2[row] = malloc(((p*p)/2 + p/2 + 1)*sizeof(int));
300345
int offset = 0;
301346
for (int i = 0; i < p; i++) {
302347
for (int j = i; j < p; j++) {
@@ -377,6 +422,12 @@ int main(int argc, char** argv) {
377422
}
378423
printf("\n");
379424

425+
printf("indices significantly negative (-500):\n");
426+
for (int i = 0; i < nbeta; i++) {
427+
if (beta[i] < -500)
428+
printf("%d: %f\n", i, beta[i]);
429+
}
430+
380431
printf("freeing X/Y\n");
381432
free(xmatrix.X);
382433
free(Y);

0 commit comments

Comments
 (0)