15
15
//#define N 30
16
16
//#define P 21110
17
17
#define P 100
18
- #define HALT_BETA_DIFF 50
18
+ #define HALT_BETA_DIFF 0
19
19
20
20
static int VERBOSE ;
21
21
@@ -122,6 +122,48 @@ double *read_y_csv(char *fn, int n) {
122
122
return Y ;
123
123
}
124
124
125
// n.b.: for glmnet gamma should be lambda * [alpha=1] = lambda
/*
 * Soft-thresholding operator: shrinks z towards zero by gamma.
 *
 * Returns 0.0 when |z| is strictly below gamma; otherwise returns
 * |z| - gamma carrying the sign of z.
 */
double soft_threshold(double z, double gamma) {
	const double magnitude = fabs(z);

	// inside the threshold band everything collapses to zero
	if (magnitude < gamma)
		return 0.0;

	const double shrunk = magnitude - gamma;
	return signbit(z) ? -shrunk : shrunk;
}
135
+
136
+ //TODO: applies when the x variables are standardized to have unit variance, is this the case?
137
+ //TODO: glmnet also standardizes Y before computing its lambda sequence.
138
+ double update_beta_glmnet (int * * X , double * Y , int n , int p , double lambda , double * beta , int k , double dBMax , double intercept ) {
139
+ double derivative = 0.0 ;
140
+ double sumk = 0.0 ;
141
+ double sumn = 0.0 ;
142
+ double sump ;
143
+ double new_beta ;
144
+
145
+ for (int i = 0 ; i < n ; i ++ ) {
146
+ sump = 0.0 ;
147
+ for (int j = 0 ; j < p ; j ++ ) {
148
+ if (j != k )
149
+ sump += X [i ][j ]?beta [j ]:0.0 ;
150
+ }
151
+ //sumn += (Y[i] - sump)*(double)X[i][k];
152
+ sumn += X [i ][k ]?(Y [i ] - intercept - sump ):0.0 ;
153
+ sumk += X [i ][k ] * X [i ][k ];
154
+ }
155
+
156
+ new_beta = soft_threshold (sumn /n , lambda );
157
+ // soft thresholding of n, lambda*[alpha=1]
158
+
159
+ if (fabs (beta [k ] - new_beta ) > dBMax )
160
+ dBMax = fabs (beta [k ] - new_beta );
161
+ beta [k ] = new_beta ;
162
+ if (VERBOSE )
163
+ printf ("beta_%d is now %f\n" , k , beta [k ]);
164
+ return dBMax ;
165
+ }
166
+
125
167
double update_beta_cyclic (int * * X , double * Y , int n , int p , double lambda , double * beta , int k , double dBMax , double intercept ) {
126
168
double derivative = 0.0 ;
127
169
double sumk = 0.0 ;
@@ -235,10 +277,11 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
235
277
double * beta = malloc (p * sizeof (double )); // probably too big in most cases.
236
278
memset (beta , 0 , p * sizeof (double ));
237
279
238
- int max_iter = 100 ;
280
+ int max_iter = 5 ;
239
281
240
282
double error = 0 , prev_error ;
241
283
double intercept = 0.0 ;
284
+ double iter_lambda ;
242
285
int use_cyclic = 0 , use_greedy = 0 ;
243
286
244
287
if (strcmp (method ,"cyclic" ) == 0 ) {
@@ -260,7 +303,9 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
260
303
double dBMax = 0.0 ; // largest beta diff this cycle
261
304
262
305
// update intercept
263
- intercept = update_intercept_cyclic (intercept , X , Y , beta , n , p );
306
+ //intercept = update_intercept_cyclic(intercept, X, Y, beta, n, p);
307
+ //iter_lambda = lambda*(max_iter-iter)/max_iter;
308
+ //printf("using lambda = %f\n", iter_lambda);
264
309
265
310
for (int k = 0 ; k < p ; k ++ ) {
266
311
// update the predictor \Beta_k
@@ -296,7 +341,7 @@ double *simple_coordinate_descent_lasso(int **X, double *Y, int n, int p, double
296
341
int * * X2_from_X (int * * X , int n , int p ) {
297
342
int * * X2 = malloc (n * sizeof (int * ));
298
343
for (int row = 0 ; row < n ; row ++ ) {
299
- X2 [row ] = malloc (((p * p )/2 + p /2 )* sizeof (int ));
344
+ X2 [row ] = malloc (((p * p )/2 + p /2 + 1 )* sizeof (int ));
300
345
int offset = 0 ;
301
346
for (int i = 0 ; i < p ; i ++ ) {
302
347
for (int j = i ; j < p ; j ++ ) {
@@ -377,6 +422,12 @@ int main(int argc, char** argv) {
377
422
}
378
423
printf ("\n" );
379
424
425
+ printf ("indices significantly negative (-500):\n" );
426
+ for (int i = 0 ; i < nbeta ; i ++ ) {
427
+ if (beta [i ] < -500 )
428
+ printf ("%d: %f\n" , i , beta [i ]);
429
+ }
430
+
380
431
printf ("freeing X/Y\n" );
381
432
free (xmatrix .X );
382
433
free (Y );
0 commit comments