Fix real-value upper bound for interactions: also scale the pruning estimate by the overall maximum absolute column value

Kieran Elmes · Kieran Elmes · commit f8acb69e1e69 · 2022-08-08T16:04:16.000+12:00
diff --git a/coverage-badge.svg b/coverage-badge.svg
@@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="90" height="20" role="img" aria-label="coverage: 0%"><title>coverage: 0%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="90" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="61" height="20" fill="#555"/><rect x="61" width="29" height="20" fill="#e05d44"/><rect width="90" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="315" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">coverage</text><text x="315" y="140" transform="scale(.1)" fill="#fff" textLength="510">coverage</text><text aria-hidden="true" x="745" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="190">0%</text><text x="745" y="140" transform="scale(.1)" fill="#fff" textLength="190">0%</text></g></svg>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="96" height="20" role="img" aria-label="coverage: 77%"><title>coverage: 77%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="96" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="61" height="20" fill="#555"/><rect x="61" width="35" height="20" fill="#fe7d37"/><rect width="96" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="315" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">coverage</text><text x="315" y="140" transform="scale(.1)" fill="#fff" textLength="510">coverage</text><text aria-hidden="true" x="775" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="250">77%</text><text x="775" y="140" transform="scale(.1)" fill="#fff" textLength="250">77%</text></g></svg>
diff --git a/src/Pint.cpp b/src/Pint.cpp
@@ -328,6 +328,7 @@ SEXP lasso_(SEXP X_, SEXP Y_, SEXP lambda_min_, SEXP lambda_max_,
     for (int_fast64_t i = 0; i < p; i++)
         X[i] = (int_fast64_t*)malloc(n * sizeof *X[i]);
 
+    float overall_max_val = 0.0;
     for (int_fast64_t i = 0; i < p; i++) {
         float col_max_val = 0.0;
         for (int_fast64_t j = 0; j < n; j++) {
@@ -342,11 +343,15 @@ SEXP lasso_(SEXP X_, SEXP Y_, SEXP lambda_min_, SEXP lambda_max_,
             }
         }
         col_max_vals[i] = col_max_val;
+        if (fabs(col_max_val) > overall_max_val) {
+            overall_max_val = fabs(col_max_val);
+        }
     }
     struct continuous_info ci;
     ci.col_max_vals = col_max_vals;
     ci.col_real_vals = col_real_vals;
     ci.use_cont = continuous_X;
+    ci.overall_max_val = overall_max_val;
    
     float* Y = (float*)malloc(n * sizeof(float));
     for (int_fast64_t i = 0; i < n; i++) {
diff --git a/src/liblasso.h b/src/liblasso.h
@@ -103,6 +103,8 @@ struct continuous_info {
     bool use_cont;
     std::vector<float>* col_real_vals;
     float* col_max_vals;
+    float overall_max_val;
+    int depth;
 };
 void free_continuous_info(struct continuous_info ci);
 
@@ -245,4 +247,4 @@ extern int_pair* cached_nums;
 extern bool VERBOSE;
 extern float total_sqrt_error;
 
-#endif
+#endif
diff --git a/src/pruning.cpp b/src/pruning.cpp
@@ -70,8 +70,12 @@ float l2_combined_estimate(X_uncompressed X, float lambda, int_fast64_t k,
         alpha = 0.0;
 
     float remainder = pessimistic_estimate(alpha, last_rowsum, rowsum, col, X.host_col_nz[k]);
-    if (ci->use_cont)
-        remainder *= fabs(ci->col_max_vals[k]);
+    if (ci->use_cont) {
+        if (ci->depth == 2)
+            remainder *= fabs(ci->overall_max_val) * fabs(ci->col_max_vals[k]);
+        else
+            remainder *= fabs(ci->overall_max_val) * fabs(ci->overall_max_val) * fabs(ci->col_max_vals[k]);
+    }
 
     float total_estimate = fabs(last_max * alpha) + remainder;
     return total_estimate;
diff --git a/tests/func-tests.cpp b/tests/func-tests.cpp
@@ -1392,6 +1392,8 @@ void check_small_continuous() {
     ci.col_real_vals = new vector<float>[p];
     ci.col_max_vals = new float[p];
     ci.use_cont = true;
+    ci.overall_max_val = 0.0;
+    ci.depth = 2;
 
     XMatrix xm;
     xm.X = calloc(p, sizeof(int_fast64_t*));
@@ -1408,14 +1410,19 @@ void check_small_continuous() {
     ci.col_real_vals[3] = {0.2, 0.2, 0.4, -3.3, 2.1};
     ci.col_real_vals[4] = {-0.2, -1.2, 3.2, 3.5, 0.1};
 
+    float overall_max = 0.0;
     for (int j = 0; j < p; j++) {
         float max_val = 0.0;
         for (auto v : ci.col_real_vals[j]) {
             if (fabs(v) > fabs(max_val))
                 max_val = v;
         }
         ci.col_max_vals[j] = max_val;
+        if (fabs(max_val) > overall_max) {
+            overall_max = fabs(max_val);
+        }
     }
+    ci.overall_max_val = overall_max;
 
     std::vector<float> beta = {0.3, 1.1, 0.9, -2.2, 1.5};
     float* Y = calloc(n, sizeof(float));
@@ -1479,6 +1486,8 @@ void check_continous_ones(UpdateFixture* fixture,
     ci.col_max_vals = col_max_vals;
     ci.col_real_vals = col_real_vals;
     ci.use_cont = true;
+    ci.overall_max_val = 1.0;
+    ci.depth = 3;
     bool check_duplicates = true;
     Lasso_Result lr = simple_coordinate_descent_lasso(fixture->xmatrix, fixture->Y, fixture->n, fixture->p,
         -1, 0.01, 200,