MNT raise error from cython

scikit-learn-contrib · Aug 2, 2024
commit 5e63efb (1 parent: 7b409d2)
Show file tree

Hide file tree

Showing 5 changed files with 68 additions and 56 deletions.
diff --git a/fastcan/_cancorr_fast.pyx b/fastcan/_cancorr_fast.pyx
@@ -26,22 +26,25 @@ cdef int _iamax(
 
 cdef void _normv(
     floating[::1] x,            # IN/OUT
-) noexcept nogil:
+) except * nogil:
     """
-    Vector normialization by Euclidean norm.
+    Vector normalization by Euclidean norm.
     x (IN) : (1, n_samples) Vector.
     x (OUT) : (1, n_samples) Normalized vector.
     """
     cdef:
         unsigned int n_samples = x.shape[0]
         floating x_norm
 
-    x_norm = 1.0/_nrm2(n_samples, &x[0], 1)
+    x_norm = _nrm2(n_samples, &x[0], 1)
+    if x_norm == 0.0:
+        raise ZeroDivisionError("Cannot normalize a vector of all zeros.")
+    x_norm = 1.0/x_norm
     _scal(n_samples, x_norm, &x[0], 1)
 
 cdef void _normm(
     floating[::1, :] X,     # IN/OUT
-) noexcept nogil:
+) except * nogil:
     """
     Matrix column-wise normalization by Euclidean norm.
     X (IN) : (n_samples, nx) Matrix.
@@ -55,7 +58,12 @@ cdef void _normm(
 
     # X = X/norm(X)
     for j in range(nx):
-        x_norm = 1.0/_nrm2(n_samples, &X[0, j], 1)
+        x_norm = _nrm2(n_samples, &X[0, j], 1)
+        if x_norm == 0.0:
+            raise ZeroDivisionError(
+                "Cannot normalize a matrix containing a vector of all zeros."
+            )
+        x_norm = 1.0/x_norm
         _scal(n_samples, x_norm, &X[0, j], 1)
 
 
@@ -135,7 +143,7 @@ cdef void _mgsvv(
 
 cdef void _orth(
     floating[::1, :] X,         # IN/OUT
-) noexcept nogil:
+) except * nogil:
     """Orthogonalization of a matrix by the modified Gram-Schmidt.
     X (IN) : (n_samples, n_features) Matrix.
     X (OUT) : (n_samples, n_features) Orthonormal matrix.
@@ -172,7 +180,7 @@ cpdef void _forward_search(
     uint8_t[::1] mask,                # IN/TEMP
     int32_t[::1] indices,             # OUT
     floating[::1] scores,             # OUT
-) noexcept nogil:
+) except * nogil:
     """
     Greedy search with SSC.
     X (IN) : (n_samples, n_features) Centered feature matrix.
@@ -236,12 +244,6 @@ cpdef void _forward_search(
 
             # Find max scores and update indices, X, mask, and scores
             index = _iamax(n_features, r2, 1)
-            if r2[index] <= 0:
-                raise RuntimeError(
-                    "No improvement can be found. "
-                    "The best candidate feature contributes the SSC score "
-                    f"{r2[index]} given the selected {i} features."
-                )
             indices[i] = index
             scores[i] = r2[index]
 

diff --git a/fastcan/_fastcan.py b/fastcan/_fastcan.py
@@ -132,8 +132,13 @@ def fit(self, X, y):
         """
         self._validate_params()
         # X y
-        check_X_params = {"order": "F", "dtype": float}
-        check_y_params = {"ensure_2d": False, "order": "F", "dtype": float}
+        check_X_params = {"ensure_min_samples": 2, "order": "F", "dtype": float}
+        check_y_params = {
+            "ensure_min_samples": 2,
+            "ensure_2d": False,
+            "order": "F",
+            "dtype": float,
+        }
         X, y = self._validate_data(
             X=X,
             y=y,
@@ -205,10 +210,6 @@ def fit(self, X, y):
             indices=indices,
             scores=scores,
         )
-        if -1 in indices:
-            raise RuntimeError("The selection is interrupted by error!!!")
-        if self.verbose == 1:
-            print()
         support = np.zeros(shape=self.n_features_in_, dtype=bool)
         support[indices] = True
         self.indices_ = indices

diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fastcan"
-version = "0.1.34"
+version = "0.1.35"
 description = "A fast canonical-correlation-based feature selection method"
 authors = [
     { name = "Matthew Sikai Zhang", email = "[email protected]" },

diff --git a/tests/test_correlation.py b/tests/test_correlation.py
@@ -196,24 +196,33 @@ def test_cython_errors():
     # Test whether fastcan raise cython errors properly
     rng = np.random.default_rng(0)
     n_samples = 20
-    n_informative = 15
-    x_1 = rng.random((n_samples, n_informative))
-    x_2 = x_1[:, 0]
-    X = np.c_[x_1, x_2]
+    n_informative = 3
+    x_sub = rng.random((n_samples, n_informative))
     y = rng.random((n_samples))
 
 
-    selector_non_singular = FastCan(
+    selector_zero_vector = FastCan(
         n_features_to_select=n_informative+1,
     )
 
-    with pytest.raises(RuntimeError, match="The selection is interrupted by error!!!"):
-        # No candidate
-        selector_non_singular.fit(X, y)
-
-    with pytest.raises(RuntimeError, match="The selection is interrupted by error!!!"):
-        # No improvement
-        selector_no_improve = FastCan(
+    with pytest.raises(
+        ZeroDivisionError,
+        match="Cannot normalize a vector of all zeros."
+    ):
+        # Zeros vector during orthogonalization
+        selector_zero_vector.fit(np.c_[x_sub, x_sub[:, 0]], y)
+
+    with pytest.raises(
+        ZeroDivisionError,
+        match="Cannot normalize a matrix containing a vector of all zeros."
+    ):
+        # Constant vector
+        selector_const_vector = FastCan(
             n_features_to_select=2,
         )
-        selector_no_improve.fit(np.zeros((3, 2)), np.zeros(3))
+        selector_const_vector.fit(np.zeros((3, 2)), [1, 2, 3])
+
+    with pytest.raises(RuntimeError, match=r"No candidate feature can .*"):
+        # No candidate
+        selector_zero_vector.fit(np.c_[x_sub, x_sub[:, 0]+x_sub[:, 1]], y)
+