Optimize CDF Calculation and Convert NumPy Arrays to Tensors in Benchmark (#399)

Summary:
### PR Description

This PR is the first step toward making AEPsych's functions consistently accept and return PyTorch tensors, improving GPU compatibility and reducing redundant conversions between NumPy arrays and PyTorch tensors (partially addressing #365).

#### Key changes include:

1. **Conversion of NumPy arrays (`np.ndarray`) to tensors** in the following files:
   - **`aepsych/models/base.py`**:
     - Refactored the `p_below_threshold` method to operate fully with PyTorch tensors.
     - Replaced `norm.cdf()` with `torch.distributions.Normal(0, 1).cdf()` for better GPU compatibility (a sketch of this equivalence follows the list below).

   - **`aepsych/benchmark/problem.py`**:
     - Significant changes to ensure tensors are used consistently across the pipeline.
     - `f_threshold()` now returns a PyTorch tensor directly, ensuring consistency.
     - Additionally, `detach().cpu().numpy()` is used in the places where the `super().evaluate()` method returns float values, ensuring compatibility.

2. **Updates in `aepsych/tests/test_benchmark.py`**:
   - Migrated all operations from NumPy to PyTorch.
   - This includes the Brier score and misclassification error calculations, which now use `torch.mean()`, `torch.square()`, `torch.isclose()`, and `torch.all()` to align fully with tensor operations (a sketch of this calculation also follows the list).
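
As a quick illustration of the CDF/inverse-CDF equivalence this change relies on (a minimal sketch, not taken from the diff; the input values are hypothetical), `torch.distributions.Normal(0, 1)` reproduces `scipy.stats.norm` while keeping everything as tensors:

```python
import numpy as np
import torch
from scipy.stats import norm

normal_dist = torch.distributions.Normal(0.0, 1.0)  # standard normal, as used in the PR

f = torch.linspace(-3.0, 3.0, steps=7)   # hypothetical latent function values
p_torch = normal_dist.cdf(f)              # tensor in, tensor out (GPU-friendly)
p_scipy = norm.cdf(f.numpy())             # the NumPy/SciPy path being replaced

assert np.allclose(p_torch.numpy(), p_scipy, atol=1e-6)

# f_threshold() relies on the same correspondence for the inverse link:
thresholds = torch.tensor([0.55, 0.75, 0.95])
assert np.allclose(normal_dist.icdf(thresholds).numpy(),
                   norm.ppf(thresholds.numpy()), atol=1e-6)
```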
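
Likewise, a minimal sketch of the per-threshold Brier score and misclassification error as tensor operations (shapes and inputs are made up for illustration; in the benchmark they come from the model and the problem's `eval_grid`):

```python
import torch

torch.manual_seed(0)
n_thresholds, n_points = 5, 100
p_l = torch.rand(n_thresholds, n_points)  # hypothetical p(below threshold) from the model
true_p_l = (torch.rand(n_thresholds, n_points) > 0.5).to(torch.float32)  # hypothetical ground truth

# Same expressions as in the updated test, with dim=1 replacing NumPy's axis=1:
brier_p_below_thresh = torch.mean(2 * torch.square(true_p_l - p_l), dim=1)
misclass_on_thresh = torch.mean(p_l * (1 - true_p_l) + (1 - p_l) * true_p_l, dim=1)

assert brier_p_below_thresh.shape == (n_thresholds,)
assert torch.all((misclass_on_thresh >= 0) & (misclass_on_thresh <= 1))
```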

#### Stability:
All test cases have passed successfully in the workflow.

Pull Request resolved: #399

Reviewed By: crasanders

Differential Revision: D64245698

Pulled By: JasonKChow

fbshipit-source-id: 3ed3d7b627f488ec61da5b9013a46cafc8b83556
yalsaffar authored and facebook-github-bot committed Oct 15, 2024
1 parent b278dd1 commit 45d8e2d
Showing 3 changed files with 101 additions and 85 deletions.
136 changes: 73 additions & 63 deletions aepsych/benchmark/problem.py
@@ -10,7 +10,7 @@
import aepsych
import numpy as np
import torch
from scipy.stats import bernoulli, norm, pearsonr
from scipy.stats import bernoulli
from aepsych.strategy import SequentialStrategy, Strategy
from aepsych.utils import make_scaled_sobol

@@ -51,22 +51,25 @@ def metadata(self) -> Dict[str, Any]:
Benchmark's output dataframe, with its associated value stored in each row."""
return {"name": self.name}

def p(self, x: np.ndarray) -> np.ndarray:
"""Evaluate response probability from test function.
def p(self, x: torch.Tensor) -> torch.Tensor:
"""
Evaluate response probability from test function.
Args:
x (np.ndarray): Points at which to evaluate.
x (torch.Tensor): Points at which to evaluate.
Returns:
np.ndarray: Response probability at queries points.
torch.Tensor: Response probability at queried points.
"""
return norm.cdf(self.f(x))
normal_dist = torch.distributions.Normal(0, 1) # Standard normal distribution
return normal_dist.cdf(self.f(x)) # Use PyTorch's CDF equivalent


def sample_y(self, x: np.ndarray) -> np.ndarray:
def sample_y(self, x: torch.Tensor) -> np.ndarray: # TODO: This can be done with torch.bernoulli(self.p(x)), but Strategy.add_data() expects a numpy array for now
"""Sample a response from test function.
Args:
x (np.ndarray): Points at which to sample.
x (torch.Tensor): Points at which to sample.
Returns:
np.ndarray: A single (bernoulli) sample at points.
@@ -86,13 +89,13 @@ def f_hat(self, model: aepsych.models.base.ModelProtocol) -> torch.Tensor:
return f_hat

@cached_property
def f_true(self) -> np.ndarray:
def f_true(self) -> torch.Tensor:
"""Evaluate true test function over evaluation grid.
Returns:
torch.Tensor: Values of true test function over evaluation grid.
"""
return self.f(self.eval_grid).detach().numpy()
return self.f(self.eval_grid)

@cached_property
def p_true(self) -> torch.Tensor:
@@ -101,7 +104,8 @@ def p_true(self) -> torch.Tensor:
Returns:
torch.Tensor: Values of true response probability over evaluation grid.
"""
return norm.cdf(self.f_true)
normal_dist = torch.distributions.Normal(0, 1)
return normal_dist.cdf(self.f_true)

def p_hat(self, model: aepsych.models.base.ModelProtocol) -> torch.Tensor:
"""Generate mean predictions from the model over the evaluation grid.
@@ -142,61 +146,61 @@ def evaluate(
assert model is not None, "Cannot evaluate strategy without a model!"

# always eval f
f_hat = self.f_hat(model).detach().numpy()
p_hat = self.p_hat(model).detach().numpy()
f_hat = self.f_hat(model)
p_hat = self.p_hat(model)
assert (
self.f_true.shape == f_hat.shape
), f"self.f_true.shape=={self.f_true.shape} != f_hat.shape=={f_hat.shape}"

mae_f = np.mean(np.abs(self.f_true - f_hat))
mse_f = np.mean((self.f_true - f_hat) ** 2)
max_abs_err_f = np.max(np.abs(self.f_true - f_hat))
corr_f = pearsonr(self.f_true.flatten(), f_hat.flatten())[0]
mae_p = np.mean(np.abs(self.p_true - p_hat))
mse_p = np.mean((self.p_true - p_hat) ** 2)
max_abs_err_p = np.max(np.abs(self.p_true - p_hat))
corr_p = pearsonr(self.p_true.flatten(), p_hat.flatten())[0]
brier = np.mean(2 * np.square(self.p_true - p_hat))
mae_f = torch.mean(torch.abs(self.f_true - f_hat))
mse_f = torch.mean((self.f_true - f_hat) ** 2)
max_abs_err_f = torch.max(torch.abs(self.f_true - f_hat))
corr_f = torch.corrcoef(torch.stack((self.f_true.flatten(), f_hat.flatten())))[0, 1]
mae_p = torch.mean(torch.abs(self.p_true - p_hat))
mse_p = torch.mean((self.p_true - p_hat) ** 2)
max_abs_err_p = torch.max(torch.abs(self.p_true - p_hat))
corr_p = torch.corrcoef(torch.stack((self.p_true.flatten(), p_hat.flatten())))[0, 1]
brier = torch.mean(2 * torch.square(self.p_true - p_hat))

# eval in samp-based expectation over posterior instead of just mean
fsamps = model.sample(self.eval_grid, num_samples=1000).detach().numpy()
fsamps = model.sample(self.eval_grid, num_samples=1000)
try:
psamps = (
model.sample(self.eval_grid, num_samples=1000, probability_space=True) # type: ignore
.detach()
.numpy()

)
except (
TypeError
): # vanilla models don't have proba_space samps, TODO maybe we should add them
psamps = norm.cdf(fsamps)
normal_dist = torch.distributions.Normal(0, 1) # Standard normal distribution
psamps = normal_dist.cdf(fsamps)

ferrs = fsamps - self.f_true[None, :]
miae_f = np.mean(np.abs(ferrs))
mise_f = np.mean(ferrs**2)
miae_f = torch.mean(torch.abs(ferrs))
mise_f = torch.mean(ferrs**2)

perrs = psamps - self.p_true[None, :]
miae_p = np.mean(np.abs(perrs))
mise_p = np.mean(perrs**2)
miae_p = torch.mean(torch.abs(perrs))
mise_p = torch.mean(perrs**2)

expected_brier = (2 * np.square(self.p_true[None, :] - psamps)).mean()
expected_brier = torch.mean((2 * torch.square(self.p_true[None, :] - psamps)))

metrics = {
"mean_abs_err_f": mae_f,
"mean_integrated_abs_err_f": miae_f,
"mean_square_err_f": mse_f,
"mean_integrated_square_err_f": mise_f,
"max_abs_err_f": max_abs_err_f,
"pearson_corr_f": corr_f,
"mean_abs_err_p": mae_p,
"mean_integrated_abs_err_p": miae_p,
"mean_square_err_p": mse_p,
"mean_integrated_square_err_p": mise_p,
"max_abs_err_p": max_abs_err_p,
"pearson_corr_p": corr_p,
"brier": brier,
"expected_brier": expected_brier,
}
"mean_abs_err_f": mae_f.item(),
"mean_integrated_abs_err_f": miae_f.item(),
"mean_square_err_f": mse_f.item(),
"mean_integrated_square_err_f": mise_f.item(),
"max_abs_err_f": max_abs_err_f.item(),
"pearson_corr_f": corr_f.item(),
"mean_abs_err_p": mae_p.item(),
"mean_integrated_abs_err_p": miae_p.item(),
"mean_square_err_p": mse_p.item(),
"mean_integrated_square_err_p": mise_p.item(),
"max_abs_err_p": max_abs_err_p.item(),
"pearson_corr_p": corr_p.item(),
"brier": brier.item(),
"expected_brier": expected_brier.item(),
}

return metrics

@@ -211,7 +215,7 @@ class LSEProblem(Problem):
def __init__(self, thresholds: Union[float, List]):
super().__init__()
thresholds = [thresholds] if isinstance(thresholds, float) else thresholds
self.thresholds = np.array(thresholds)
self.thresholds = torch.tensor(thresholds)

@property
def metadata(self) -> Dict[str, Any]:
@@ -225,27 +229,33 @@ def metadata(self) -> Dict[str, Any]:
)
return md

def f_threshold(self, model=None):

def f_threshold(self, model=None) -> torch.Tensor:
try:
inverse_torch = model.likelihood.objective.inverse

def inverse_link(x):
return inverse_torch(torch.tensor(x)).numpy()
return inverse_torch(x)

except AttributeError:
inverse_link = norm.ppf
return inverse_link(self.thresholds).astype(np.float32)
def inverse_link(x):
normal_dist = torch.distributions.Normal(0, 1)
return normal_dist.icdf(x)


return inverse_link(self.thresholds).float() # Return as float32 tensor




@cached_property
def true_below_threshold(self) -> np.ndarray:
def true_below_threshold(self) -> torch.Tensor:
"""
Evaluate whether the true function is below threshold over the eval grid
(used for proper scoring and threshold missclassification metric).
"""
return (
self.p(self.eval_grid).reshape(1, -1) <= self.thresholds.reshape(-1, 1)
).astype(float)
).to(torch.float32)

def evaluate(self, strat: Union[Strategy, SequentialStrategy]) -> Dict[str, float]:
"""Evaluate the model with respect to this problem.
Expand Down Expand Up @@ -284,16 +294,16 @@ def evaluate(self, strat: Union[Strategy, SequentialStrategy]) -> Dict[str, floa
and p_l.shape[0] == len(self.thresholds)
)

# Predict p(below threshold) at test points
brier_p_below_thresh = np.mean(2 * np.square(true_p_l - p_l), axis=1)
# Now, perform the Brier score calculation and classification error in PyTorch
brier_p_below_thresh = torch.mean(2 * torch.square(true_p_l - p_l), dim=1)
# Classification error
misclass_on_thresh = np.mean(
p_l * (1 - true_p_l) + (1 - p_l) * true_p_l, axis=1
misclass_on_thresh = torch.mean(
p_l * (1 - true_p_l) + (1 - p_l) * true_p_l, dim=1
)

for i_threshold, threshold in enumerate(self.thresholds):
metrics[f"brier_p_below_{threshold}"] = brier_p_below_thresh[i_threshold]
metrics[f"misclass_on_thresh_{threshold}"] = misclass_on_thresh[i_threshold]
metrics[f"brier_p_below_{threshold}"] = brier_p_below_thresh.detach().cpu().numpy()[i_threshold]
metrics[f"misclass_on_thresh_{threshold}"] = misclass_on_thresh.detach().cpu().numpy()[i_threshold]
return metrics


Expand Down Expand Up @@ -323,15 +333,15 @@ def evaluate(self, strat):
ub2 = ub - self.eps * r

near_edge = (
np.logical_or(
torch.logical_or(
(strat.x[-n_opt_trials:, :] <= lb2), (strat.x[-n_opt_trials:, :] >= ub2)
)
.any(axis=-1)
.any(dim=-1)
.double()
)

metrics["prop_edge_sampling_mean"] = near_edge.mean().item()
metrics["prop_edge_sampling_err"] = (
2 * near_edge.std() / np.sqrt(len(near_edge))
2 * near_edge.std() / torch.sqrt(len(near_edge))
).item()
return metrics
9 changes: 6 additions & 3 deletions aepsych/models/base.py
@@ -105,7 +105,7 @@ def update(
) -> None:
pass

def p_below_threshold(self, x, f_thresh) -> np.ndarray:
def p_below_threshold(self, x, f_thresh) -> torch.Tensor:
pass


@@ -378,9 +378,12 @@ def _fit_mll(
)
return res

def p_below_threshold(self, x, f_thresh) -> np.ndarray:
def p_below_threshold(self, x, f_thresh) -> torch.Tensor: # Return a tensor instead of NumPy array
f, var = self.predict(x)
f_thresh = f_thresh.reshape(-1, 1)
f = f.reshape(1, -1)
var = var.reshape(1, -1)
return norm.cdf((f_thresh - f.detach().numpy()) / var.sqrt().detach().numpy())

# Perform all operations in PyTorch (no .detach().numpy())
z = (f_thresh - f) / var.sqrt()
return torch.distributions.Normal(0, 1).cdf(z) # Use PyTorch's CDF equivalent
41 changes: 22 additions & 19 deletions tests/test_benchmark.py
@@ -70,34 +70,35 @@ def setUp(self):
np.random.seed(1)

self.n_thresholds = 5
self.thresholds = np.linspace(0.55, 0.95, self.n_thresholds)
self.thresholds = torch.linspace(0.55, 0.95, self.n_thresholds)
self.test_problem = example_problems.DiscrimLowDim(thresholds=self.thresholds)
self.model = GPClassificationModel(
lb=self.test_problem.lb, ub=self.test_problem.ub
)

def unvectorized_p_below_threshold(self, x, f_thresh) -> np.ndarray:
def unvectorized_p_below_threshold(self, x, f_thresh) -> torch.Tensor:
"""this is the original p_below_threshold method in the AEPsychMixin that calculates model prediction
of the probability of the stimulus being below a threshold
for one single threshold"""
f, var = self.model.predict(x)
return norm.cdf((f_thresh - f.detach().numpy()) / var.sqrt().detach().numpy())

# Perform all operations in PyTorch (no .detach().numpy())
z = (f_thresh - f) / var.sqrt()
return torch.distributions.Normal(0, 1).cdf(z) # Use PyTorch's CDF equivalent

def unvectorized_true_below_threshold(self, threshold):
"""the original true_below_threshold method in the LSEProblem class"""
return (self.test_problem.p(self.test_problem.eval_grid) <= threshold).astype(
float
)
return (self.test_problem.p(self.test_problem.eval_grid) <= threshold).to(torch.float32)

def test_vectorized_score_calculation(self):
f_thresholds = self.test_problem.f_threshold(self.model)
p_l = self.model.p_below_threshold(self.test_problem.eval_grid, f_thresholds)
true_p_l = self.test_problem.true_below_threshold
# Predict p(below threshold) at test points
brier_p_below_thresh = np.mean(2 * np.square(true_p_l - p_l), axis=1)
# Now, perform the Brier score calculation and classification error in PyTorch
brier_p_below_thresh = torch.mean(2 * torch.square(true_p_l - p_l), dim=1)
# Classification error
misclass_on_thresh = np.mean(
p_l * (1 - true_p_l) + (1 - p_l) * true_p_l, axis=1
misclass_on_thresh = torch.mean(
p_l * (1 - true_p_l) + (1 - p_l) * true_p_l, dim=1
)
assert (
p_l.ndim == 2
@@ -106,31 +107,33 @@ def test_vectorized_score_calculation(self):
)

for i_threshold, single_threshold in enumerate(self.thresholds):
single_f_threshold = norm.ppf(single_threshold)
assert np.isclose(single_f_threshold, f_thresholds[i_threshold])
normal_dist = torch.distributions.Normal(0, 1)
single_f_threshold = normal_dist.icdf(single_threshold).float() # equivalent to norm.ppf

assert torch.isclose(single_f_threshold, f_thresholds[i_threshold])

unvectorized_p_l = self.unvectorized_p_below_threshold(
self.test_problem.eval_grid, single_f_threshold
)
assert np.all(np.isclose(unvectorized_p_l, p_l[i_threshold]))
assert torch.all(torch.isclose(unvectorized_p_l, p_l[i_threshold]))

unvectorized_true_p_l = self.unvectorized_true_below_threshold(
single_threshold
)
assert np.all(np.isclose(unvectorized_true_p_l, true_p_l[i_threshold]))
assert torch.all(torch.isclose(unvectorized_true_p_l, true_p_l[i_threshold]))

unvectorized_brier_score = np.mean(
2 * np.square(unvectorized_true_p_l - unvectorized_p_l)
unvectorized_brier_score = torch.mean(
2 * torch.square(unvectorized_true_p_l - unvectorized_p_l)
)
assert np.isclose(
assert torch.isclose(
unvectorized_brier_score, brier_p_below_thresh[i_threshold]
)

unvectorized_misclass_err = np.mean(
unvectorized_misclass_err = torch.mean(
unvectorized_p_l * (1 - unvectorized_true_p_l)
+ (1 - unvectorized_p_l) * unvectorized_true_p_l
)
assert np.isclose(
assert torch.isclose(
unvectorized_misclass_err, misclass_on_thresh[i_threshold]
)

