Remove normalize_inputs and replace with parameter transform (facebookresearch#431)

Summary:
Pull Request resolved: facebookresearch#431

`normalize_inputs` (the one that min-max scales parameters) is confusingly named (there's another `normalize_inputs` that concatenates data and ensures they're all the right types), and it is a hard-coded transformation applied to all parameters. This means there's no way to turn the behavior off selectively, nor is it obvious that it is happening.

This diff removes the `normalize_inputs` method and replaces it with a parameter transform that also allows selective application of the transform via an index.

Differential Revision: D65069497
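
Not part of this commit — a minimal sketch of the idea, using botorch's `Normalize` input transform restricted to chosen dimensions via `indices` (the bounds and index below are made-up illustrative values):

```python
import torch
from botorch.models.transforms.input import ChainedInputTransform, Normalize

# Two parameters with very different scales; only the second one (index 1) is normalized.
bounds = torch.tensor([[0.0, -3000.0], [1.0, 3000.0]])  # row 0 = lower bounds, row 1 = upper bounds

transforms = ChainedInputTransform(
    par2_normalize=Normalize(d=2, indices=[1], bounds=bounds),
)

x = torch.tensor([[0.5, 1500.0]])
print(transforms.transform(x))  # column 1 is min-max scaled to [0, 1]; column 0 passes through
```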
JasonKChow authored and facebook-github-bot committed Nov 1, 2024
1 parent 46d0957 commit cd971a0
Show file tree
Hide file tree
Showing 16 changed files with 304 additions and 97 deletions.
45 changes: 29 additions & 16 deletions aepsych/config.py
@@ -8,6 +8,7 @@
import ast
import configparser
import json
import logging
import re
import warnings
from types import ModuleType
@@ -168,24 +169,36 @@ def update(

# Warn if ub/lb is defined in common section
if "ub" in self["common"] and "lb" in self["common"]:
warnings.warn(
"ub and lb have been defined in common section, ignoring parameter specific blocks, be very careful!"
)
elif "parnames" in self["common"]: # it's possible to pass no parnames
par_names = self.getlist(
"common", "parnames", element_type=str, fallback=[]
logging.warning(
"ub and lb have been defined in common section, parameter-specific bounds take precedence over these."
)
lb = [None] * len(par_names)
ub = [None] * len(par_names)
for i, par_name in enumerate(par_names):
# Validate the parameter-specific block
self._check_param_settings(par_name)

lb[i] = self[par_name]["lower_bound"]
ub[i] = self[par_name]["upper_bound"]

self["common"]["lb"] = f"[{', '.join(lb)}]"
self["common"]["ub"] = f"[{', '.join(ub)}]"
if "parnames" in self["common"]: # it's possible to pass no parnames
try:
par_names = self.getlist(
"common", "parnames", element_type=str, fallback=[]
)
lb = [None] * len(par_names)
ub = [None] * len(par_names)
for i, par_name in enumerate(par_names):
# Validate the parameter-specific block
self._check_param_settings(par_name)

lb[i] = self[par_name]["lower_bound"]
ub[i] = self[par_name]["upper_bound"]

self["common"]["lb"] = f"[{', '.join(lb)}]"
self["common"]["ub"] = f"[{', '.join(ub)}]"
except ValueError:
# Check if ub/lb exists in common
if "ub" in self["common"] and "lb" in self["common"]:
logging.warning(
"Parameter-specific bounds are incomplete, falling back to ub/lb in [common]"
)
else:
raise ValueError(
"Missing ub or lb in [common] with incomplete parameter-specific bounds, cannot fallback!"
)

# Deprecation warning for "experiment" section
if "experiment" in self:
9 changes: 2 additions & 7 deletions aepsych/models/base.py
@@ -328,10 +328,6 @@ def set_train_data(self, inputs: Optional[torch.Tensor] = None, targets: Optiona
if targets is not None:
self.train_targets = targets

def normalize_inputs(self, x: torch.Tensor) -> torch.Tensor:
scale = self.ub - self.lb
return (x - self.lb) / scale

def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal:
"""Evaluate GP
@@ -342,9 +338,8 @@ def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal:
gpytorch.distributions.MultivariateNormal: Distribution object
holding mean and covariance at x.
"""
transformed_x = self.normalize_inputs(x)
mean_x = self.mean_module(transformed_x)
covar_x = self.covar_module(transformed_x)
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
return pred

8 changes: 2 additions & 6 deletions aepsych/models/monotonic_rejection_gp.py
@@ -342,11 +342,7 @@ def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultivariateNormal:
gpytorch.distributions.MultivariateNormal: Distribution object
holding mean and covariance at x.
"""

# final dim is deriv index, we only normalize the "real" dims
transformed_x = x.clone()
transformed_x[..., :-1] = self.normalize_inputs(transformed_x[..., :-1])
mean_x = self.mean_module(transformed_x)
covar_x = self.covar_module(transformed_x)
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
return latent_pred
9 changes: 5 additions & 4 deletions aepsych/models/multitask_regression.py
@@ -78,10 +78,11 @@ def __init__(
self.covar_module, num_tasks=num_outputs, rank=rank
)

def forward(self, x: torch.Tensor) -> gpytorch.distributions.MultitaskMultivariateNormal:
transformed_x = self.normalize_inputs(x)
mean_x = self.mean_module(transformed_x)
covar_x = self.covar_module(transformed_x)
def forward(
self, x: torch.Tensor
) -> gpytorch.distributions.MultitaskMultivariateNormal:
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

@classmethod
11 changes: 5 additions & 6 deletions aepsych/models/semi_p.py
@@ -521,18 +521,17 @@ def forward(self, x: torch.Tensor) -> MultivariateNormal:
Returns:
MVN object evaluated at samples
"""
transformed_x = self.normalize_inputs(x)
# TODO: make slope prop to intensity width.
slope_mean = self.slope_mean_module(transformed_x)
slope_mean = self.slope_mean_module(x)

# kc mvn
offset_mean = self.offset_mean_module(transformed_x)
offset_mean = self.offset_mean_module(x)

slope_cov = self.slope_covar_module(transformed_x)
offset_cov = self.offset_covar_module(transformed_x)
slope_cov = self.slope_covar_module(x)
offset_cov = self.offset_covar_module(x)

mean_x, cov_x = _hadamard_mvn_approx(
x_intensity=transformed_x[..., self.stim_dim],
x_intensity=x[..., self.stim_dim],
slope_mean=slope_mean,
slope_cov=slope_cov,
offset_mean=offset_mean,
3 changes: 3 additions & 0 deletions aepsych/models/utils.py
@@ -172,6 +172,9 @@ def get_extremum(
timeout_sec=max_time,
)

if hasattr(model, "transforms"):
best_point = model.transforms.untransform(best_point)

# PosteriorMean flips the sign on minimize, we flip it back
if extremum_type == "min":
best_val = -best_val
4 changes: 2 additions & 2 deletions aepsych/strategy.py
@@ -138,8 +138,8 @@ def __init__(

self.transforms = transforms
if self.transforms is not None:
self.lb = self.transforms.transform(self.lb)
self.ub = self.transforms.transform(self.ub)
self.lb = self.transforms.transform(self.lb.unsqueeze(0))[0]
self.ub = self.transforms.transform(self.ub.unsqueeze(0))[0]

self.min_post_range = min_post_range
if self.min_post_range is not None:
56 changes: 43 additions & 13 deletions aepsych/transforms/parameters.py
@@ -6,6 +6,7 @@
# LICENSE file in the root directory of this source tree.
import ast
from abc import ABC, abstractmethod
from configparser import NoOptionError
from copy import deepcopy
from typing import Any, Callable, List, Optional, Tuple, Type

Expand All @@ -16,7 +17,7 @@
from aepsych.generators.base import AEPsychGenerator
from aepsych.models.base import AEPsychMixin, ModelProtocol
from botorch.acquisition import AcquisitionFunction
from botorch.models.transforms.input import ChainedInputTransform, Log10
from botorch.models.transforms.input import ChainedInputTransform, Log10, Normalize
from botorch.models.transforms.utils import subset_transform
from botorch.posteriors import Posterior
from torch import Tensor
@@ -107,23 +108,52 @@ def from_config(cls, config: Config) -> "ParameterTransforms":
ParameterTransforms: A configured ParameterTransforms for the config.
"""
parnames: List[str] = config.getlist("common", "parnames", element_type=str)

# Try to build a full array of bounds based on parameter-specific bounds
try:
_lower_bounds = torch.tensor(
[config.getfloat(par, "lower_bound") for par in parnames]
)
_upper_bounds = torch.tensor(
[config.getfloat(par, "upper_bound") for par in parnames]
)

bounds = torch.stack((_lower_bounds, _upper_bounds))

except NoOptionError: # Look for general lb/ub array
_lb = config.gettensor("common", "lb")
_ub = config.gettensor("common", "ub")
bounds = torch.stack((_lb, _ub))

transformDict = {}
for i, par in enumerate(parnames):
# This is the order in which transforms are potentially applied; order matters

# Log scale
if config.getboolean(par, "log_scale", fallback=False):
lb = config.getfloat(par, "lower_bound")
lb = bounds[0, i].numpy()
if lb < 0.0:
transformDict[f"{par}_Log10Plus"] = Log10Plus(
indices=[i], constant=np.abs(lb) + 1.0
)
xform = Log10Plus(indices=[i], constant=np.abs(lb) + 1.0)
transformDict[f"{par}_Log10Plus"] = xform

elif lb < 1.0:
transformDict[f"{par}_Log10Plus"] = Log10Plus(
indices=[i], constant=1.0
)
xform = Log10Plus(indices=[i], constant=1.0)
transformDict[f"{par}_Log10Plus"] = xform

else:
transformDict[f"{par}_Log10"] = Log10(indices=[i])
xform = Log10(indices=[i])
transformDict[f"{par}_Log10"] = xform

# Transform bounds
bounds = xform.transform(bounds)

# Normalize scale (defaults true)
if config.getboolean(par, "normalize_scale", fallback=True):
xform = Normalize(d=len(parnames), indices=[i], bounds=bounds)
transformDict[f"{par}_Normalize"] = xform

# Transform bounds
bounds = xform.transform(bounds)

return cls(**transformDict)

@@ -192,9 +222,9 @@ def __init__(
# Figure out what we need to do with generator
if isinstance(generator, type):
if "lb" in kwargs:
kwargs["lb"] = transforms.transform(kwargs["lb"].float())
kwargs["lb"] = transforms.transform(kwargs["lb"].to(torch.float64))
if "ub" in kwargs:
kwargs["ub"] = transforms.transform(kwargs["ub"].float())
kwargs["ub"] = transforms.transform(kwargs["ub"].to(torch.float64))
_base_obj = generator(**kwargs)
else:
_base_obj = generator
@@ -326,9 +356,9 @@ def __init__(
# Alternative instantiation method for analysis (and not live)
if isinstance(model, type):
if "lb" in kwargs:
kwargs["lb"] = transforms.transform(kwargs["lb"].float())
kwargs["lb"] = transforms.transform(kwargs["lb"].to(torch.float64))
if "ub" in kwargs:
kwargs["ub"] = transforms.transform(kwargs["ub"].float())
kwargs["ub"] = transforms.transform(kwargs["ub"].to(torch.float64))
_base_obj = model(**kwargs)
else:
_base_obj = model
29 changes: 28 additions & 1 deletion docs/parameters.md
@@ -15,7 +15,6 @@ what parameter types are used and whatever transformations are used.
Currently, we only support continuous parameters. More parameter types soon to come!

<h3>Continuous</h3>

```ini
[parameter]
par_type = continuous
@@ -58,3 +57,31 @@
For parameters with lower bounds that are positive but still less than 1, we will always use
a constant value of 1 (i.e., `Log10(x + 1)` and `10^x - 1`). For parameters with
lower bounds that are negative, we will use a constant value of the absolute value of
the lower bound + 1 (i.e., `Log10(x + |lb| + 1)` and `10^x - |lb| - 1`).
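
Not part of the diff — a small worked example of the constant-picking rule above, written as plain Python rather than the aepsych transform classes (`log_scale_value` is a hypothetical helper, not the library API):

```python
import math

def log_scale_value(x: float, lower_bound: float) -> float:
    """Hypothetical helper mirroring the documented rule; not the aepsych API."""
    if lower_bound < 0.0:
        constant = abs(lower_bound) + 1.0  # negative lower bound: Log10(x + |lb| + 1)
    elif lower_bound < 1.0:
        constant = 1.0                     # lower bound in [0, 1): Log10(x + 1)
    else:
        constant = 0.0                     # lower bound >= 1: plain Log10(x)
    return math.log10(x + constant)

print(log_scale_value(0.0, lower_bound=-5.0))  # log10(0 + 6)   ~= 0.778
print(log_scale_value(0.5, lower_bound=0.5))   # log10(0.5 + 1) ~= 0.176
```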

<h3>Normalize scale</h3>
By default, all parameters will have their scale min-max normalized to the range
[0, 1]. This prevents any particular parameter with a large scale from completely
dominating the other parameters. Very rarely, this behavior may not be desired and can
be turned off for specific parameters.

```ini
[parameter]
par_type = continuous
lower_bound = 1
upper_bound = 100
normalize_scale = False # turn it on with any of true/yes/on, turn it off with any of false/no/off; case insensitive
```

By setting the `normalize_scale` option to False, this parameter will not be scaled
before being given to the model and will therefore keep its original magnitude. This is
very rarely necessary and should be used with caution.
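
Not part of the diff — a quick numeric illustration of what the default min-max scaling does to the parameter in the config above, versus leaving `normalize_scale = False`:

```python
lower_bound, upper_bound = 1.0, 100.0
x = 50.5

# normalize_scale on (the default): the model sees the value mapped into [0, 1]
print((x - lower_bound) / (upper_bound - lower_bound))  # 0.5

# normalize_scale = False: the model sees the raw magnitude
print(x)  # 50.5
```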

<h2>Order of operations</h2>
Parameter types and parameter-specific transforms are all handled by the
`ParameterTransform` API. Transforms built from config files are applied in a specific
order of operations, regardless of how the options are set in the config file. Each
parameter is transformed entirely separately; a minimal sketch follows the list below.

Currently, the order is as follows:
* Log scale
* Normalize scale
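
Not part of the diff — a minimal sketch of the chain this produces for a single parameter that is both log-scaled and normalized, following the order above. It assumes `Log10Plus` is importable from `aepsych.transforms.parameters`; the bounds are illustrative.

```python
import torch
from aepsych.transforms import ParameterTransforms
from aepsych.transforms.parameters import Log10Plus  # assumed import path
from botorch.models.transforms.input import Normalize

# One parameter with bounds [0, 100]; the lower bound is in [0, 1), so the log constant is 1.
bounds = torch.tensor([[0.0], [100.0]])

log_xf = Log10Plus(indices=[0], constant=1.0)
log_bounds = log_xf.transform(bounds)  # the Normalize step is built on log-space bounds

transforms = ParameterTransforms(
    par_Log10Plus=log_xf,                                          # applied first
    par_Normalize=Normalize(d=1, indices=[0], bounds=log_bounds),  # then min-max scaling
)

x = torch.tensor([[9.0]])
print(transforms.transform(x))  # log10(9 + 1) = 1.0, then 1.0 / log10(101) ~= 0.5
```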
4 changes: 2 additions & 2 deletions tests/generators/test_manual_generator.py
@@ -54,8 +54,8 @@ def test_manual_generator(self):
config.update(config_str=config_str)
# gen = ManualGenerator.from_config(config)
gen = GeneratorWrapper.from_config("init_strat", config)
npt.assert_equal(gen.lb.numpy(), np.array([10, 10]))
npt.assert_equal(gen.ub.numpy(), np.array([11, 11]))
npt.assert_equal(gen.lb.numpy(), np.array([0, 0]))
npt.assert_equal(gen.ub.numpy(), np.array([1, 1]))
self.assertFalse(gen.finished)

p1 = list(gen.gen()[0])
20 changes: 16 additions & 4 deletions tests/models/test_gp_classification.py
@@ -23,6 +23,8 @@
from aepsych.generators import OptimizeAcqfGenerator, SobolGenerator
from aepsych.models import GPClassificationModel
from aepsych.strategy import SequentialStrategy, Strategy
from aepsych.transforms import ModelWrapper, ParameterTransforms
from aepsych.transforms.parameters import Normalize
from botorch.acquisition import qUpperConfidenceBound
from botorch.optim.fit import fit_gpytorch_mll_torch
from botorch.optim.stopping import ExpMAStoppingCriterion
@@ -208,11 +210,21 @@ def test_1d_classification_different_scales(self):
X, y = torch.Tensor(X), torch.Tensor(y)
X[:, 0] = X[:, 0] * 1000
X[:, 1] = X[:, 1] / 1000
lb = [-3000, -0.003]
ub = [3000, 0.003]

model = GPClassificationModel(lb=lb, ub=ub, inducing_size=20)
lb = torch.tensor([-3000, -0.003])
ub = torch.tensor([3000, 0.003])

transforms = ParameterTransforms(
normalize=Normalize(
2, bounds=torch.stack((lb, ub))
)
)
model = ModelWrapper(
model=GPClassificationModel,
lb=lb,
ub=ub,
inducing_size=20,
transforms=transforms,
)
model.fit(X[:50], y[:50])

# pspace
4 changes: 2 additions & 2 deletions tests/models/test_gp_regression.py
@@ -89,8 +89,8 @@ def test_extremum(self):

def test_from_config(self):
model = self.server.strat.model
npt.assert_allclose(model.lb, [-1.0])
npt.assert_allclose(model.ub, [3.0])
npt.assert_allclose(model.transforms.untransform(model.lb), [-1.0])
npt.assert_allclose(model.transforms.untransform(model.ub), [3.0])
self.assertEqual(model.dim, 1)
self.assertIsInstance(model.likelihood, GaussianLikelihood)
self.assertEqual(model.max_fit_time, 1)