Robust Gaussian Processes via Relevance Pursuit
Summary: Adds an implementation of the Robust Gaussian Processes via Relevance Pursuit models and algorithms from the NeurIPS 2024 paper.

Differential Revision: D65343571
SebastianAment authored and facebook-github-bot committed Nov 1, 2024
1 parent 3ca48d0 commit 4684fac
Showing 6 changed files with 1,571 additions and 2 deletions.
374 changes: 374 additions & 0 deletions botorch/models/likelihoods/sparse_outlier_noise.py
@@ -0,0 +1,374 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any
from warnings import warn

import torch
from botorch.exceptions.warnings import InputDataWarning
from botorch.models.model import Model
from botorch.models.relevance_pursuit import RelevancePursuitMixin
from botorch.utils.constraints import NonTransformedInterval
from gpytorch.distributions import MultivariateNormal
from gpytorch.likelihoods import _GaussianLikelihoodBase
from gpytorch.likelihoods.noise_models import FixedGaussianNoise, Noise
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.priors import Prior
from linear_operator.operators import DiagLinearOperator, LinearOperator
from linear_operator.utils.cholesky import psd_safe_cholesky
from torch import Tensor
from torch.nn.parameter import Parameter


class SparseOutlierGaussianLikelihood(_GaussianLikelihoodBase):
def __init__(
self,
base_noise: Noise | FixedGaussianNoise,
dim: int,
outlier_indices: list[int] | None = None,
rho_prior: Prior | None = None,
rho_constraint: NonTransformedInterval | None = None,
batch_shape: torch.Size | None = None,
convex_parameterization: bool = True,
loo: bool = True,
) -> None:
"""A likelihood that models the noise of a GP with a sparse outlier model that
permits additional "robust" variance for a small set of outlier data points.
Notably, the indices of the outlier data points can be inferred during the
        optimization of the associated log marginal likelihood.

        Args:
base_noise: The base noise model.
dim: The number of training observations on which to apply the noise model.
We could also get this from the forward pass when the model is in
training mode and cache it, but it's better to be explicit.
outlier_indices: The indices of the outliers.
rho_prior: Prior for the rho parameter.
rho_constraint: Constraint for the rho parameter. Needs to be a
NonTransformedInterval because exact sparsity cannot be represented
using smooth transforms like a softplus or sigmoid.
batch_shape: The batch shape of the learned noise parameter (default: []).
convex_parameterization: Whether to use a convex parameterization of rho.
loo: Whether to use leave-one-out (LOO) update equations that can compute
the optimal values of each individual rho, keeping all else equal.
"""
noise_covar = SparseOutlierNoise(
base_noise=base_noise,
dim=dim,
outlier_indices=outlier_indices,
rho_prior=rho_prior,
rho_constraint=rho_constraint,
batch_shape=batch_shape,
convex_parameterization=convex_parameterization,
loo=loo,
)
super().__init__(noise_covar=noise_covar)
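
    # A minimal construction sketch (illustrative; not part of this commit):
    # assuming `n` training points and GPyTorch's standard HomoskedasticNoise as
    # the base noise model, the likelihood could be instantiated as
    #
    #   from gpytorch.likelihoods.noise_models import HomoskedasticNoise
    #   likelihood = SparseOutlierGaussianLikelihood(
    #       base_noise=HomoskedasticNoise(), dim=n
    #   )
    #
    # and used in place of a plain GaussianLikelihood in an exact GP model.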

# pyre-ignore[14]: Inconsistent override
def marginal(
self,
function_dist: MultivariateNormal,
*params: Any,
) -> MultivariateNormal:
mean, covar = function_dist.mean, function_dist.lazy_covariance_matrix
# this scales the rhos by the diagonal of the "non-robust" covariance matrix
diag_K = covar.diagonal() if self.noise_covar.convex_parameterization else None
noise_covar = self.noise_covar.forward(*params, shape=mean.shape, diag_K=diag_K)
full_covar = covar + noise_covar
return function_dist.__class__(mean, full_covar)

def expected_log_prob(
self, target: Tensor, input: MultivariateNormal, *params: Any, **kwargs: Any
    ) -> Tensor:
        """Not implemented; this likelihood is intended for exact GP inference."""
        raise NotImplementedError


# Tangential to this, one could introduce a mixed variable / fixed noise
# class that would allow us to exactly condition the process on certain
# pseudo-observations corresponding to prior knowledge (e.g. that concrete
# strength is zero at time zero).
class SparseOutlierNoise(Noise, RelevancePursuitMixin):
def __init__(
self,
base_noise: Noise | FixedGaussianNoise,
dim: int,
outlier_indices: list[int] | None = None,
rho_prior: Prior | None = None,
rho_constraint: NonTransformedInterval | None = None,
batch_shape: torch.Size | None = None,
convex_parameterization: bool = True,
loo: bool = True,
):
"""A noise model that permits additional "robust" variance for a small set of
outlier data points. See also SparseOutlierGaussianLikelihood, which leverages
        this noise model.

        NOTE: base_noise could also use non-transformed constraints, which is
        probably more stable, but that is orthogonal implementation-wise.

        Args:
base_noise: The base noise model.
dim: The number of training observations on which to apply the noise model.
We could also get this from the forward pass when the model is in
training mode and cache it, but it's better to be explicit.
outlier_indices: The indices of the outliers.
rho_prior: Prior for the rho parameter.
rho_constraint: Constraint for the rho parameter. Needs to be a
NonTransformedInterval because exact sparsity cannot be represented
using smooth transforms like a softplus or sigmoid.
batch_shape: The batch shape of the learned noise parameter (default: []).
convex_parameterization: Whether to use a convex parameterization of rho.
loo: Whether to use leave-one-out (LOO) update equations that can compute
the optimal values of each individual rho, keeping all else equal.
"""
super().__init__()
RelevancePursuitMixin.__init__(self, dim=dim, support=outlier_indices)

if batch_shape is None:
batch_shape = base_noise.noise.shape[:-1]

self.base_noise = base_noise
device = base_noise.noise.device
if rho_constraint is None:
cvx_upper_bound = 1 - 1e-3 # < 1 to avoid singularities
rho_constraint = NonTransformedInterval(
lower_bound=0.0,
upper_bound=cvx_upper_bound if convex_parameterization else torch.inf,
initial_value=0.0,
)
else:
if not isinstance(rho_constraint, NonTransformedInterval):
raise ValueError("Requires NonTransformedInterval constraints.")

            if rho_constraint.lower_bound < 0:
                raise ValueError(
                    "SparseOutlierNoise requires rho_constraint.lower_bound >= 0."
                )

            if convex_parameterization and rho_constraint.upper_bound > 1:
                raise ValueError(
                    "Convex parameterization requires rho_constraint.upper_bound <= 1."
                )

# NOTE: Prefer to keep the initialization of the sparse_parameter in the
# derived classes of the Mixin, because it might require additional logic
# that we don't want to put into RelevancePursuitMixin.
num_outliers = len(self.support)
self.register_parameter(
"raw_rho",
parameter=Parameter(
torch.zeros(
*batch_shape,
num_outliers,
dtype=base_noise.noise.dtype,
device=device,
)
),
)

        if rho_prior is not None:

            def _rho_param(m):
                return m.rho

            def _rho_closure(m, v):
                return m._set_rho(v)

            self.register_prior("rho_prior", rho_prior, _rho_param, _rho_closure)

self.register_constraint("raw_rho", rho_constraint)
        # Only the getter of convex_parameterization is exposed publicly,
        # since post-hoc modification can lead to inconsistencies with the
        # rho constraints.
self._convex_parameterization = convex_parameterization
self.loo = loo

@property
def sparse_parameter(self) -> Parameter:
return self.raw_rho

def set_sparse_parameter(self, value: Parameter) -> None:
"""Sets the sparse parameter.
NOTE: We can't use the property setter @sparse_parameter.setter because of
the special way PyTorch treats Parameter types, including custom setters.
"""
self.raw_rho = torch.nn.Parameter(value.to(self.raw_rho))

@property
def convex_parameterization(self) -> bool:
return self._convex_parameterization

@staticmethod
def _from_model(model: Model) -> RelevancePursuitMixin:
sparse_module = model.likelihood.noise_covar
if not isinstance(sparse_module, SparseOutlierNoise):
            raise ValueError(
                "The model's likelihood does not have a SparseOutlierNoise noise "
                f"as its noise_covar module, but instead a {type(sparse_module)}."
            )
return sparse_module

@property
def _convex_rho(self) -> Tensor:
"""Transforms the raw_rho parameter such that `rho ~= 1 / (1 - raw_rho) - 1`,
which is a diffeomorphism from [0, 1] to [0, inf] whose derivative is nowhere
zero. This transforms the marginal log likelihood to be a convex function of
the raw_rho parameter, when the covariance matrix is well conditioned.
NOTE: The convex parameterization also includes a scaling of the rho values by
the diagonal of the covariance matrix, which is carried out in the `marginal`
call in the SparseOutlierGaussianLikelihood.
"""
# pyre-ignore[7]: It is not have an incompatible return type, pyre just doesn't
# recognize that the result gets promoted to a Tensor.
return 1 / (1 - self.raw_rho) - 1
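
    # Quick numeric check of the transform above (illustrative sketch):
    #   raw = torch.tensor([0.0, 0.5, 0.9, 0.99])
    #   1 / (1 - raw) - 1  # -> tensor([0., 1., 9., 99.])
    # i.e. raw_rho near 0 yields a negligible robustness variance (inlier), and
    # raw_rho -> 1 lets rho grow without bound (outlier).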

    # These two properties don't strictly need to be methods; they could also be
    # passed around as local closures.
@property
def rho(self) -> Tensor:
"""Dense representation of the potentially sparsely represented raw_rho values,
so that the last dimension is equal to the number of training points self.dim.
NOTE: In this case the getter needs to be different than the sparse_parameter
getter, because the latter must be able to return the parameter in its sparse
representation. The rho property embeds the sparse representation in a dense
tensor in order only to propagate gradients to the sparse rhos in the support.
Returns:
A `batch_shape x self.dim`-dim Tensor of robustness variances.
"""
# NOTE: don't need to do transform / untransform since we are
# enforcing NonTransformedIntervals.
rho_outlier = self._convex_rho if self.convex_parameterization else self.raw_rho
if not self.is_sparse: # in the dense representation, we're done.
return rho_outlier

# If rho_outlier is in the sparse representation, we need to pad the
# rho values with zeros at the correct positions. The difference
# between this and calling RelevancePursuit's `to_dense` is that
# the latter will propagate gradients through all rhos, whereas
# the path here only propagates gradients to the sparse set of
# outliers, which is important for the optimization of the support.
rho_inlier = torch.zeros(
1, dtype=rho_outlier.dtype, device=rho_outlier.device
).expand(rho_outlier.shape[:-1] + (1,))
rho = torch.cat(
[rho_outlier, rho_inlier], dim=-1
) # batch_shape x (num_outliers + 1)

return rho[..., self._rho_selection_indices]

@property
def _rho_selection_indices(self) -> Tensor:
        # Maps each training index to the position of its rho value in the sparse
        # representation; indices that are not in the support map to -1, i.e. to
        # the trailing zero appended in the `rho` property above, so that inliers
        # receive a robustness variance of exactly zero.
rho_selection_indices = torch.full(
self.raw_rho.shape[:-1] + (self.dim,),
-1,
dtype=torch.long,
device=self.raw_rho.device,
)
for i, j in enumerate(self.support):
            rho_selection_indices[..., j] = i

return rho_selection_indices
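
    # Worked example of the padding scheme (illustrative): with self.dim = 5 and
    # support = [1, 3], the padded vector from the `rho` property is
    # [rho_0, rho_1, 0] and _rho_selection_indices is [-1, 0, -1, 1, -1], so
    #   padded[..., indices] = [0, rho_0, 0, rho_1, 0],
    # where every -1 selects the trailing zero (the inlier value).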

# pyre-ignore[14]: Inconsistent override
def forward(
self,
*params: Any,
diag_K: Tensor | None = None,
shape: torch.Size | None = None,
) -> LinearOperator | Tensor:
"""Computes the covariance matrix of the sparse outlier noise model.
Args:
params: The parameters of noise model, same as for GPyTorch's noise model.
diag_K: The diagonal of the covariance matrix, which is used to scale the
rho values in the convex parameterization.
shape: The shape of the covariance matrix, which is used to broadcast the
rho values to the correct shape.
Returns:
A `batch_shape x self.dim`-dim Tensor of robustness variances.
"""
noise_covar = self.base_noise(*params, shape=shape)
# rho should always be applied to the training set, irrespective of whether or
# not we are in training mode.
        # NOTE: if `posterior` is called with `observation_noise=True`, and the test
        # set has the same size as the training set, this would still apply the
        # rhos. This could be improved by testing equality with the training inputs.
rho = self.rho
if noise_covar.shape[-1] == rho.shape[-1]:
if diag_K is not None:
rho = (diag_K + noise_covar.diagonal()) * rho
noise_covar = noise_covar + DiagLinearOperator(rho)
else:
warn(
"SparseOutlierNoise: Robust rho not applied because the shape of the "
"base noise covariance is not compatible with the shape of rho. This "
"usually happens when the model posterior is evaluated on test data.",
InputDataWarning,
)
return noise_covar
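
    # Behavior sketch (illustrative): with rho supported on the outlier indices,
    # the returned operator is effectively
    #   base_noise(X) + DiagLinearOperator(scale * rho)
    # where scale = diag_K + base-noise diagonal in the convex parameterization
    # and scale = 1 otherwise, i.e. outliers get extra diagonal variance.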

# relevance pursuit method expansion and contraction related methods
def expansion_objective(self, mll: ExactMarginalLogLikelihood) -> Tensor:
"""Computes an objective value for all the inactive parameters, i.e.
self.sparse_parameter[~self.is_active] since we can't add already active
parameters to the support. This value will be used to select the parameters.
Args:
mll: The marginal likelihood, containing the model to optimize.
Returns:
The expansion objective value for all the inactive parameters.
"""
        # Could check whether the biggest change in rho coincides with the largest
        # change in likelihood; if not, the objective could be adjusted here.
f = self._optimal_rhos if self.loo else self._sparse_parameter_gradient
return f(mll)
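
    # In the relevance pursuit outer loop (hypothetical driver sketch; the actual
    # algorithms live in botorch/models/relevance_pursuit.py), this objective is
    # evaluated over the inactive indices and the best-scoring ones are added to
    # the support, e.g.
    #   values = noise.expansion_objective(mll)
    #   best = values.argmax()  # most promising currently-inactive data point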

def _optimal_rhos(self, mll: ExactMarginalLogLikelihood) -> Tensor:
"""Computes the optimal rho deltas for the given model.
Args:
mll: The marginal likelihood, containing the model to optimize.
Returns:
A `batch_shape x self.dim`-dim Tensor of optimal rho deltas.
"""
# train() is important, since we want to evaluate the prior with mll.model(X),
# but in eval(), __call__ gives the posterior.
mll.train() # NOTE: this changes model.train_inputs to be unnormalized.
X, Y = mll.model.train_inputs[0], mll.model.train_targets
F = mll.model(X)
L = mll.likelihood(F)
S = L.covariance_matrix # (Kernel Matrix + Noise Matrix)

# NOTE: The following computation is mathematically equivalent to the formula
# in this comment, but leverages the positive-definiteness of S via its
# Cholesky factorization.
# S_inv = S.inverse()
# diag_S_inv = S_inv.diagonal(dim1=-1, dim2=-2)
# loo_var = 1 / S_inv.diagonal(dim1=-1, dim2=-2)
# loo_mean = Y - (S_inv @ Y) / diag_S_inv

chol = psd_safe_cholesky(S, upper=True)
eye = torch.eye(chol.size(-1), device=chol.device, dtype=chol.dtype)
inv_root = torch.linalg.solve_triangular(chol, eye, upper=True)

        # sanity check: inv_root.square().sum(dim=-1) should equal S.inverse().diagonal()
diag_S_inv = inv_root.square().sum(dim=-1)
loo_var = 1 / diag_S_inv
S_inv_Y = torch.cholesky_solve(Y.unsqueeze(-1), chol, upper=True).squeeze(-1)
loo_mean = Y - S_inv_Y / diag_S_inv

loo_error = loo_mean - Y
optimal_rho_deltas = loo_error.square() - loo_var
return (optimal_rho_deltas - self.rho).clamp(0)[~self.is_active]
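
    # Illustrative check of the LOO identities above via the naive inverse
    # (mathematically equivalent, but numerically less stable than the
    # Cholesky-based path):
    #   S_inv = torch.linalg.inv(S)
    #   diag_S_inv = S_inv.diagonal(dim1=-2, dim2=-1)
    #   assert torch.allclose(1 / diag_S_inv, loo_var)
    #   assert torch.allclose(Y - (S_inv @ Y) / diag_S_inv, loo_mean)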
