This repository was archived by the owner on Jan 27, 2025. It is now read-only.

Commit dea8b65

enh: initial implementation of cross-validated hyperparameter selection
1 parent 2ac0106 commit dea8b65

File tree

1 file changed: +72 -0 lines changed


src/eddymotion/model/gpr.py

Lines changed: 72 additions & 0 deletions
@@ -28,27 +28,33 @@
 from typing import Callable, Mapping, Sequence
 
 import numpy as np
+from ConfigSpace import Configuration
 from scipy import optimize
 from scipy.optimize._minimize import Bounds
+from sklearn.base import clone
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import (
     Hyperparameter,
     Kernel,
 )
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.model_selection import RepeatedKFold, cross_val_score
 from sklearn.utils._param_validation import Interval, StrOptions
 
 BOUNDS_A: tuple[float, float] = (0.1, 2.35)
 """The limits for the parameter *a* (angular distance in rad)."""
 BOUNDS_LAMBDA: tuple[float, float] = (1e-3, 1000)
 """The limits for the parameter λ (signal scaling factor)."""
+BOUNDS_ALPHA: tuple[float, float] = (1e-3, 500)
+"""The limits for the parameter σ² (noise adjustment, alpha in Scikit-learn's GP regressor)."""
 THETA_EPSILON: float = 1e-5
 """Minimum nonzero angle."""
 LBFGS_CONFIGURABLE_OPTIONS = {"disp", "maxiter", "ftol", "gtol"}
 """The set of extended options that can be set on the default BFGS."""
 CONFIGURABLE_OPTIONS: Mapping[str, set] = {
     "Nelder-Mead": {"disp", "maxiter", "adaptive", "fatol"},
     "CG": {"disp", "maxiter", "gtol"},
+    "cross-validation": {"scoring", "n_folds", "n_evaluations"},
 }
 """
 A mapping from optimizer names to the option set they allow.
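
How this mapping is consumed lies outside the hunk, but the usual pattern is to filter the constructor's keyword arguments down to the set the selected optimizer accepts. A hypothetical, self-contained sketch of that filtering (filter_options and its arguments are made-up names, not from this file):

    from typing import Mapping

    CONFIGURABLE_OPTIONS: Mapping[str, set] = {
        "Nelder-Mead": {"disp", "maxiter", "adaptive", "fatol"},
        "CG": {"disp", "maxiter", "gtol"},
        "cross-validation": {"scoring", "n_folds", "n_evaluations"},
    }

    def filter_options(optimizer: str, **candidates) -> dict:
        # Keep only the options the selected optimizer accepts,
        # dropping unset (None) values.
        allowed = CONFIGURABLE_OPTIONS.get(optimizer, set())
        return {k: v for k, v in candidates.items() if k in allowed and v is not None}

    print(filter_options(
        "cross-validation",
        scoring="neg_root_mean_squared_error",
        n_folds=10,
        gtol=None,
    ))
    # {'scoring': 'neg_root_mean_squared_error', 'n_folds': 10}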
@@ -161,6 +167,9 @@ class EddyMotionGPR(GaussianProcessRegressor):
         "normalize_y": ["boolean"],
         "n_targets": [Interval(Integral, 1, None, closed="left"), None],
         "random_state": ["random_state"],
+        "n_folds": [Interval(Integral, 3, None, closed="left")],
+        "n_evaluations": [Interval(Integral, 3, None, closed="left")],
+        "n_trials": [Interval(Integral, 3, None, closed="left")],
     }
 
     def __init__(
@@ -182,6 +191,10 @@ def __init__(
         gtol: float | None = None,
         adaptive: bool | int | None = None,
         fatol: float | None = None,
+        scoring: str = "neg_root_mean_squared_error",
+        n_folds: int | None = 10,
+        n_evaluations: int | None = 40,
+        n_trials: int | None = 200,
     ):
         super().__init__(
             kernel,
@@ -202,6 +215,10 @@ def __init__(
         self.gtol = gtol
         self.adaptive = adaptive
         self.fatol = fatol
+        self.scoring = scoring
+        self.n_folds = n_folds
+        self.n_evaluations = n_evaluations
+        self.n_trials = n_trials
 
     def _constrained_optimization(
         self,
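
The next hunk wires SMAC3 into _constrained_optimization. For reference, SMAC's HyperparameterOptimizationFacade expects a target function taking a Configuration (and a seed) and returning a cost that it minimizes; a minimal, self-contained sketch of that contract on a toy quadratic (not from this repository):

    from ConfigSpace import Configuration, ConfigurationSpace, Float
    from smac import HyperparameterOptimizationFacade, Scenario

    def quadratic(config: Configuration, seed: int = 0) -> float:
        # SMAC treats the return value as a cost and minimizes it.
        return (config["x"] - 2.0) ** 2

    cs = ConfigurationSpace()
    cs.add(Float("x", (-5.0, 5.0), default=0.0))
    scenario = Scenario(cs, n_trials=30)
    incumbent = HyperparameterOptimizationFacade(scenario, quadratic).optimize()
    print(incumbent["x"])  # close to 2.0

This cost convention is why cross_validation below negates the scikit-learn score.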
@@ -210,6 +227,40 @@ def _constrained_optimization(
         bounds: Sequence[tuple[float, float]] | Bounds,
     ) -> tuple[float, float]:
         options = {}
+
+        if self.optimizer == "cross-validation":
+            from ConfigSpace import ConfigurationSpace, Float
+            from smac import HyperparameterOptimizationFacade, Scenario
+
+            cs = ConfigurationSpace()
+            beta_a = Float(
+                "kernel__beta_a",
+                tuple(self.kernel.a_bounds),
+                default=self.kernel_.beta_a,
+                log=True,
+            )
+            beta_l = Float(
+                "kernel__beta_l",
+                tuple(self.kernel.l_bounds),
+                default=self.kernel_.beta_l,
+                log=True,
+            )
+            cs.add([beta_a, beta_l])
+
+            # Scenario object specifying the optimization environment
+            scenario = Scenario(cs, n_trials=self.n_trials)
+
+            # Use SMAC to find the best configuration/hyperparameters
+            smac = HyperparameterOptimizationFacade(
+                scenario,
+                self.cross_validation,
+            )
+            incumbent = smac.optimize()
+            return (
+                np.log([incumbent["kernel__beta_a"], incumbent["kernel__beta_l"]]),
+                0,
+            )
+
         if self.optimizer == "fmin_l_bfgs_b":
             from sklearn.utils.optimize import _check_optimize_result
 
@@ -252,6 +303,27 @@ def _constrained_optimization(
 
         raise ValueError(f"Unknown optimizer {self.optimizer}.")
 
+    def cross_validation(
+        self,
+        config: Configuration,
+        seed: int | None = None,
+    ) -> float:
+        rkf = RepeatedKFold(
+            n_splits=self.n_folds,
+            n_repeats=max(self.n_evaluations // self.n_folds, 1),
+        )
+        gpr = clone(self)
+        gpr.set_params(**dict(config))
+        gpr.optimizer = None  # disable nested optimization on the clone
+        scores = cross_val_score(
+            gpr,
+            self.X_train_,
+            self.y_train_,
+            scoring=self.scoring,
+            cv=rkf,
+        )
+        return -np.mean(scores)  # negate: SMAC minimizes cost, sklearn scorers are greater-is-better
+
 
 class ExponentialKriging(Kernel):
     """A scikit-learn kernel for DWI signals."""

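A minimal sketch of how the new option might be exercised end to end, assuming ExponentialKriging is constructible with defaults and that smac and ConfigSpace are installed; the arrays below are synthetic placeholders, not data from the repository:

    import numpy as np
    from eddymotion.model.gpr import EddyMotionGPR, ExponentialKriging

    # Synthetic stand-ins for unit gradient directions (X) and DWI signal (y).
    rng = np.random.default_rng(2024)
    X = rng.standard_normal((60, 3))
    X /= np.linalg.norm(X, axis=1, keepdims=True)
    y = rng.standard_normal(60)

    gpr = EddyMotionGPR(
        kernel=ExponentialKriging(),
        optimizer="cross-validation",  # option introduced by this commit
        n_folds=5,          # splits per RepeatedKFold repetition
        n_evaluations=20,   # total CV fits per trial (n_repeats = 20 // 5 = 4)
        n_trials=50,        # SMAC configurations to evaluate
    )
    gpr.fit(X, y)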