From 6bb0aa520e176c271182a40488b67da82cce577a Mon Sep 17 00:00:00 2001 From: Franck Charras <29153872+fcharras@users.noreply.github.com> Date: Fri, 12 Jan 2024 20:50:27 +0100 Subject: [PATCH] Address review Co-authored-by: Olivier Grisel --- benchmarks/ridge/consolidate_result_csv.py | 8 +------- benchmarks/ridge/datasets/simulated_blobs.py | 7 +++---- benchmarks/ridge/objective.py | 16 ++++++++++++++-- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/benchmarks/ridge/consolidate_result_csv.py b/benchmarks/ridge/consolidate_result_csv.py index 4cc59d1..ddf5938 100644 --- a/benchmarks/ridge/consolidate_result_csv.py +++ b/benchmarks/ridge/consolidate_result_csv.py @@ -36,7 +36,6 @@ COMPUTE_DEVICE = "Compute device" COMPUTE_RUNTIME = "Compute runtime" DATA_RANDOM_STATE = "Data random state" -DATA_SAMPLE_WEIGHTS = "Data sample weights" DTYPE = "Dtype" NB_DATA_FEATURES = "Nb data features" NB_DATA_SAMPLES = "Nb data samples" @@ -66,7 +65,6 @@ NB_DATA_SAMPLES, NB_DATA_FEATURES, NB_DATA_TARGETS, - DATA_SAMPLE_WEIGHTS, WALLTIME, BACKEND_PROVIDER, COMPUTE_DEVICE, @@ -94,7 +92,6 @@ NB_DATA_SAMPLES: np.int64, NB_DATA_FEATURES: np.int64, NB_DATA_TARGETS: np.int64, - DATA_SAMPLE_WEIGHTS: str, WALLTIME: np.float64, BACKEND_PROVIDER: str, COMPUTE_DEVICE: str, @@ -116,7 +113,7 @@ COMMENT: str, } -COLUMNS_WITH_NONE_STRING = [DATA_SAMPLE_WEIGHTS] +COLUMNS_WITH_NONE_STRING = [] # If all those fields have equal values for two given benchmarks, then the oldest # benchmark (given by RUN_DATE) will be discarded @@ -126,7 +123,6 @@ NB_DATA_SAMPLES, NB_DATA_FEATURES, NB_DATA_TARGETS, - DATA_SAMPLE_WEIGHTS, BACKEND_PROVIDER, SOLVER, COMPUTE_DEVICE, @@ -146,7 +142,6 @@ (NB_DATA_SAMPLES, False), (NB_DATA_FEATURES, False), (NB_DATA_TARGETS, False), - (DATA_SAMPLE_WEIGHTS, True), (WALLTIME, True), (BACKEND_PROVIDER, True), (COMPUTE_DEVICE, True), @@ -180,7 +175,6 @@ objective_dataset_param_dtype=DTYPE, objective_dataset_param_random_state=DATA_RANDOM_STATE, 
objective_objective_param_random_state=SOLVER_RANDOM_STATE, - objective_objective_param_sample_weight=DATA_SAMPLE_WEIGHTS, objective_objective_param_solver=SOLVER, objective_solver_param___name=BACKEND_PROVIDER, objective_solver_param_device=COMPUTE_DEVICE, diff --git a/benchmarks/ridge/datasets/simulated_blobs.py b/benchmarks/ridge/datasets/simulated_blobs.py index e8db00c..74daf4a 100644 --- a/benchmarks/ridge/datasets/simulated_blobs.py +++ b/benchmarks/ridge/datasets/simulated_blobs.py @@ -10,13 +10,12 @@ class Dataset(BaseDataset): parameters = { "n_samples, n_features": [ - (20_000_000, 100), + (10_000_000, 100), (5000, 5000), - (20_000_000, 10), + (10_000_000, 10), (2_000_000, 100), - (1500, 1500), ], - "n_targets": [1, 5, 20], + "n_targets": [1, 10], "dtype": ["float32"], "random_state": [123], } diff --git a/benchmarks/ridge/objective.py b/benchmarks/ridge/objective.py index 9242ecc..1a7dc89 100644 --- a/benchmarks/ridge/objective.py +++ b/benchmarks/ridge/objective.py @@ -21,7 +21,7 @@ class Objective(BaseObjective): "alpha": [1.0], "fit_intercept": [True], "solver, max_iter, tol": [("svd", None, 0)], - "sample_weight": ["None", "random"], + "sample_weight": ["None"], # NB: add "random" to test non None weights "random_state": [123], } @@ -86,7 +86,19 @@ def evaluate_result(self, weights, intercept, **solver_parameters): ) def get_one_result(self): - return dict(objective=1) + n_features = self.dataset_parameters["n_features"] + n_targets = self.dataset_parameters["n_targets"] + if n_targets == 1: + weights = np.ones((n_features,)) + else: + weights = np.ones( + ( + n_targets, + n_features, + ) + ) + + return dict(weights=weights, intercept=np.ones((n_targets,))) def get_objective(self): # Copy the data before sending to the solver, to ensure that no unfortunate