From 1ed7c275e38832e46c1f664e32f64bd1f39e6aa7 Mon Sep 17 00:00:00 2001
From: Franck Charras <29153872+fcharras@users.noreply.github.com>
Date: Tue, 10 Oct 2023 18:06:00 +0200
Subject: [PATCH] remove uninteresting parameter combinations for benchmarks for kmeans and pca - fix pca tests

---
 .github/workflows/test_cpu_benchmarks.yaml    |  3 +-
 benchmarks/kmeans/datasets/simulated_blobs.py |  2 +-
 benchmarks/kmeans/solvers/kmeans_dpcpp.py     |  2 +-
 .../kmeans/solvers/scikit_learn_intelex.py    |  3 --
 .../kmeans/solvers/sklearn_numba_dpex.py      |  2 +-
 benchmarks/pca/datasets/simulated_blobs.py    |  4 +--
 benchmarks/pca/objective.py                   |  2 --
 benchmarks/pca/solvers/cuml.py                |  8 ++---
 benchmarks/pca/solvers/scikit_learn.py        | 34 +++++++------------
 .../pca/solvers/scikit_learn_intelex.py       | 30 ++++++----------
 10 files changed, 31 insertions(+), 59 deletions(-)

diff --git a/.github/workflows/test_cpu_benchmarks.yaml b/.github/workflows/test_cpu_benchmarks.yaml
index 913e2ec..b566564 100644
--- a/.github/workflows/test_cpu_benchmarks.yaml
+++ b/.github/workflows/test_cpu_benchmarks.yaml
@@ -139,8 +139,7 @@ jobs:
 
       - name: Run k-means benchmarks
         run: |
-          export PYTHONPATH=$PYTHONPATH:$(realpath ./kmeans_dpcpp/)
           cd benchmarks/kmeans
-          benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=1000,n_features=14]
+          PYTHONPATH=$PYTHONPATH:$(realpath ./kmeans_dpcpp/) benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=1000,n_features=14]
           cd ../pca
           benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=100,n_features=100]
diff --git a/benchmarks/kmeans/datasets/simulated_blobs.py b/benchmarks/kmeans/datasets/simulated_blobs.py
index a0670d1..2d6cdc3 100644
--- a/benchmarks/kmeans/datasets/simulated_blobs.py
+++ b/benchmarks/kmeans/datasets/simulated_blobs.py
@@ -10,7 +10,7 @@ class Dataset(BaseDataset):
 
     parameters = {
         "n_samples, n_features": [(50_000_000, 14), (10_000_000, 14), (1_000_000, 14)],
-        "dtype": ["float32", "float64"],
+        "dtype": ["float32"],
         "random_state": [123],
     }
 
diff --git a/benchmarks/kmeans/solvers/kmeans_dpcpp.py b/benchmarks/kmeans/solvers/kmeans_dpcpp.py
index 72afa7c..4a2087f 100644
--- a/benchmarks/kmeans/solvers/kmeans_dpcpp.py
+++ b/benchmarks/kmeans/solvers/kmeans_dpcpp.py
@@ -66,7 +66,7 @@ class Solver(BaseSolver):
         "kmeans_dpcpp",
     ]
 
-    parameters = dict(device=["cpu", "gpu"], runtime=["level_zero", "opencl"])
+    parameters = {"device, runtime": [("cpu", "opencl"), ("gpu", "level_zero")]}
 
     stopping_criterion = SingleRunCriterion(1)
 
diff --git a/benchmarks/kmeans/solvers/scikit_learn_intelex.py b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
index d4f44bd..086a8f4 100644
--- a/benchmarks/kmeans/solvers/scikit_learn_intelex.py
+++ b/benchmarks/kmeans/solvers/scikit_learn_intelex.py
@@ -24,10 +24,7 @@ class Solver(BaseSolver):
     parameters = {
         "device, runtime": [
             ("cpu", "numpy"),
-            ("cpu", "level_zero"),
-            ("cpu", "opencl"),
             ("gpu", "level_zero"),
-            ("gpu", "opencl"),
         ]
     }
 
diff --git a/benchmarks/kmeans/solvers/sklearn_numba_dpex.py b/benchmarks/kmeans/solvers/sklearn_numba_dpex.py
index 0c5d746..8302c3f 100644
--- a/benchmarks/kmeans/solvers/sklearn_numba_dpex.py
+++ b/benchmarks/kmeans/solvers/sklearn_numba_dpex.py
@@ -22,7 +22,7 @@ class Solver(BaseSolver):
         "sklearn-numba-dpex",
     ]
 
-    parameters = dict(device=["cpu", "gpu"], runtime=["level_zero", "opencl"])
+    parameters = {"device, runtime": [("cpu", "opencl"), ("gpu", "level_zero")]}
 
     stopping_criterion = SingleRunCriterion(1)
 
diff --git a/benchmarks/pca/datasets/simulated_blobs.py b/benchmarks/pca/datasets/simulated_blobs.py
index 1e5e932..a0f3cba 100644
--- a/benchmarks/pca/datasets/simulated_blobs.py
+++ b/benchmarks/pca/datasets/simulated_blobs.py
@@ -9,8 +9,8 @@ class Dataset(BaseDataset):
     name = "Simulated_correlated_data"
 
     parameters = {
-        "n_samples, n_features": [(1_000_000, 10_000), (10_000, 10_000)],
-        "dtype": ["float32", "float64"],
+        "n_samples, n_features": [(1_000_000, 100), (10_000, 10_000)],
+        "dtype": ["float32"],
         "random_state": [123],
     }
 
diff --git a/benchmarks/pca/objective.py b/benchmarks/pca/objective.py
index 35ffc72..6c24d6e 100644
--- a/benchmarks/pca/objective.py
+++ b/benchmarks/pca/objective.py
@@ -15,7 +15,6 @@ class Objective(BaseObjective):
     # solver.
     parameters = dict(
         n_components=[10],
-        whiten=[False],
         tol=[0.0],
         iterated_power=[15],
         n_oversamples=[10],
@@ -57,7 +56,6 @@ def get_objective(self):
         return dict(
             X=self.X,
             n_components=self.n_components,
-            whiten=self.whiten,
             tol=self.tol,
             iterated_power=self.iterated_power,
             n_oversamples=self.n_oversamples,
diff --git a/benchmarks/pca/solvers/cuml.py b/benchmarks/pca/solvers/cuml.py
index 4755348..1a63bbc 100644
--- a/benchmarks/pca/solvers/cuml.py
+++ b/benchmarks/pca/solvers/cuml.py
@@ -39,7 +39,6 @@ def set_objective(
         self,
         X,
         n_components,
-        whiten,
         tol,
         iterated_power,
         n_oversamples,
@@ -54,22 +53,21 @@ def set_objective(
         else:
             self.X = cupy.asarray(X)
 
-        self.components = n_components
-        self.whiten = whiten
+        self.n_components = n_components
         self.tol = tol
 
         # if tol == 0:
        #     tol = 1e-16
         # self.tol = tol
 
-        self.iterated_power = self.iterated_power
+        self.iterated_power = iterated_power
         self.n_oversamples = n_oversamples
         self.random_state = random_state
         self.verbose = verbose
 
     def run(self, _):
         estimator = cuml.PCA(
-            copy=False,
+            copy=True,
             iterated_power=self.iterated_power,
             n_components=self.n_components,
             random_state=self.random_state,
diff --git a/benchmarks/pca/solvers/scikit_learn.py b/benchmarks/pca/solvers/scikit_learn.py
index 41cbf5c..e6b0f9b 100644
--- a/benchmarks/pca/solvers/scikit_learn.py
+++ b/benchmarks/pca/solvers/scikit_learn.py
@@ -11,30 +11,21 @@ class Solver(BaseSolver):
     name = "scikit-learn"
     requirements = ["scikit-learn"]
 
-    parameters = dict(
-        svd_solver=["full", "arpack", "randomized"],
-        power_iteration_normalizer=["QR", "LU", "none"],
-    )
+    parameters = {
+        "svd_solver, power_iteration_normalizer": [
+            (svd_solver, power_iteration_normalizer)
+            for svd_solver in ["full", "randomized"]
+            for power_iteration_normalizer in ["LU"]
+        ]
+        + [("arpack", "none")]
+    }
 
     stopping_criterion = SingleRunCriterion(1)
 
-    def skip(self, **objective_dict):
-        svd_solver = objective_dict["svd_solver"]
-        power_iteration_normalizer = objective_dict["power_iteration_normalizer"]
-
-        if (svd_solver == "arpack") and power_iteration_normalizer != "none":
-            return True, (
-                "arpack solver expect power iteration normalizer parameter set to "
-                "'none'"
-            )
-
-        return False, None
-
     def set_objective(
         self,
         X,
         n_components,
-        whiten,
         tol,
         iterated_power,
         n_oversamples,
@@ -45,10 +36,9 @@ def set_objective(
         # effects can happen
         self.X = X.copy()
 
-        self.components = n_components
-        self.whiten = whiten
+        self.n_components = n_components
         self.tol = tol
-        self.iterated_power = self.iterated_power
+        self.iterated_power = iterated_power
         self.n_oversamples = n_oversamples
         self.random_state = random_state
         self.verbose = verbose
 
     def run(self, _):
         estimator = PCA(
             n_components=self.n_components,
-            copy=False,
-            whiten=self.whiten,
+            copy=True,
+            whiten=False,
             svd_solver=self.svd_solver,
             tol=self.tol,
             iterated_power=self.iterated_power,
diff --git a/benchmarks/pca/solvers/scikit_learn_intelex.py b/benchmarks/pca/solvers/scikit_learn_intelex.py
index 2959456..a485771 100644
--- a/benchmarks/pca/solvers/scikit_learn_intelex.py
+++ b/benchmarks/pca/solvers/scikit_learn_intelex.py
@@ -24,13 +24,14 @@ class Solver(BaseSolver):
     parameters = {
         "device, runtime": [
             ("cpu", "numpy"),
-            ("cpu", "level_zero"),
-            ("cpu", "opencl"),
             ("gpu", "level_zero"),
-            ("gpu", "opencl"),
         ],
-        "svd_solver": ["full", "arpack", "randomized"],
-        "power_iteration_normalizer": ["QR", "LU", "none"],
+        "svd_solver, power_iteration_normalizer": [
+            (svd_solver, power_iteration_normalizer)
+            for svd_solver in ["full", "randomized"]
+            for power_iteration_normalizer in ["LU"]
+        ]
+        + [("arpack", "none")],
     }
 
     stopping_criterion = SingleRunCriterion(1)
@@ -51,22 +52,12 @@ def skip(self, **objective_dict):
                 f"This {self.device} device has no support for float64 compute"
             )
 
-        svd_solver = objective_dict["svd_solver"]
-        power_iteration_normalizer = objective_dict["power_iteration_normalizer"]
-
-        if (svd_solver == "arpack") and power_iteration_normalizer != "none":
-            return True, (
-                "arpack solver expect power iteration normalizer parameter set to "
-                "'none'"
-            )
-
         return False, None
 
     def set_objective(
         self,
         X,
         n_components,
-        whiten,
         tol,
         iterated_power,
         n_oversamples,
@@ -83,10 +74,9 @@ def set_objective(
         else:
             self.X = X.copy()
 
-        self.components = n_components
-        self.whiten = whiten
+        self.n_components = n_components
         self.tol = tol
-        self.iterated_power = self.iterated_power
+        self.iterated_power = iterated_power
         self.n_oversamples = n_oversamples
         self.random_state = random_state
         self.verbose = verbose
@@ -94,8 +84,8 @@ def run(self, _):
         estimator = PCA(
             n_components=self.n_components,
-            copy=False,
-            whiten=self.whiten,
+            copy=True,
+            whiten=False,
             svd_solver=self.svd_solver,
             tol=self.tol,
             iterated_power=self.iterated_power,
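
Note on the recurring "parameters" rewrite above: benchopt expands independent parameter lists into a full cross-product, so listing device and runtime (or svd_solver and power_iteration_normalizer) as separate keys benchmarks every pairing, including unsupported or redundant ones. Joining the names into one comma-separated key and enumerating tuples keeps only the combinations worth running. The sketch below is a minimal, hypothetical illustration of that expansion semantics; expand_parameters is an invented helper for the example, not part of benchopt's API, and only the parameter grid literal is taken verbatim from the patch.

from itertools import product


def expand_parameters(parameters):
    # Expand a benchopt-style ``parameters`` dict into concrete settings.
    # A key such as "device, runtime" names several parameters at once and
    # each of its values is a tuple giving one allowed combination; distinct
    # keys are still combined as a cross-product.
    names, groups = [], []
    for key, values in parameters.items():
        group = [name.strip() for name in key.split(",")]
        names.extend(group)
        # Normalize single-parameter values to 1-tuples so product() below
        # can treat every group uniformly.
        groups.append([v if isinstance(v, tuple) else (v,) for v in values])
    for combination in product(*groups):
        flat = [value for group_values in combination for value in group_values]
        yield dict(zip(names, flat))


# Parameter grid copied verbatim from the scikit-learn PCA solver in the patch.
parameters = {
    "svd_solver, power_iteration_normalizer": [
        (svd_solver, power_iteration_normalizer)
        for svd_solver in ["full", "randomized"]
        for power_iteration_normalizer in ["LU"]
    ]
    + [("arpack", "none")]
}

for setting in expand_parameters(parameters):
    print(setting)
# Only the three listed pairs are produced, instead of the 3 x 3 grid that the
# old independent lists would have generated:
# {'svd_solver': 'full', 'power_iteration_normalizer': 'LU'}
# {'svd_solver': 'randomized', 'power_iteration_normalizer': 'LU'}
# {'svd_solver': 'arpack', 'power_iteration_normalizer': 'none'}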