Skip to content

Commit

Permalink
remove uninteresting parameter combinations for benchmarks for kmeans and pca - fix pca tests
Browse files: browse the repository at this point in the history
  • Loading branch information
fcharras committed Oct 10, 2023
1 parent dd5c0fe commit 1ed7c27
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 59 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test_cpu_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ jobs:

- name: Run k-means benchmarks
run: |
export PYTHONPATH=$PYTHONPATH:$(realpath ./kmeans_dpcpp/)
cd benchmarks/kmeans
benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=1000,n_features=14]
PYTHONPATH=$PYTHONPATH:$(realpath ./kmeans_dpcpp/) benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=1000,n_features=14]
cd ../pca
benchopt run --no-plot -l -d Simulated_correlated_data[n_samples=100,n_features=100]
2 changes: 1 addition & 1 deletion benchmarks/kmeans/datasets/simulated_blobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Dataset(BaseDataset):

parameters = {
"n_samples, n_features": [(50_000_000, 14), (10_000_000, 14), (1_000_000, 14)],
"dtype": ["float32", "float64"],
"dtype": ["float32"],
"random_state": [123],
}

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/kmeans/solvers/kmeans_dpcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Solver(BaseSolver):
"kmeans_dpcpp",
]

parameters = dict(device=["cpu", "gpu"], runtime=["level_zero", "opencl"])
parameters = {"device, runtime": [("cpu", "opencl"), ("gpu", "level_zero")]}

stopping_criterion = SingleRunCriterion(1)

Expand Down
3 changes: 0 additions & 3 deletions benchmarks/kmeans/solvers/scikit_learn_intelex.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ class Solver(BaseSolver):
parameters = {
"device, runtime": [
("cpu", "numpy"),
("cpu", "level_zero"),
("cpu", "opencl"),
("gpu", "level_zero"),
("gpu", "opencl"),
]
}

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/kmeans/solvers/sklearn_numba_dpex.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Solver(BaseSolver):
"sklearn-numba-dpex",
]

parameters = dict(device=["cpu", "gpu"], runtime=["level_zero", "opencl"])
parameters = {"device, runtime": [("cpu", "opencl"), ("gpu", "level_zero")]}

stopping_criterion = SingleRunCriterion(1)

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/pca/datasets/simulated_blobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ class Dataset(BaseDataset):
name = "Simulated_correlated_data"

parameters = {
"n_samples, n_features": [(1_000_000, 10_000), (10_000, 10_000)],
"dtype": ["float32", "float64"],
"n_samples, n_features": [(1_000_000, 100), (10_000, 10_000)],
"dtype": ["float32"],
"random_state": [123],
}

Expand Down
2 changes: 0 additions & 2 deletions benchmarks/pca/objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class Objective(BaseObjective):
# solver.
parameters = dict(
n_components=[10],
whiten=[False],
tol=[0.0],
iterated_power=[15],
n_oversamples=[10],
Expand Down Expand Up @@ -57,7 +56,6 @@ def get_objective(self):
return dict(
X=self.X,
n_components=self.n_components,
whiten=self.whiten,
tol=self.tol,
iterated_power=self.iterated_power,
n_oversamples=self.n_oversamples,
Expand Down
8 changes: 3 additions & 5 deletions benchmarks/pca/solvers/cuml.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def set_objective(
self,
X,
n_components,
whiten,
tol,
iterated_power,
n_oversamples,
Expand All @@ -54,22 +53,21 @@ def set_objective(
else:
self.X = cupy.asarray(X)

self.components = n_components
self.whiten = whiten
self.n_components = n_components
self.tol = tol

# if tol == 0:
# tol = 1e-16
# self.tol = tol

self.iterated_power = self.iterated_power
self.iterated_power = iterated_power
self.n_oversamples = n_oversamples
self.random_state = random_state
self.verbose = verbose

def run(self, _):
estimator = cuml.PCA(
copy=False,
copy=True,
iterated_power=self.iterated_power,
n_components=self.n_components,
random_state=self.random_state,
Expand Down
34 changes: 12 additions & 22 deletions benchmarks/pca/solvers/scikit_learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,21 @@ class Solver(BaseSolver):
name = "scikit-learn"
requirements = ["scikit-learn"]

parameters = dict(
svd_solver=["full", "arpack", "randomized"],
power_iteration_normalizer=["QR", "LU", "none"],
)
parameters = {
"svd_solver, power_iteration_normalizer": [
(svd_solver, power_iteration_normalizer)
for svd_solver in ["full", "randomized"]
for power_iteration_normalizer in ["LU"]
]
+ [("arpack", "none")]
}

stopping_criterion = SingleRunCriterion(1)

def skip(self, **objective_dict):
svd_solver = objective_dict["svd_solver"]
power_iteration_normalizer = objective_dict["power_iteration_normalizer"]

if (svd_solver == "arpack") and power_iteration_normalizer != "none":
return True, (
"arpack solver expect power iteration normalizer parameter set to "
"'none'"
)

return False, None

def set_objective(
self,
X,
n_components,
whiten,
tol,
iterated_power,
n_oversamples,
Expand All @@ -45,19 +36,18 @@ def set_objective(
# effects can happen
self.X = X.copy()

self.components = n_components
self.whiten = whiten
self.n_components = n_components
self.tol = tol
self.iterated_power = self.iterated_power
self.iterated_power = iterated_power
self.n_oversamples = n_oversamples
self.random_state = random_state
self.verbose = verbose

def run(self, _):
estimator = PCA(
n_components=self.n_components,
copy=False,
whiten=self.whiten,
copy=True,
whiten=False,
svd_solver=self.svd_solver,
tol=self.tol,
iterated_power=self.iterated_power,
Expand Down
30 changes: 10 additions & 20 deletions benchmarks/pca/solvers/scikit_learn_intelex.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ class Solver(BaseSolver):
parameters = {
"device, runtime": [
("cpu", "numpy"),
("cpu", "level_zero"),
("cpu", "opencl"),
("gpu", "level_zero"),
("gpu", "opencl"),
],
"svd_solver": ["full", "arpack", "randomized"],
"power_iteration_normalizer": ["QR", "LU", "none"],
"svd_solver, power_iteration_normalizer": [
(svd_solver, power_iteration_normalizer)
for svd_solver in ["full", "randomized"]
for power_iteration_normalizer in ["LU"]
]
+ [("arpack", "none")],
}

stopping_criterion = SingleRunCriterion(1)
Expand All @@ -51,22 +52,12 @@ def skip(self, **objective_dict):
f"This {self.device} device has no support for float64 compute"
)

svd_solver = objective_dict["svd_solver"]
power_iteration_normalizer = objective_dict["power_iteration_normalizer"]

if (svd_solver == "arpack") and power_iteration_normalizer != "none":
return True, (
"arpack solver expect power iteration normalizer parameter set to "
"'none'"
)

return False, None

def set_objective(
self,
X,
n_components,
whiten,
tol,
iterated_power,
n_oversamples,
Expand All @@ -83,19 +74,18 @@ def set_objective(
else:
self.X = X.copy()

self.components = n_components
self.whiten = whiten
self.n_components = n_components
self.tol = tol
self.iterated_power = self.iterated_power
self.iterated_power = iterated_power
self.n_oversamples = n_oversamples
self.random_state = random_state
self.verbose = verbose

def run(self, _):
estimator = PCA(
n_components=self.n_components,
copy=False,
whiten=self.whiten,
copy=True,
whiten=False,
svd_solver=self.svd_solver,
tol=self.tol,
iterated_power=self.iterated_power,
Expand Down

0 comments on commit 1ed7c27

Please sign in to comment.