Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PLS: Adjust coeffs to newest sklearn #6867

Merged
merged 2 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions Orange/widgets/model/owpls.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ def update_model(self):

def _create_output_coeffs_loadings(self) -> Table:
intercept = self.model.intercept.T[None, :]
coefficients = self.model.coefficients.T
coefficients = self.model.coefficients
_, y_loadings = self.model.loadings
x_rotations, _ = self.model.rotations

n_features, n_targets = coefficients.shape
n_targets, n_features = coefficients.shape
n_components = x_rotations.shape[1]

names = [f"coef ({v.name})" for v in self.model.domain.class_vars]
names += [f"coef/X_sd ({v.name})" for v in self.model.domain.class_vars]
names += [f"coef * X_sd ({v.name})" for v in self.model.domain.class_vars]
names += [f"w*c {i + 1}" for i in range(n_components)]
domain = Domain(
[ContinuousVariable(n) for n in names],
Expand All @@ -85,18 +85,16 @@ def _create_output_coeffs_loadings(self) -> Table:
)

data = self.model.data_to_model_domain(self.data)
x_std = np.std(data.X, axis=0)
coeffs_x_std = coefficients.T / x_std
X_features = np.hstack((coefficients,
coeffs_x_std.T,
X_features = np.hstack((coefficients.T,
(coefficients * np.std(data.X, axis=0)).T,
x_rotations))
X_targets = np.hstack((np.full((n_targets, n_targets), np.nan),
np.full((n_targets, n_targets), np.nan),
y_loadings))

coeffs = coeffs_x_std * np.mean(data.X, axis=0)
X_intercepts = np.hstack((intercept,
intercept - coeffs.sum(),
coeffs = coefficients * np.mean(data.X, axis=0)
X_intercepts = np.hstack((intercept - coeffs.sum(),
intercept,
np.full((1, n_components), np.nan)))
X = np.vstack((X_features, X_targets, X_intercepts))

Expand Down
30 changes: 22 additions & 8 deletions Orange/widgets/model/tests/test_owpls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import numpy as np
from sklearn.cross_decomposition import PLSRegression

from Orange.data import Table, Domain, StringVariable
from Orange.widgets.model.owpls import OWPLS
Expand Down Expand Up @@ -30,6 +31,19 @@ def setUp(self):
ParameterMapping('n_components', self.widget.controls.n_components)
]

def test_coeffs_compare_sklearn(self):
    """Compare the widget's coefficient output with scikit-learn's PLS.

    The first column of the ``coefsdata`` output is used to rebuild
    predictions as ``X @ coeffs + intercept``; these must agree with what
    a two-component ``PLSRegression`` fitted directly on the same data
    predicts.
    """
    data = self._data
    self.send_signal(self.widget.Inputs.data, data)
    output = self.get_output(self.widget.Outputs.coefsdata)

    # Only the first output column is inspected: its last row is read as
    # the intercept, and every row except the final two as the
    # per-feature coefficients.
    # NOTE(review): presumably the second-to-last row belongs to the
    # (single) target variable -- confirm against the widget's layout.
    first_column = output.X[:, 0]
    bias = first_column[-1]
    weights = first_column[:-2]
    Y_orange = data.X @ weights + bias

    # Reference predictions straight from scikit-learn.
    reference = PLSRegression(n_components=2)
    reference.fit(data.X, data.Y)
    Y_sklearn = reference.predict(data.X)

    np.testing.assert_almost_equal(Y_sklearn, Y_orange)

def test_output_coefsdata(self):
self.send_signal(self.widget.Inputs.data, self._data)
coefsdata = self.get_output(self.widget.Outputs.coefsdata)
Expand All @@ -38,7 +52,7 @@ def test_output_coefsdata(self):
self.assertEqual(coefsdata.Y.shape, (15, 0))
self.assertEqual(coefsdata.metas.shape, (15, 2))

self.assertEqual(["coef (MEDV)", "coef/X_sd (MEDV)", "w*c 1", "w*c 2"],
self.assertEqual(["coef (MEDV)", "coef * X_sd (MEDV)", "w*c 1", "w*c 2"],
[v.name for v in coefsdata.domain.attributes])
self.assertEqual(["Variable name", "Variable role"],
[v.name for v in coefsdata.domain.metas])
Expand All @@ -47,9 +61,9 @@ def test_output_coefsdata(self):
self.assertTrue((coefsdata.metas[:-2, 1] == 0).all())
self.assertTrue((coefsdata.metas[-2, 1] == 1))
self.assertTrue(np.isnan(coefsdata.metas[-1, 1]))
self.assertAlmostEqual(coefsdata.X[0, 2], 0.237, 3)
self.assertAlmostEqual(coefsdata.X[13, 2], -0.304, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1)
self.assertAlmostEqual(coefsdata.X[0, 3], 0.012, 3)
self.assertAlmostEqual(coefsdata.X[13, 3], 0.389, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 13.7, 1)
self.assertTrue(np.isnan(coefsdata.X[-1, 2:]).all())

def test_output_coefsdata_multi_target(self):
Expand All @@ -60,8 +74,8 @@ def test_output_coefsdata_multi_target(self):
self.assertEqual(coefsdata.Y.shape, (15, 0))
self.assertEqual(coefsdata.metas.shape, (15, 2))

attr_names = ["coef (MEDV)", "coef (CRIM)", "coef/X_sd (MEDV)",
"coef/X_sd (CRIM)", "w*c 1", "w*c 2"]
attr_names = ["coef (MEDV)", "coef (CRIM)", "coef * X_sd (MEDV)",
"coef * X_sd (CRIM)", "w*c 1", "w*c 2"]
self.assertEqual(attr_names,
[v.name for v in coefsdata.domain.attributes])
self.assertEqual(["Variable name", "Variable role"],
Expand All @@ -75,8 +89,8 @@ def test_output_coefsdata_multi_target(self):
self.assertAlmostEqual(coefsdata.X[0, 4], -0.198, 3)
self.assertAlmostEqual(coefsdata.X[12, 4], -0.288, 3)
self.assertAlmostEqual(coefsdata.X[13, 4], 0.243, 3)
self.assertAlmostEqual(coefsdata.X[-1, 0], 22.5, 1)
self.assertAlmostEqual(coefsdata.X[-1, 1], 3.6, 1)
self.assertAlmostEqual(coefsdata.X[-1, 0], 6.7, 1)
self.assertAlmostEqual(coefsdata.X[-1, 1], -12.2, 1)
self.assertTrue(np.isnan(coefsdata.X[-1, 4:]).all())

def test_output_data(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements-core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pip>=19.3
python-louvain>=0.13
pyyaml
requests
scikit-learn>=1.4.0
scikit-learn>=1.5.1
scipy>=1.9
serverfiles # for Data Sets synchronization
xgboost>=1.7.4,<2.1
Expand Down
Loading