Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
noahnovsak committed Jul 21, 2023
1 parent 6f14e87 commit a50e880
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Orange/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def __call__(self, data, progress_callback=None):

progress_callback(0.1, "Fitting...")
model = self._fit_model(data)
model.used_vals = [np.unique(y).astype(int) for y in data.Y[:, None].T]
model.used_vals = [np.asarray(np.unique(y), dtype=int) for y in data.Y[:, None].T]
if not hasattr(model, "domain") or model.domain is None:
# some models set the domain themselves and it should be respected
# e.g. calibration learners set the base_learner's domain which
Expand Down
31 changes: 19 additions & 12 deletions Orange/classification/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
from Orange.preprocess.score import LearnerScorer
from Orange.data import Variable, DiscreteVariable

try:
import dask_ml.linear_model as dask_linear_model
except ImportError:
dask_linear_model = skl_linear_model



__all__ = ["LogisticRegressionLearner"]

Expand All @@ -26,11 +32,11 @@ def score(self, data):
class LogisticRegressionClassifier(SklModel):
    """Fitted logistic-regression model exposing its parameters as arrays.

    Both properties read from the wrapped estimator stored in
    ``self.skl_model`` and normalise the dimensionality of the result, so
    callers can rely on ``intercept`` being at least 1-D and
    ``coefficients`` being at least 2-D without re-wrapping the values.
    """

    @property
    def intercept(self):
        """Return the wrapped model's ``intercept_`` as an array (ndim >= 1)."""
        # NOTE(review): presumably some backends report a scalar intercept;
        # atleast_1d makes the result indexable either way.
        return np.atleast_1d(self.skl_model.intercept_)

    @property
    def coefficients(self):
        """Return the wrapped model's ``coef_`` as an array (ndim >= 2)."""
        # Consumers inspect ``coefficients.shape[0]`` to tell binary from
        # multi-class models, which requires a 2-D result.
        return np.atleast_2d(self.skl_model.coef_)


class LogisticRegressionLearner(SklLearner, _FeatureScorerMixin):
Expand All @@ -47,22 +53,23 @@ def __init__(self, penalty="l2", dual=False, tol=0.0001, C=1.0,

def _initialize_wrapped(self, X=None, Y=None):
params = self.params.copy()
solver = params.pop("solver")
penalty = params.get("penalty") or "none"

if isinstance(X, da.Array) or isinstance(Y, da.Array):
try:
import dask_ml.linear_model

params["solver"] = "admm"
if params["penalty"] == "none":
params["solver"] = "gradient_descent"

return dask_ml.linear_model.LogisticRegression(**params)
except ImportError:
if dask_linear_model is skl_linear_model:
warnings.warn("dask_ml is not installed, using sklearn instead.")
else:
if solver == "auto":
if penalty in "none":
solver = "gradient_descent"
else:
solver = "admm"
params["solver"], params["penalty"] = solver, penalty
return dask_linear_model.LogisticRegression(**params)

# The default scikit-learn solver `lbfgs` (v0.22) does not support the
# l1 penalty.
solver, penalty = params.pop("solver"), params.get("penalty")
if solver == "auto":
if penalty == "l1":
solver = "liblinear"
Expand Down
54 changes: 49 additions & 5 deletions Orange/tests/test_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
import sklearn
import dask_ml

from Orange.data import Table, ContinuousVariable, Domain
from Orange.classification import LogisticRegressionLearner, Model
Expand Down Expand Up @@ -115,9 +116,9 @@ def test_coefficients(self):

def test_predict_on_instance(self):
    # Predicting a single instance must give the same probabilities as
    # predicting the one-row slice that contains that instance.
    # NOTE(review): heart_disease is presumably chosen because it is a
    # binary-class dataset, so subclasses running on backends without
    # multiclass support can reuse this test -- confirm.
    lr = LogisticRegressionLearner()
    m = lr(self.heart_disease)
    probs = m(self.heart_disease[50], m.Probs)
    probs2 = m(self.heart_disease[50, :], m.Probs)
    np.testing.assert_almost_equal(probs, probs2[0])

def test_single_class(self):
Expand Down Expand Up @@ -154,9 +155,52 @@ def test_auto_solver(self):
self.assertEqual(skl_clf.penalty, "l1")


class TestLRLOnDask(TestLogisticRegressionLearner):
class TestLogisticRegressionOnDask(TestLogisticRegressionLearner):
    """Re-run the logistic regression test suite on Dask-backed tables.

    Inherited tests that the Dask backend cannot satisfy are overridden
    and skipped; tests whose expectations differ are overridden with
    backend-specific assertions.
    """

    @classmethod
    def setUpClass(cls):
        # Same fixtures as the parent suite, wrapped as temporary Dask tables.
        fixtures = (
            ("iris", 'iris'),
            ("heart_disease", 'heart_disease.tab'),
            ("zoo", 'zoo'),
        )
        for attribute, dataset in fixtures:
            setattr(cls, attribute, temp_dasktable(Table(dataset)))

    def test_learner_scorer(self):
        # For some reason dask_ml and sklearn yield different results, so
        # the expected top-scoring attribute is specific to this backend.
        scores = LogisticRegressionLearner().score_data(self.heart_disease)
        attributes = self.heart_disease.domain.attributes
        best = attributes[np.argmax(scores)]
        self.assertEqual('major vessels colored', best.name)
        self.assertEqual(scores.shape, (1, len(attributes)))

    @unittest.skip("Discretizer is not yet implemented")
    def test_learner_scorer_previous_transformation(self):
        super().test_learner_scorer_previous_transformation()

    @unittest.skip("Multiclass regression is not supported")
    def test_learner_scorer_multiclass(self):
        super().test_learner_scorer_multiclass()

    @unittest.skip("Dask-ML does not support multiclass regression")
    def test_learner_scorer_multiclass_feature(self):
        super().test_learner_scorer_multiclass_feature()

    @unittest.skip("Dask-ML accepts single class")
    def test_single_class(self):
        super().test_single_class()

    @unittest.skip("Dask-ML accepts single class")
    def test_sklearn_single_class(self):
        super().test_sklearn_single_class()

    def test_auto_solver(self):
        # (requested penalty, expected solver, expected reported penalty)
        expectations = (
            ("l2", "admm", "l2"),
            ("l1", "admm", "l1"),
            (None, "gradient_descent", "none"),
        )
        for requested, expected_solver, expected_penalty in expectations:
            learner = LogisticRegressionLearner(penalty=requested,
                                                solver="auto")
            wrapped = learner._initialize_wrapped(self.iris.X)
            self.assertEqual(wrapped.solver, expected_solver)
            self.assertEqual(wrapped.penalty, expected_penalty)
7 changes: 4 additions & 3 deletions Orange/widgets/model/owlogisticregression.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,16 @@ def get_learner_parameters(self):
def check_data(self):
    """Validate the input data, rejecting multiclass targets on Dask tables.

    Returns the parent class's verdict, downgraded to ``False`` (and a
    data error is shown) when the data is a ``DaskTable`` whose class
    variable declares more than two values and more than two distinct
    target values actually occur in the data.
    """
    valid = super().check_data()
    # The cheap domain check comes first so the target column is only
    # computed when the declared class values already exceed two.
    if valid and isinstance(self.data, DaskTable) \
            and len(self.data.domain.class_var.values) > 2 \
            and len(np.unique(self.data.Y).compute()) > 2:
        self.Error.data_error("Data contains too many target values.")
        valid = False
    return valid


def create_coef_table(classifier):
i = np.atleast_1d(classifier.intercept)
c = np.atleast_2d(classifier.coefficients)
i = classifier.intercept
c = classifier.coefficients
if c.shape[0] > 2: # multi-class
values = [classifier.domain.class_var.values[int(i)]
for i in np.asarray(classifier.used_vals[0])]
Expand Down

0 comments on commit a50e880

Please sign in to comment.