Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sklearn version #142

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hiclass/BinaryPolicy.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def get_binary_examples(self, node) -> tuple:
elif isinstance(self.X, csr_matrix) or isinstance(self.X, csr_array):
X = vstack([positive_x, negative_x])
sample_weights = (
vstack([positive_weights, negative_weights])
np.concatenate([positive_weights, negative_weights])
if self.sample_weight is not None
else None
)
Expand Down
22 changes: 22 additions & 0 deletions hiclass/HierarchicalClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
MultiplyCombiner,
)

from hiclass.probability_combiner import (
init_strings as probability_combiner_init_strings,
)

try:
import ray
except ImportError:
Expand Down Expand Up @@ -79,6 +83,7 @@ def __init__(
bert: bool = False,
classifier_abbreviation: str = "",
calibration_method: str = None,
probability_combiner: str = "multiply",
tmp_dir: str = None,
):
"""
Expand Down Expand Up @@ -107,6 +112,13 @@ def __init__(
The abbreviation of the local hierarchical classifier to be displayed during logging.
calibration_method : {"ivap", "cvap", "platt", "isotonic", "beta"}, str, default=None
If set, use the desired method to calibrate probabilities returned by predict_proba().
probability_combiner : {"geometric", "arithmetic", "multiply", None}, str, default="multiply"
Specify the rule for combining probabilities over multiple levels:

- `geometric`: Each level's probabilities are calculated by taking the geometric mean of that level's probabilities and those of its predecessors;
- `arithmetic`: Each level's probabilities are calculated by taking the arithmetic mean of that level's probabilities and those of its predecessors;
- `multiply`: Each level's probabilities are calculated by multiplying that level's probabilities with those of its predecessors;
- `None`: No aggregation.
tmp_dir : str, default=None
Temporary directory to persist local classifiers that are trained. If the job needs to be restarted,
it will skip the pre-trained local classifier found in the temporary directory.
Expand All @@ -119,6 +131,7 @@ def __init__(
self.bert = bert
self.classifier_abbreviation = classifier_abbreviation
self.calibration_method = calibration_method
self.probability_combiner = probability_combiner
self.tmp_dir = tmp_dir

def fit(self, X, y, sample_weight=None):
Expand Down Expand Up @@ -152,6 +165,15 @@ def fit(self, X, y, sample_weight=None):
self._clean_up()

def _pre_fit(self, X, y, sample_weight):
# check params
if (
self.probability_combiner
and self.probability_combiner not in probability_combiner_init_strings
):
raise ValueError(
f"probability_combiner must be one of {', '.join(probability_combiner_init_strings)} or None."
)

# Check that X and y have correct shape
# and convert them to np.ndarray if need be

Expand Down
20 changes: 8 additions & 12 deletions hiclass/LocalClassifierPerLevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator
from sklearn.utils._tags import ClassifierTags
from sklearn.utils.validation import check_array, check_is_fitted

from hiclass._calibration.Calibrator import _Calibrator
from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.HierarchicalClassifier import HierarchicalClassifier
from hiclass._calibration.Calibrator import _Calibrator

from hiclass.probability_combiner import (
init_strings as probability_combiner_init_strings,
)

try:
import ray
Expand Down Expand Up @@ -113,13 +110,12 @@ def __init__(
self.return_all_probabilities = return_all_probabilities
self.probability_combiner = probability_combiner

if (
self.probability_combiner
and self.probability_combiner not in probability_combiner_init_strings
):
raise ValueError(
f"probability_combiner must be one of {', '.join(probability_combiner_init_strings)} or None."
)
def __sklearn_tags__(self):
"""Configure annotations of estimator to allow inspection of capabilities, such as sparse matrix support."""
tags = super().__sklearn_tags__()
tags.input_tags.sparse = True
tags.classifier_tags = ClassifierTags()
return tags

def fit(self, X, y, sample_weight=None):
"""
Expand Down
23 changes: 9 additions & 14 deletions hiclass/LocalClassifierPerNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,14 @@
import networkx as nx
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils._tags import ClassifierTags
from sklearn.utils.validation import check_array, check_is_fitted

from hiclass import BinaryPolicy
from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.HierarchicalClassifier import HierarchicalClassifier
from hiclass._calibration.Calibrator import _Calibrator

from hiclass.probability_combiner import (
init_strings as probability_combiner_init_strings,
)

from hiclass._hiclass_utils import _normalize_probabilities
from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.HierarchicalClassifier import HierarchicalClassifier


class LocalClassifierPerNode(BaseEstimator, HierarchicalClassifier):
Expand Down Expand Up @@ -122,13 +118,12 @@ def __init__(
self.return_all_probabilities = return_all_probabilities
self.probability_combiner = probability_combiner

if (
self.probability_combiner
and self.probability_combiner not in probability_combiner_init_strings
):
raise ValueError(
f"probability_combiner must be one of {', '.join(probability_combiner_init_strings)} or None."
)
def __sklearn_tags__(self):
"""Configure annotations of estimator to allow inspection of capabilities, such as sparse matrix support."""
tags = super().__sklearn_tags__()
tags.input_tags.sparse = True
tags.classifier_tags = ClassifierTags()
return tags

def fit(self, X, y, sample_weight=None):
"""
Expand Down
23 changes: 9 additions & 14 deletions hiclass/LocalClassifierPerParentNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,13 @@
import networkx as nx
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils._tags import ClassifierTags
from sklearn.utils.validation import check_array, check_is_fitted

from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.HierarchicalClassifier import HierarchicalClassifier
from hiclass._calibration.Calibrator import _Calibrator

from hiclass.probability_combiner import (
init_strings as probability_combiner_init_strings,
)

from hiclass._hiclass_utils import _normalize_probabilities
from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.HierarchicalClassifier import HierarchicalClassifier


class LocalClassifierPerParentNode(BaseEstimator, HierarchicalClassifier):
Expand Down Expand Up @@ -108,13 +104,12 @@ def __init__(
self.return_all_probabilities = return_all_probabilities
self.probability_combiner = probability_combiner

if (
self.probability_combiner
and self.probability_combiner not in probability_combiner_init_strings
):
raise ValueError(
f"probability_combiner must be one of {', '.join(probability_combiner_init_strings)} or None."
)
def __sklearn_tags__(self):
"""Configure annotations of estimator to allow inspection of capabilities, such as sparse matrix support."""
tags = super().__sklearn_tags__()
tags.input_tags.sparse = True
tags.classifier_tags = ClassifierTags()
return tags

def fit(self, X, y, sample_weight=None):
"""
Expand Down
8 changes: 0 additions & 8 deletions hiclass/MultiLabelHierarchicalClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,6 @@
from sklearn.linear_model import LogisticRegression
from sklearn.utils.validation import _check_sample_weight

import functools
import sklearn.utils.validation

# TODO: Move to MultiLabelHierarchicalClassifier (Parent Class)
sklearn.utils.validation.check_array = functools.partial(
sklearn.utils.validation.check_array, allow_nd=True
)

try:
import ray
except ImportError:
Expand Down
28 changes: 17 additions & 11 deletions hiclass/MultiLabelLocalClassifierPerNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
Numeric and string output labels are both handled.
"""

# monkeypatching check_array to accept 3 dimensional arrays
import functools
from copy import deepcopy

import functools
import networkx as nx
import numpy as np
import sklearn.utils.validation
from sklearn.base import BaseEstimator
from sklearn.utils._tags import ClassifierTags, TargetTags
from sklearn.utils.validation import check_is_fitted

from hiclass import BinaryPolicy
from hiclass.ConstantClassifier import ConstantClassifier
Expand All @@ -17,13 +22,6 @@
make_leveled,
)

from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted

# monkeypatching check_array to accept 3 dimensional arrays
import sklearn.utils.validation

# TODO: Move to MultiLabelHierarchicalClassifier (Parent Class)
sklearn.utils.validation.check_array = functools.partial(
sklearn.utils.validation.check_array, allow_nd=True
)
Expand Down Expand Up @@ -108,6 +106,16 @@ def __init__(
self.binary_policy = binary_policy
self.tolerance = tolerance

def __sklearn_tags__(self):
"""Configure annotations of estimator to allow inspection of capabilities, such as sparse matrix support."""
tags = super().__sklearn_tags__()
tags.input_tags.sparse = True
tags.classifier_tags = ClassifierTags()
tags.target_tags = TargetTags(required=True)
tags.target_tags.multi_output = True
tags.target_tags.single_output = False
return tags

def fit(self, X, y, sample_weight=None):
"""
Fit a local classifier per node.
Expand Down Expand Up @@ -175,9 +183,7 @@ def predict(self, X, tolerance: float = None) -> np.ndarray:

# Input validation
if not self.bert:
X = sklearn.utils.validation.check_array(
X, accept_sparse="csr"
) # TODO: Decide allow_nd True or False
X = sklearn.utils.validation.check_array(X, accept_sparse="csr")
else:
X = np.array(X)

Expand Down
22 changes: 19 additions & 3 deletions hiclass/MultiLabelLocalClassifierPerParentNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,29 @@
Numeric and string output labels are both handled.
"""

from copy import deepcopy
# monkeypatching check_array to accept 3 dimensional arrays
import functools
from collections import defaultdict
from copy import deepcopy

import networkx as nx
import numpy as np
import sklearn.utils.validation
from scipy.sparse import csr_matrix, vstack
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_array, check_is_fitted
from sklearn.utils._tags import ClassifierTags, TargetTags
from sklearn.utils.validation import check_is_fitted

from hiclass.ConstantClassifier import ConstantClassifier
from hiclass.MultiLabelHierarchicalClassifier import (
MultiLabelHierarchicalClassifier,
make_leveled,
)

sklearn.utils.validation.check_array = functools.partial(
sklearn.utils.validation.check_array, allow_nd=True
)


class MultiLabelLocalClassifierPerParentNode(
BaseEstimator, MultiLabelHierarchicalClassifier
Expand Down Expand Up @@ -88,6 +96,14 @@ def __init__(
bert=bert,
)

def __sklearn_tags__(self):
"""Configure annotations of estimator to allow inspection of capabilities, such as sparse matrix support."""
tags = super().__sklearn_tags__()
tags.input_tags.sparse = True
tags.classifier_tags = ClassifierTags()
tags.target_tags = TargetTags(required=True)
return tags

def fit(self, X, y, sample_weight=None):
"""
Fit a local classifier per parent node.
Expand Down Expand Up @@ -152,7 +168,7 @@ def predict(self, X, tolerance: float = None):

# Input validation
if not self.bert:
X = check_array(X, accept_sparse="csr")
X = sklearn.utils.validation.check_array(X, accept_sparse="csr")
else:
X = np.array(X)

Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
KEYWORDS = ["hierarchical classification"]
DACS_SOFTWARE = "https://gitlab.com/dacs-hpi"
# What packages are required for this module to be executed?
REQUIRED = ["networkx", "numpy", "scikit-learn<1.5", "scipy<1.13"]
REQUIRED = ["networkx", "numpy", "scikit-learn", "scipy"]

# What packages are optional?
# 'fancy feature': ['django'],}
EXTRAS = {
"ray": ["ray>=1.11.0"],
"xai": ["shap==0.44.1", "xarray==2023.1.0"],
"xai": ["shap", "xarray"],
"dev": [
"flake8",
"pytest",
Expand All @@ -43,8 +43,8 @@
"black==24.2.0",
"pre-commit==2.20.0",
"ray",
"shap==0.44.1",
"xarray==2023.1.0",
"shap",
"xarray",
"bert-sklearn",
],
}
Expand Down
Loading
Loading