From 2a811afb0849644caef53152974ca63a542fd6c5 Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Wed, 26 Mar 2025 21:52:40 +0000 Subject: [PATCH 1/3] modernize type signatures in sklearn/text.py --- eli5/sklearn/text.py | 45 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py index fb2748bf..56b2cfe9 100644 --- a/eli5/sklearn/text.py +++ b/eli5/sklearn/text.py @@ -1,11 +1,7 @@ -from __future__ import absolute_import -from typing import Any, Union, Callable, Dict, List, Optional, Set, Tuple +from typing import Callable, Optional from sklearn.pipeline import FeatureUnion -try: - from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin -except ImportError: # Changed in scikit-learn 0.22 - from sklearn.feature_extraction.text import VectorizerMixin +from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin from eli5.base import ( DocWeightedSpans, WeightedSpans, FeatureWeights, FeatureWeight, @@ -15,8 +11,7 @@ from eli5.sklearn._span_analyzers import build_span_analyzer -def get_weighted_spans(doc, vec, feature_weights): - # type: (Any, Any, FeatureWeights) -> Optional[WeightedSpans] +def get_weighted_spans(doc, vec, feature_weights: FeatureWeights) -> Optional[WeightedSpans]: """ If possible, return a dict with preprocessed document and a list of spans with weights, corresponding to features in the document. """ @@ -33,8 +28,7 @@ def get_weighted_spans(doc, vec, feature_weights): return None -def add_weighted_spans(doc, vec, vectorized, target_expl): - # type: (Any, Any, bool, TargetExplanation) -> None +def add_weighted_spans(doc, vec, vectorized: bool, target_expl: TargetExplanation) -> None: """ Compute and set ``target_expl.weighted_spans`` attribute, when possible. """ @@ -47,15 +41,14 @@ def add_weighted_spans(doc, vec, vectorized, target_expl): target_expl.weighted_spans = weighted_spans -FoundFeatures = Dict[Tuple[str, int], float] +FoundFeatures = dict[tuple[str, int], float] def _get_doc_weighted_spans(doc, vec, - feature_weights, # type: FeatureWeights - feature_fn=None # type: Optional[Callable[[str], str]] - ): - # type: (...) -> Optional[Tuple[FoundFeatures, DocWeightedSpans]] + feature_weights: FeatureWeights, + feature_fn: Optional[Callable[[str], str]] = None, + ) -> Optional[tuple[FoundFeatures, DocWeightedSpans]]: if isinstance(vec, InvertableHashingVectorizer): vec = vec.vec @@ -88,10 +81,9 @@ def _get_doc_weighted_spans(doc, ) -def _get_feature_weights_dict(feature_weights, # type: FeatureWeights - feature_fn # type: Optional[Callable[[str], str]] - ): - # type: (...) -> Dict[str, Tuple[float, Tuple[str, int]]] +def _get_feature_weights_dict(feature_weights: FeatureWeights, + feature_fn: Optional[Callable[[str], str]], + ) -> dict[str, tuple[float, tuple[str, int]]]: """ Return {feat_name: (weight, (group, idx))} mapping. """ return { # (group, idx) is an unique feature identifier, e.g. ('pos', 2) @@ -112,8 +104,8 @@ def _get_features(feature, feature_fn=None): return features -def _get_weighted_spans_from_union(doc, vec_union, feature_weights): - # type: (Any, FeatureUnion, FeatureWeights) -> Optional[WeightedSpans] +def _get_weighted_spans_from_union( + doc, vec_union: FeatureUnion, feature_weights: FeatureWeights) -> Optional[WeightedSpans]: docs_weighted_spans = [] named_found_features = [] for vec_name, vec in vec_union.transformer_list: @@ -142,12 +134,13 @@ def feature_fn(name): return None -def _get_other(feature_weights, named_found_features): - # type: (FeatureWeights, List[Tuple[str, FoundFeatures]]) -> FeatureWeights +def _get_other( + feature_weights: FeatureWeights, named_found_features: list[tuple[str, FoundFeatures]], + ) -> FeatureWeights: # search for items that were not accounted at all. - other_items = [] # type: List[FeatureWeight] - accounted_keys = set() # type: Set[Tuple[str, int]] - all_found_features = set() # type: Set[Tuple[str, int]] + other_items: list[FeatureWeight] = [] + accounted_keys: set[tuple[str, int]] = set() + all_found_features: set[tuple[str, int]] = set() for _, found_features in named_found_features: all_found_features.update(found_features) From dca98dd2c6039659dcf4d05a59557fdceb783e26 Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Sun, 30 Mar 2025 22:18:59 +0100 Subject: [PATCH 2/3] modernize more types --- eli5/base.py | 20 ++++----- eli5/formatters/html.py | 98 ++++++++++++++++------------------------- 2 files changed, 47 insertions(+), 71 deletions(-) diff --git a/eli5/base.py b/eli5/base.py index bb072499..44834a6d 100644 --- a/eli5/base.py +++ b/eli5/base.py @@ -11,7 +11,7 @@ @attrs -class Explanation(object): +class Explanation: """ An explanation for classifier or regressor, it can either explain weights or a single prediction. """ @@ -49,7 +49,7 @@ def _repr_html_(self): @attrs -class FeatureImportances(object): +class FeatureImportances: """ Feature importances with number of remaining non-zero features. """ def __init__(self, importances, remaining): @@ -64,7 +64,7 @@ def from_names_values(cls, names, values, std=None, **kwargs): @attrs -class TargetExplanation(object): +class TargetExplanation: """ Explanation for a single target or class. Feature weights are stored in the :feature_weights: attribute, and features highlighted in text in the :weighted_spans: attribute. @@ -92,7 +92,7 @@ def __init__(self, @attrs -class FeatureWeights(object): +class FeatureWeights: """ Weights for top features, :pos: for positive and :neg: for negative, sorted by descending absolute value. Number of remaining positive and negative features are stored in @@ -111,7 +111,7 @@ def __init__(self, @attrs -class FeatureWeight(object): +class FeatureWeight: def __init__(self, feature: Feature, weight: float, std: Optional[float] = None, value=None): self.feature = feature self.weight = weight @@ -120,7 +120,7 @@ def __init__(self, feature: Feature, weight: float, std: Optional[float] = None, @attrs -class WeightedSpans(object): +class WeightedSpans: """ Holds highlighted spans for parts of document - a DocWeightedSpans object for each vectorizer, and other features not highlighted anywhere. """ @@ -140,7 +140,7 @@ def __init__(self, @attrs -class DocWeightedSpans(object): +class DocWeightedSpans: """ Features highlighted in text. :document: is a pre-processed document before applying the analyzer. :weighted_spans: holds a list of spans for features found in text (span indices correspond to @@ -161,7 +161,7 @@ def __init__(self, @attrs -class TransitionFeatureWeights(object): +class TransitionFeatureWeights: """ Weights matrix for transition features. """ def __init__(self, class_names: list[str], coef): self.class_names = class_names @@ -169,7 +169,7 @@ def __init__(self, class_names: list[str], coef): @attrs -class TreeInfo(object): +class TreeInfo: """ Information about the decision tree. :criterion: is the name of the function to measure the quality of a split, :tree: holds all nodes of the tree, and :graphviz: is the tree rendered in graphviz .dot format. @@ -182,7 +182,7 @@ def __init__(self, criterion: str, tree: 'NodeInfo', graphviz: str, is_classific @attrs -class NodeInfo(object): +class NodeInfo: """ A node in a binary tree. Pointers to left and right children are in :left: and :right: attributes. """ diff --git a/eli5/formatters/html.py b/eli5/formatters/html.py index dc597090..2f601ec5 100644 --- a/eli5/formatters/html.py +++ b/eli5/formatters/html.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import from itertools import groupby -from typing import List, Optional, Tuple +from html import escape +from typing import Optional import numpy as np from jinja2 import Environment, PackageLoader @@ -32,16 +31,15 @@ )) -def format_as_html(explanation, # type: Explanation - include_styles=True, # type: bool - force_weights=True, # type: bool +def format_as_html(explanation: Explanation, + include_styles=True, + force_weights=True, show=fields.ALL, - preserve_density=None, # type: Optional[bool] - highlight_spaces=None, # type: Optional[bool] - horizontal_layout=True, # type: bool - show_feature_values=False # type: bool - ): - # type: (...) -> str + preserve_density: Optional[bool] = None, + highlight_spaces: Optional[bool] = None, + horizontal_layout=True, + show_feature_values=False, + ) -> str: """ Format explanation as html. Most styles are inline, but some are included separately in