From 2a811afb0849644caef53152974ca63a542fd6c5 Mon Sep 17 00:00:00 2001
From: Konstantin Lopuhin <kostia.lopuhin@gmail.com>
Date: Wed, 26 Mar 2025 21:52:40 +0000
Subject: [PATCH 1/3] modernize type signatures in sklearn/text.py

---
 eli5/sklearn/text.py | 45 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py
index fb2748bf..56b2cfe9 100644
--- a/eli5/sklearn/text.py
+++ b/eli5/sklearn/text.py
@@ -1,11 +1,7 @@
-from __future__ import absolute_import
-from typing import Any, Union, Callable, Dict, List, Optional, Set, Tuple
+from typing import Callable, Optional
 
 from sklearn.pipeline import FeatureUnion
-try:
-    from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin
-except ImportError:  # Changed in scikit-learn 0.22
-    from sklearn.feature_extraction.text import VectorizerMixin
+from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin
 
 from eli5.base import (
     DocWeightedSpans, WeightedSpans, FeatureWeights, FeatureWeight,
@@ -15,8 +11,7 @@
 from eli5.sklearn._span_analyzers import build_span_analyzer
 
 
-def get_weighted_spans(doc, vec, feature_weights):
-    # type: (Any, Any, FeatureWeights) -> Optional[WeightedSpans]
+def get_weighted_spans(doc, vec, feature_weights: FeatureWeights) -> Optional[WeightedSpans]:
     """ If possible, return a dict with preprocessed document and a list
     of spans with weights, corresponding to features in the document.
     """
@@ -33,8 +28,7 @@ def get_weighted_spans(doc, vec, feature_weights):
     return None
 
 
-def add_weighted_spans(doc, vec, vectorized, target_expl):
-    # type: (Any, Any, bool, TargetExplanation) -> None
+def add_weighted_spans(doc, vec, vectorized: bool, target_expl: TargetExplanation) -> None:
     """
     Compute and set ``target_expl.weighted_spans`` attribute, when possible.
     """
@@ -47,15 +41,14 @@ def add_weighted_spans(doc, vec, vectorized, target_expl):
         target_expl.weighted_spans = weighted_spans
 
 
-FoundFeatures = Dict[Tuple[str, int], float]
+FoundFeatures = dict[tuple[str, int], float]
 
 
 def _get_doc_weighted_spans(doc,
                             vec,
-                            feature_weights,  # type: FeatureWeights
-                            feature_fn=None   # type: Optional[Callable[[str], str]]
-                            ):
-    # type: (...) -> Optional[Tuple[FoundFeatures, DocWeightedSpans]]
+                            feature_weights: FeatureWeights,
+                            feature_fn: Optional[Callable[[str], str]] = None,
+                            ) -> Optional[tuple[FoundFeatures, DocWeightedSpans]]:
     if isinstance(vec, InvertableHashingVectorizer):
         vec = vec.vec
 
@@ -88,10 +81,9 @@ def _get_doc_weighted_spans(doc,
     )
 
 
-def _get_feature_weights_dict(feature_weights,  # type: FeatureWeights
-                              feature_fn        # type: Optional[Callable[[str], str]]
-                              ):
-    # type: (...) -> Dict[str, Tuple[float, Tuple[str, int]]]
+def _get_feature_weights_dict(feature_weights: FeatureWeights,
+                              feature_fn: Optional[Callable[[str], str]],
+                              ) -> dict[str, tuple[float, tuple[str, int]]]:
     """ Return {feat_name: (weight, (group, idx))} mapping. """
     return {
         # (group, idx) is an unique feature identifier, e.g. ('pos', 2)
@@ -112,8 +104,8 @@ def _get_features(feature, feature_fn=None):
     return features
 
 
-def _get_weighted_spans_from_union(doc, vec_union, feature_weights):
-    # type: (Any, FeatureUnion, FeatureWeights) -> Optional[WeightedSpans]
+def _get_weighted_spans_from_union(
+        doc, vec_union: FeatureUnion, feature_weights: FeatureWeights) -> Optional[WeightedSpans]:
     docs_weighted_spans = []
     named_found_features = []
     for vec_name, vec in vec_union.transformer_list:
@@ -142,12 +134,13 @@ def feature_fn(name):
         return None
 
 
-def _get_other(feature_weights, named_found_features):
-    # type: (FeatureWeights, List[Tuple[str, FoundFeatures]]) -> FeatureWeights
+def _get_other(
+        feature_weights: FeatureWeights, named_found_features: list[tuple[str, FoundFeatures]],
+        ) -> FeatureWeights:
     # search for items that were not accounted at all.
-    other_items = []  # type: List[FeatureWeight]
-    accounted_keys = set()  # type: Set[Tuple[str, int]]
-    all_found_features = set()  # type: Set[Tuple[str, int]]
+    other_items: list[FeatureWeight] = []
+    accounted_keys: set[tuple[str, int]] = set()
+    all_found_features: set[tuple[str, int]] = set()
     for _, found_features in named_found_features:
         all_found_features.update(found_features)
 

From dca98dd2c6039659dcf4d05a59557fdceb783e26 Mon Sep 17 00:00:00 2001
From: Konstantin Lopuhin <kostia.lopuhin@gmail.com>
Date: Sun, 30 Mar 2025 22:18:59 +0100
Subject: [PATCH 2/3] modernize types and drop Python 2 leftovers in base.py and formatters/html.py

---
 eli5/base.py            | 20 ++++-----
 eli5/formatters/html.py | 98 ++++++++++++++++-------------------------
 2 files changed, 47 insertions(+), 71 deletions(-)

diff --git a/eli5/base.py b/eli5/base.py
index bb072499..44834a6d 100644
--- a/eli5/base.py
+++ b/eli5/base.py
@@ -11,7 +11,7 @@
 
 
 @attrs
-class Explanation(object):
+class Explanation:
     """ An explanation for classifier or regressor,
     it can either explain weights or a single prediction.
     """
@@ -49,7 +49,7 @@ def _repr_html_(self):
 
 
 @attrs
-class FeatureImportances(object):
+class FeatureImportances:
     """ Feature importances with number of remaining non-zero features.
     """
     def __init__(self, importances, remaining):
@@ -64,7 +64,7 @@ def from_names_values(cls, names, values, std=None, **kwargs):
 
 
 @attrs
-class TargetExplanation(object):
+class TargetExplanation:
     """ Explanation for a single target or class.
     Feature weights are stored in the :feature_weights: attribute,
     and features highlighted in text in the :weighted_spans: attribute.
@@ -92,7 +92,7 @@ def __init__(self,
 
 
 @attrs
-class FeatureWeights(object):
+class FeatureWeights:
     """ Weights for top features, :pos: for positive and :neg: for negative,
     sorted by descending absolute value.
     Number of remaining positive and negative features are stored in
@@ -111,7 +111,7 @@ def __init__(self,
 
 
 @attrs
-class FeatureWeight(object):
+class FeatureWeight:
     def __init__(self, feature: Feature, weight: float, std: Optional[float] = None, value=None):
         self.feature = feature
         self.weight = weight
@@ -120,7 +120,7 @@ def __init__(self, feature: Feature, weight: float, std: Optional[float] = None,
 
 
 @attrs
-class WeightedSpans(object):
+class WeightedSpans:
     """ Holds highlighted spans for parts of document - a DocWeightedSpans
     object for each vectorizer, and other features not highlighted anywhere.
     """
@@ -140,7 +140,7 @@ def __init__(self,
 
 
 @attrs
-class DocWeightedSpans(object):
+class DocWeightedSpans:
     """ Features highlighted in text. :document: is a pre-processed document
     before applying the analyzer. :weighted_spans: holds a list of spans
     for features found in text (span indices correspond to
@@ -161,7 +161,7 @@ def __init__(self,
 
 
 @attrs
-class TransitionFeatureWeights(object):
+class TransitionFeatureWeights:
     """ Weights matrix for transition features. """
     def __init__(self, class_names: list[str], coef):
         self.class_names = class_names
@@ -169,7 +169,7 @@ def __init__(self, class_names: list[str], coef):
 
 
 @attrs
-class TreeInfo(object):
+class TreeInfo:
     """ Information about the decision tree. :criterion: is the name of
     the function to measure the quality of a split, :tree: holds all nodes
     of the tree, and :graphviz: is the tree rendered in graphviz .dot format.
@@ -182,7 +182,7 @@ def __init__(self, criterion: str, tree: 'NodeInfo', graphviz: str, is_classific
 
 
 @attrs
-class NodeInfo(object):
+class NodeInfo:
     """ A node in a binary tree.
     Pointers to left and right children are in :left: and :right: attributes.
     """
diff --git a/eli5/formatters/html.py b/eli5/formatters/html.py
index dc597090..2f601ec5 100644
--- a/eli5/formatters/html.py
+++ b/eli5/formatters/html.py
@@ -1,7 +1,6 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
 from itertools import groupby
-from typing import List, Optional, Tuple
+from html import escape
+from typing import Optional
 
 import numpy as np
 from jinja2 import Environment, PackageLoader
@@ -32,16 +31,15 @@
 ))
 
 
-def format_as_html(explanation,  # type: Explanation
-                   include_styles=True,  # type: bool
-                   force_weights=True,  # type: bool
+def format_as_html(explanation: Explanation,
+                   include_styles: bool = True,
+                   force_weights: bool = True,
                    show=fields.ALL,
-                   preserve_density=None,  # type: Optional[bool]
-                   highlight_spaces=None,  # type: Optional[bool]
-                   horizontal_layout=True,  # type: bool
-                   show_feature_values=False  # type: bool
-                   ):
-    # type: (...) -> str
+                   preserve_density: Optional[bool] = None,
+                   highlight_spaces: Optional[bool] = None,
+                   horizontal_layout: bool = True,
+                   show_feature_values: bool = False,
+                   ) -> str:
     """ Format explanation as html.
     Most styles are inline, but some are included separately in <style> tag,
     you can omit them by passing ``include_styles=False`` and call
@@ -130,8 +128,7 @@ def format_as_html(explanation,  # type: Explanation
 '''.replace('\n', ' ')
 
 
-def format_html_styles():
-    # type: () -> str
+def format_html_styles() -> str:
     """ Format just the styles,
     use with ``format_as_html(explanation, include_styles=False)``.
     """
@@ -139,10 +136,9 @@ def format_html_styles():
 
 
 def render_targets_weighted_spans(
-        targets,  # type: List[TargetExplanation]
-        preserve_density,  # type: Optional[bool]
-    ):
-    # type: (...) -> List[Optional[str]]
+        targets: list[TargetExplanation],
+        preserve_density: Optional[bool],
+    ) -> list[Optional[str]]:
     """ Return a list of rendered weighted spans for targets.
     Function must accept a list in order to select consistent weight
     ranges across all targets.
@@ -150,22 +146,19 @@ def render_targets_weighted_spans(
     prepared_weighted_spans = prepare_weighted_spans(
         targets, preserve_density)
 
-    def _fmt_pws(pws):
-        # type: (PreparedWeightedSpans) -> str
+    def _fmt_pws(pws: PreparedWeightedSpans) -> str:
         name = ('<b>{}:</b> '.format(pws.doc_weighted_spans.vec_name)
                 if pws.doc_weighted_spans.vec_name else '')
         return '{}{}'.format(name, render_weighted_spans(pws))
 
-    def _fmt_pws_list(pws_lst):
-        # type: (List[PreparedWeightedSpans]) -> str
+    def _fmt_pws_list(pws_lst: list[PreparedWeightedSpans]) -> str:
         return '<br/>'.join(_fmt_pws(pws) for pws in pws_lst)
 
     return [_fmt_pws_list(pws_lst) if pws_lst else None
             for pws_lst in prepared_weighted_spans]
 
 
-def render_weighted_spans(pws):
-    # type: (PreparedWeightedSpans) -> str
+def render_weighted_spans(pws: PreparedWeightedSpans) -> str:
     # TODO - for longer documents, an option to remove text
     # without active features
     return ''.join(
@@ -177,11 +170,10 @@ def render_weighted_spans(pws):
             key=lambda x: x[1]))
 
 
-def _colorize(token,  # type: str
-              weight,  # type: float
-              weight_range,  # type: float
-              ):
-    # type: (...) -> str
+def _colorize(token: str,
+              weight: float,
+              weight_range: float,
+              ) -> str:
     """ Return token wrapped in a span with some styles
     (calculated from weight and weight_range) applied.
     """
@@ -208,8 +200,7 @@ def _colorize(token,  # type: str
         )
 
 
-def _weight_opacity(weight, weight_range):
-    # type: (float, float) -> str
+def _weight_opacity(weight: float, weight_range: float) -> str:
     """ Return opacity value for given weight as a string.
     """
     min_opacity = 0.8
@@ -220,11 +211,10 @@ def _weight_opacity(weight, weight_range):
     return '{:.2f}'.format(min_opacity + (1 - min_opacity) * rel_weight)
 
 
-_HSL_COLOR = Tuple[float, float, float]
+_HSL_COLOR = tuple[float, float, float]
 
 
-def weight_color_hsl(weight, weight_range, min_lightness=0.8):
-    # type: (float, float, float) -> _HSL_COLOR
+def weight_color_hsl(weight: float, weight_range: float, min_lightness: float = 0.8) -> _HSL_COLOR:
     """ Return HSL color components for given weight,
     where the max absolute weight is given by weight_range.
     """
@@ -235,21 +225,18 @@ def weight_color_hsl(weight, weight_range, min_lightness=0.8):
     return hue, saturation, lightness
 
 
-def format_hsl(hsl_color):
-    # type: (_HSL_COLOR) -> str
+def format_hsl(hsl_color: _HSL_COLOR) -> str:
     """ Format hsl color as css color string.
     """
     hue, saturation, lightness = hsl_color
     return 'hsl({}, {:.2%}, {:.2%})'.format(hue, saturation, lightness)
 
 
-def _hue(weight):
-    # type: (float) -> float
+def _hue(weight: float) -> float:
     return 120 if weight > 0 else 0
 
 
-def get_weight_range(weights):
-    # type: (FeatureWeights) -> float
+def get_weight_range(weights: FeatureWeights) -> float:
     """ Max absolute feature for pos and neg weights.
     """
     return max_or_0(abs(fw.weight)
@@ -258,11 +245,10 @@ def get_weight_range(weights):
 
 
 def remaining_weight_color_hsl(
-        ws,  # type: List[FeatureWeight]
-        weight_range,  # type: float
-        pos_neg,  # type: str
-    ):
-    # type: (...) -> _HSL_COLOR
+        ws: list[FeatureWeight],
+        weight_range: float,
+        pos_neg: str,
+    ) -> _HSL_COLOR:
     """ Color for "remaining" row.
     Handles a number of edge cases: if there are no weights in ws or weight_range
     is zero, assume the worst (most intensive positive or negative color).
@@ -278,8 +264,7 @@ def remaining_weight_color_hsl(
     return weight_color_hsl(weight, weight_range)
 
 
-def _format_unhashed_feature(feature, weight, hl_spaces):
-    # type: (...) -> str
+def _format_unhashed_feature(feature, weight, hl_spaces) -> str:
     """ Format unhashed feature: show first (most probable) candidate,
     display other candidates in title attribute.
     """
@@ -295,8 +280,7 @@ def _format_unhashed_feature(feature, weight, hl_spaces):
         return html
 
 
-def _format_feature(feature, weight, hl_spaces):
-    # type: (...) -> str
+def _format_feature(feature, weight, hl_spaces) -> str:
     """ Format any feature.
     """
     if isinstance(feature, FormattedFeatureName):
@@ -308,14 +292,12 @@ def _format_feature(feature, weight, hl_spaces):
         return _format_single_feature(feature, weight, hl_spaces=hl_spaces)
 
 
-def _format_single_feature(feature, weight, hl_spaces):
-    # type: (str, float, bool) -> str
+def _format_single_feature(feature: str, weight: float, hl_spaces: bool) -> str:
     feature = html_escape(feature)
     if not hl_spaces:
         return feature
 
-    def replacer(n_spaces, side):
-        # type: (int, str) -> str
+    def replacer(n_spaces: int, side: str) -> str:
         m = '0.1em'
         margins = {'left': (m, 0), 'right': (0, m), 'center': (m, m)}[side]
         style = '; '.join([
@@ -331,18 +313,12 @@ def replacer(n_spaces, side):
     return replace_spaces(feature, replacer)
 
 
-def _format_decision_tree(treedict):
-    # type: (...) -> str
+def _format_decision_tree(treedict) -> str:
     if treedict.graphviz and _graphviz.is_supported():
         return _graphviz.dot2svg(treedict.graphviz)
     else:
         return tree2text(treedict)
 
 
-def html_escape(text):
-    # type: (str) -> str
-    try:
-        from html import escape
-    except ImportError:
-        from cgi import escape  # type: ignore
+def html_escape(text: str) -> str:
     return escape(text, quote=True)

From 9093a373009c833c9991f5445e00d9e931595311 Mon Sep 17 00:00:00 2001
From: Konstantin Lopuhin <kostia.lopuhin@gmail.com>
Date: Sun, 30 Mar 2025 22:21:28 +0100
Subject: [PATCH 3/3] modernize text helpers types

---
 eli5/formatters/text_helpers.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/eli5/formatters/text_helpers.py b/eli5/formatters/text_helpers.py
index c63c66b2..4436eac3 100644
--- a/eli5/formatters/text_helpers.py
+++ b/eli5/formatters/text_helpers.py
@@ -1,15 +1,16 @@
 from collections import Counter
-from typing import List, Optional
+from typing import Optional
 
 import numpy as np
 
-from eli5.base import TargetExplanation, WeightedSpans, DocWeightedSpans
+from eli5.base import TargetExplanation, DocWeightedSpans
 from eli5.base_utils import attrs
 from eli5.utils import max_or_0
 
 
-def get_char_weights(doc_weighted_spans, preserve_density=None):
-    # type: (DocWeightedSpans, Optional[bool]) -> np.ndarray
+def get_char_weights(
+        doc_weighted_spans: DocWeightedSpans, preserve_density: Optional[bool] = None,
+    ) -> np.ndarray:
     """ Return character weights for a text document with highlighted features.
     If preserve_density is True, then color for longer fragments will be
     less intensive than for shorter fragments, so that "sum" of intensities
@@ -35,11 +36,10 @@ def get_char_weights(doc_weighted_spans, preserve_density=None):
 @attrs
 class PreparedWeightedSpans(object):
     def __init__(self,
-                 doc_weighted_spans,  # type: DocWeightedSpans
-                 char_weights,  # type: np.ndarray
-                 weight_range,  # type: float
+                 doc_weighted_spans: DocWeightedSpans,
+                 char_weights: np.ndarray,
+                 weight_range: float,
                  ):
-        # type: (...) -> None
         self.doc_weighted_spans = doc_weighted_spans
         self.char_weights = char_weights
         self.weight_range = weight_range
@@ -55,25 +55,24 @@ def __eq__(self, other):
         return False
 
 
-def prepare_weighted_spans(targets,  # type: List[TargetExplanation]
-                           preserve_density=None,  # type: Optional[bool]
-                           ):
-    # type: (...) -> List[Optional[List[PreparedWeightedSpans]]]
+def prepare_weighted_spans(targets: list[TargetExplanation],
+                           preserve_density: Optional[bool] = None,
+                           ) -> list[Optional[list[PreparedWeightedSpans]]]:
     """ Return weighted spans prepared for rendering.
     Calculate a separate weight range for each different weighted
     span (for each different index): each target has the same number
     of weighted spans.
     """
-    targets_char_weights = [
+    targets_char_weights: list[Optional[list[np.ndarray]]] = [
         [get_char_weights(ws, preserve_density=preserve_density)
          for ws in t.weighted_spans.docs_weighted_spans]
          if t.weighted_spans else None
-         for t in targets]  # type: List[Optional[List[np.ndarray]]]
+         for t in targets]
     max_idx = max_or_0(len(ch_w or []) for ch_w in targets_char_weights)
 
-    targets_char_weights_not_None = [
+    targets_char_weights_not_None: list[list[np.ndarray]] = [
         cw for cw in targets_char_weights
-        if cw is not None]  # type: List[List[np.ndarray]]
+        if cw is not None]
 
     spans_weight_ranges = [
         max_or_0(