Skip to content

Commit

Permalink
Sync pop
Browse files Browse the repository at this point in the history
  commit 4296e7964be0cccce99a18d8cda0c67cd4b97c41
  Author: Dominik Jain <[email protected]>
  Date:   Mon Sep 11 15:58:38 2023 +0200

      Renamed classes to improve/simplify evaluation interfaces:

       - *EvaluationUtil -> *ModelEvaluation
       - Vector*ModelEvaluatorParams -> *EvaluatorParams
         (to make applications in high-level evaluation interfaces seem less
         random and more to the point, prioritising high-level interface)

  src/sensai/evaluation/__init__.py
  src/sensai/evaluation/crossval.py
  src/sensai/evaluation/eval_util.py
  src/sensai/evaluation/evaluator.py
  src/sensai/evaluation/metric_computation.py
  src/sensai/torch/torch_eval_util.py
  • Loading branch information
opcode81 committed Sep 11, 2023
1 parent c6f76f0 commit b42bb0b
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 43 deletions.
4 changes: 2 additions & 2 deletions src/sensai/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from .crossval import VectorClassificationModelCrossValidator, VectorRegressionModelCrossValidator, \
VectorClassificationModelCrossValidationData, VectorRegressionModelCrossValidationData, \
VectorModelCrossValidatorParams
from .eval_util import RegressionEvaluationUtil, ClassificationEvaluationUtil, MultiDataEvaluationUtil, \
from .eval_util import RegressionModelEvaluation, ClassificationModelEvaluation, MultiDataModelEvaluation, \
eval_model_via_evaluator, create_evaluation_util, create_vector_model_evaluator, create_vector_model_cross_validator
from .evaluator import VectorClassificationModelEvaluator, VectorRegressionModelEvaluator, \
VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams, \
RegressionEvaluatorParams, ClassificationEvaluatorParams, \
VectorRegressionModelEvaluationData, VectorClassificationModelEvaluationData, \
RuleBasedVectorClassificationModelEvaluator, RuleBasedVectorRegressionModelEvaluator

Expand Down
10 changes: 5 additions & 5 deletions src/sensai/evaluation/crossval.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from .eval_stats.eval_stats_regression import RegressionEvalStats, RegressionEvalStatsCollection
from .evaluator import VectorRegressionModelEvaluationData, VectorClassificationModelEvaluationData, \
VectorModelEvaluationData, VectorClassificationModelEvaluator, VectorRegressionModelEvaluator, \
MetricsDictProvider, VectorModelEvaluator, VectorClassificationModelEvaluatorParams, \
VectorRegressionModelEvaluatorParams, MetricsDictProviderFromFunction
MetricsDictProvider, VectorModelEvaluator, ClassificationEvaluatorParams, \
RegressionEvaluatorParams, MetricsDictProviderFromFunction
from ..data import InputOutputData, DataSplitterFractional
from ..tracking.tracking_base import TrackingContext
from ..util.typing import PandasNamedTuple
Expand Down Expand Up @@ -128,7 +128,7 @@ def __init__(self,
folds: int = 5,
splitter: CrossValidationSplitter = None,
return_trained_models=False,
evaluator_params: Union[VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams] = None,
evaluator_params: Union[RegressionEvaluatorParams, ClassificationEvaluatorParams] = None,
default_splitter_random_seed=42,
default_splitter_shuffle=True):
"""
Expand Down Expand Up @@ -233,7 +233,7 @@ def _create_eval_stats_collection(self, l: List[RegressionEvalStats]) -> Regress

class VectorRegressionModelCrossValidator(VectorModelCrossValidator[VectorRegressionModelCrossValidationData]):
def _create_model_evaluator(self, training_data: InputOutputData, test_data: InputOutputData) -> VectorRegressionModelEvaluator:
evaluator_params = VectorRegressionModelEvaluatorParams.from_dict_or_instance(self.params.evaluatorParams)
evaluator_params = RegressionEvaluatorParams.from_dict_or_instance(self.params.evaluatorParams)
return VectorRegressionModelEvaluator(training_data, test_data=test_data, params=evaluator_params)

def _create_result_data(self, trained_models, eval_data_list, test_indices_list, predicted_var_names) \
Expand All @@ -249,7 +249,7 @@ def _create_eval_stats_collection(self, l: List[ClassificationEvalStats]) -> Cla

class VectorClassificationModelCrossValidator(VectorModelCrossValidator[VectorClassificationModelCrossValidationData]):
def _create_model_evaluator(self, training_data: InputOutputData, test_data: InputOutputData):
evaluator_params = VectorClassificationModelEvaluatorParams.from_dict_or_instance(self.params.evaluatorParams)
evaluator_params = ClassificationEvaluatorParams.from_dict_or_instance(self.params.evaluatorParams)
return VectorClassificationModelEvaluator(training_data, test_data=test_data, params=evaluator_params)

def _create_result_data(self, trained_models, eval_data_list, test_indices_list, predicted_var_names) \
Expand Down
39 changes: 19 additions & 20 deletions src/sensai/evaluation/eval_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from .eval_stats.eval_stats_regression import RegressionEvalStats
from .evaluator import VectorModelEvaluator, VectorModelEvaluationData, VectorRegressionModelEvaluator, \
VectorRegressionModelEvaluationData, VectorClassificationModelEvaluator, VectorClassificationModelEvaluationData, \
VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams
RegressionEvaluatorParams, ClassificationEvaluatorParams
from ..data import InputOutputData
from ..feature_importance import AggregatedFeatureImportance, FeatureImportanceProvider, plot_feature_importance, FeatureImportance
from ..tracking import TrackedExperiment
Expand Down Expand Up @@ -62,14 +62,14 @@ def _is_regression(model: Optional[VectorModel], is_regression: Optional[bool])


def create_vector_model_evaluator(data: InputOutputData, model: VectorModel = None,
is_regression: bool = None, params: Union[VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams] = None) \
is_regression: bool = None, params: Union[RegressionEvaluatorParams, ClassificationEvaluatorParams] = None) \
-> Union[VectorRegressionModelEvaluator, VectorClassificationModelEvaluator]:
is_regression = _is_regression(model, is_regression)
if params is None:
if is_regression:
params = VectorRegressionModelEvaluatorParams(fractional_split_test_fraction=0.2)
params = RegressionEvaluatorParams(fractional_split_test_fraction=0.2)
else:
params = VectorClassificationModelEvaluatorParams(fractional_split_test_fraction=0.2)
params = ClassificationEvaluatorParams(fractional_split_test_fraction=0.2)
log.debug(f"No evaluator parameters specified, using default: {params}")
if is_regression:
return VectorRegressionModelEvaluator(data, params=params)
Expand All @@ -89,13 +89,13 @@ def create_vector_model_cross_validator(data: InputOutputData,


def create_evaluation_util(data: InputOutputData, model: VectorModel = None, is_regression: bool = None,
evaluator_params: Optional[Union[VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams]] = None,
evaluator_params: Optional[Union[RegressionEvaluatorParams, ClassificationEvaluatorParams]] = None,
cross_validator_params: Optional[Dict[str, Any]] = None) \
-> Union["ClassificationEvaluationUtil", "RegressionEvaluationUtil"]:
-> Union["ClassificationModelEvaluation", "RegressionModelEvaluation"]:
if _is_regression(model, is_regression):
return RegressionEvaluationUtil(data, evaluator_params=evaluator_params, cross_validator_params=cross_validator_params)
return RegressionModelEvaluation(data, evaluator_params=evaluator_params, cross_validator_params=cross_validator_params)
else:
return ClassificationEvaluationUtil(data, evaluator_params=evaluator_params, cross_validator_params=cross_validator_params)
return ClassificationModelEvaluation(data, evaluator_params=evaluator_params, cross_validator_params=cross_validator_params)


def eval_model_via_evaluator(model: TModel, io_data: InputOutputData, test_fraction=0.2,
Expand Down Expand Up @@ -131,10 +131,10 @@ def eval_model_via_evaluator(model: TModel, io_data: InputOutputData, test_fract
fig.show()

if model.is_regression_model():
evaluator_params = VectorRegressionModelEvaluatorParams(fractional_split_test_fraction=test_fraction,
evaluator_params = RegressionEvaluatorParams(fractional_split_test_fraction=test_fraction,
fractional_split_random_seed=random_seed)
else:
evaluator_params = VectorClassificationModelEvaluatorParams(fractional_split_test_fraction=test_fraction,
evaluator_params = ClassificationEvaluatorParams(fractional_split_test_fraction=test_fraction,
compute_probabilities=compute_probabilities, fractional_split_random_seed=random_seed)
ev = create_evaluation_util(io_data, model=model, evaluator_params=evaluator_params)
return ev.perform_simple_evaluation(model, show_plots=True, log_results=True)
Expand Down Expand Up @@ -209,14 +209,13 @@ def __init__(self):
self.add_plot("threshold-counts", ClassificationEvalStatsPlotProbabilityThresholdCounts())


# TODO conceive of better class name
class EvaluationUtil(ABC, Generic[TModel, TEvaluator, TEvalData, TCrossValidator, TCrossValData, TEvalStats]):
class ModelEvaluation(ABC, Generic[TModel, TEvaluator, TEvalData, TCrossValidator, TCrossValData, TEvalStats]):
"""
Utility class for the evaluation of models based on a dataset
"""
def __init__(self, io_data: InputOutputData,
eval_stats_plot_collector: Union[RegressionEvalStatsPlotCollector, ClassificationEvalStatsPlotCollector],
evaluator_params: Optional[Union[VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams,
evaluator_params: Optional[Union[RegressionEvaluatorParams, ClassificationEvaluatorParams,
Dict[str, Any]]] = None,
cross_validator_params: Optional[Union[VectorModelCrossValidatorParams, Dict[str, Any]]] = None):
"""
Expand Down Expand Up @@ -291,7 +290,7 @@ def gather_results(result_data: VectorModelEvaluationData, res_writer, subtitle_
self.create_plots(result_data, show_plots=show_plots, result_writer=res_writer,
subtitle_prefix=subtitle_prefix, tracking_context=trackingContext)

eval_result_data = evaluator.eval_model(model, fit=True)
eval_result_data = evaluator.eval_model(model, fit=fit_model)
gather_results(eval_result_data, result_writer)
if additional_evaluation_on_training_data:
eval_result_data_train = evaluator.eval_model(model, on_training_data=True, track=False)
Expand Down Expand Up @@ -526,10 +525,10 @@ def _create_eval_stats_plots(self, eval_stats: TEvalStats, result_collector: Eva
self.eval_stats_plot_collector.create_plots(eval_stats, subtitle, result_collector)


class RegressionEvaluationUtil(EvaluationUtil[VectorRegressionModel, VectorRegressionModelEvaluator, VectorRegressionModelEvaluationData,
class RegressionModelEvaluation(ModelEvaluation[VectorRegressionModel, VectorRegressionModelEvaluator, VectorRegressionModelEvaluationData,
VectorRegressionModelCrossValidator, VectorRegressionModelCrossValidationData, RegressionEvalStats]):
def __init__(self, io_data: InputOutputData,
evaluator_params: Optional[Union[VectorRegressionModelEvaluatorParams, Dict[str, Any]]] = None,
evaluator_params: Optional[Union[RegressionEvaluatorParams, Dict[str, Any]]] = None,
cross_validator_params: Optional[Union[VectorModelCrossValidatorParams, Dict[str, Any]]] = None):
"""
:param io_data: the data set to use for evaluation
Expand All @@ -540,11 +539,11 @@ def __init__(self, io_data: InputOutputData,
cross_validator_params=cross_validator_params)


class ClassificationEvaluationUtil(EvaluationUtil[VectorClassificationModel, VectorClassificationModelEvaluator,
class ClassificationModelEvaluation(ModelEvaluation[VectorClassificationModel, VectorClassificationModelEvaluator,
VectorClassificationModelEvaluationData, VectorClassificationModelCrossValidator, VectorClassificationModelCrossValidationData,
ClassificationEvalStats]):
def __init__(self, io_data: InputOutputData,
evaluator_params: Optional[Union[VectorClassificationModelEvaluatorParams, Dict[str, Any]]] = None,
evaluator_params: Optional[Union[ClassificationEvaluatorParams, Dict[str, Any]]] = None,
cross_validator_params: Optional[Union[VectorModelCrossValidatorParams, Dict[str, Any]]] = None):
"""
:param io_data: the data set to use for evaluation
Expand All @@ -555,10 +554,10 @@ def __init__(self, io_data: InputOutputData,
cross_validator_params=cross_validator_params)


class MultiDataEvaluationUtil:
class MultiDataModelEvaluation:
def __init__(self, io_data_dict: Dict[str, InputOutputData], key_name: str = "dataset",
meta_data_dict: Optional[Dict[str, Dict[str, Any]]] = None,
evaluator_params: Optional[Union[VectorRegressionModelEvaluatorParams, VectorClassificationModelEvaluatorParams, Dict[str, Any]]] = None,
evaluator_params: Optional[Union[RegressionEvaluatorParams, ClassificationEvaluatorParams, Dict[str, Any]]] = None,
cross_validator_params: Optional[Union[VectorModelCrossValidatorParams, Dict[str, Any]]] = None):
"""
:param io_data_dict: a dictionary mapping from names to the data sets with which to evaluate models
Expand Down
36 changes: 24 additions & 12 deletions src/sensai/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ..data import DataSplitter, DataSplitterFractional, InputOutputData
from ..data_transformation import DataFrameTransformer
from ..tracking import TrackingMixin, TrackedExperiment
from ..tracking.tracking_base import TrackingContext
from ..util.deprecation import deprecated
from ..util.string import ToStringMixin
from ..util.typing import PandasNamedTuple
from ..vector_model import VectorClassificationModel, VectorModel, VectorModelBase, VectorModelFittableBase, VectorRegressionModel
Expand Down Expand Up @@ -122,7 +122,7 @@ def get_eval_stats_collection(self):
TEvalData = TypeVar("TEvalData", bound=VectorModelEvaluationData)


class VectorModelEvaluatorParams(ToStringMixin, ABC):
class EvaluatorParams(ToStringMixin, ABC):
def __init__(self, data_splitter: DataSplitter = None, fractional_split_test_fraction: float = None, fractional_split_random_seed=42,
fractional_split_shuffle=True):
"""
Expand Down Expand Up @@ -166,7 +166,7 @@ def set_data_splitter(self, splitter: DataSplitter):


class VectorModelEvaluator(MetricsDictProvider, Generic[TEvalData], ABC):
def __init__(self, data: Optional[InputOutputData], test_data: InputOutputData = None, params: VectorModelEvaluatorParams = None):
def __init__(self, data: Optional[InputOutputData], test_data: InputOutputData = None, params: EvaluatorParams = None):
"""
Constructs an evaluator with test and training data.
Expand Down Expand Up @@ -248,7 +248,7 @@ def fit_model(self, model: VectorModelFittableBase):
model.fit(self.training_data.inputs, self.training_data.outputs)


class VectorRegressionModelEvaluatorParams(VectorModelEvaluatorParams):
class RegressionEvaluatorParams(EvaluatorParams):
def __init__(self,
data_splitter: DataSplitter = None,
fractional_split_test_fraction: float = None,
Expand Down Expand Up @@ -281,9 +281,9 @@ def __init__(self,

@classmethod
def from_dict_or_instance(cls,
params: Optional[Union[Dict[str, Any], "VectorRegressionModelEvaluatorParams"]]) -> "VectorRegressionModelEvaluatorParams":
params: Optional[Union[Dict[str, Any], "RegressionEvaluatorParams"]]) -> "RegressionEvaluatorParams":
if params is None:
return VectorRegressionModelEvaluatorParams()
return RegressionEvaluatorParams()
elif type(params) == dict:
raise Exception("Old-style dictionary parametrisation is no longer supported")
elif isinstance(params, cls):
Expand All @@ -292,9 +292,15 @@ def from_dict_or_instance(cls,
raise ValueError(f"Must provide dictionary or {cls} instance, got {params}, type {type(params)}")


class VectorRegressionModelEvaluatorParams(RegressionEvaluatorParams):
    """
    Backward-compatibility subclass of :class:`RegressionEvaluatorParams`, retained under the name
    that the regression evaluator parameter class carried before it was renamed.

    The class adds no behaviour of its own: its constructor passes all positional and keyword
    arguments on to the parent constructor unchanged. New code should use
    :class:`RegressionEvaluatorParams` directly.
    """
    # The constructor is wrapped with the project's `deprecated` decorator using the message below.
    # NOTE(review): presumably this emits a deprecation notice whenever an instance is created -
    # confirm against the decorator's implementation in ..util.deprecation.
    @deprecated("Use RegressionEvaluatorParams instead")
    def __init__(self, *args, **kwargs):
        # pure pass-through: the accepted parameters are exactly those of the parent constructor
        super().__init__(*args, **kwargs)


class VectorRegressionModelEvaluator(VectorModelEvaluator[VectorRegressionModelEvaluationData]):
def __init__(self, data: Optional[InputOutputData], test_data: InputOutputData = None,
params: VectorRegressionModelEvaluatorParams = None):
params: RegressionEvaluatorParams = None):
"""
Constructs an evaluator with test and training data.
Expand Down Expand Up @@ -358,7 +364,7 @@ def get_misclassified_triples_pred_true_input(self) -> List[Tuple[Any, Any, pd.S
return [(eval_stats.y_predicted[i], eval_stats.y_true[i], self.input_data.iloc[i]) for i in indices]


class VectorClassificationModelEvaluatorParams(VectorModelEvaluatorParams):
class ClassificationEvaluatorParams(EvaluatorParams):
def __init__(self, data_splitter: DataSplitter = None, fractional_split_test_fraction: float = None, fractional_split_random_seed=42,
fractional_split_shuffle=True, additional_metrics: Sequence[ClassificationMetric] = None,
compute_probabilities: bool = False, binary_positive_label=GUESS):
Expand Down Expand Up @@ -387,18 +393,24 @@ def __init__(self, data_splitter: DataSplitter = None, fractional_split_test_fra

@classmethod
def from_dict_or_instance(cls,
params: Optional[Union[Dict[str, Any], "VectorClassificationModelEvaluatorParams"]]) \
-> "VectorClassificationModelEvaluatorParams":
params: Optional[Union[Dict[str, Any], "ClassificationEvaluatorParams"]]) \
-> "ClassificationEvaluatorParams":
if params is None:
return VectorClassificationModelEvaluatorParams()
return ClassificationEvaluatorParams()
elif type(params) == dict:
raise ValueError("Old-style dictionary parametrisation is no longer supported")
elif isinstance(params, VectorClassificationModelEvaluatorParams):
elif isinstance(params, ClassificationEvaluatorParams):
return params
else:
raise ValueError(f"Must provide dictionary or instance, got {params}")


class VectorClassificationModelEvaluatorParams(ClassificationEvaluatorParams):
    """
    Backward-compatibility subclass of :class:`ClassificationEvaluatorParams`, retained under the name
    that the classification evaluator parameter class carried before it was renamed.

    The class adds no behaviour of its own: its constructor passes all positional and keyword
    arguments on to the parent constructor unchanged. New code should use
    :class:`ClassificationEvaluatorParams` directly.
    """
    # The constructor is wrapped with the project's `deprecated` decorator using the message below.
    # NOTE(review): presumably this emits a deprecation notice whenever an instance is created -
    # confirm against the decorator's implementation in ..util.deprecation.
    @deprecated("Use ClassificationEvaluatorParams instead")
    def __init__(self, *args, **kwargs):
        # pure pass-through: the accepted parameters are exactly those of the parent constructor
        super().__init__(*args, **kwargs)


class VectorClassificationModelEvaluator(VectorModelEvaluator[VectorClassificationModelEvaluationData]):
def __init__(self,
data: Optional[InputOutputData],
Expand Down
4 changes: 2 additions & 2 deletions src/sensai/evaluation/metric_computation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Union, List, Callable

from sensai import VectorRegressionModel, VectorClassificationModel, VectorModelBase
from sensai.evaluation import MultiDataEvaluationUtil
from sensai.evaluation import MultiDataModelEvaluation
from sensai.evaluation.eval_stats import RegressionMetric, ClassificationMetric

TMetric = Union[RegressionMetric, ClassificationMetric]
Expand All @@ -26,7 +26,7 @@ def compute_metric_value(self, model_factory: Callable[[], TModel]) -> MetricCom


class MetricComputationMultiData(MetricComputation):
def __init__(self, ev_util: MultiDataEvaluationUtil, use_cross_validation: bool, metric: TMetric,
def __init__(self, ev_util: MultiDataModelEvaluation, use_cross_validation: bool, metric: TMetric,
use_combined_eval_stats: bool):
super().__init__(metric)
self.use_combined_eval_stats = use_combined_eval_stats
Expand Down
4 changes: 2 additions & 2 deletions src/sensai/torch/torch_eval_util.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Union

from . import TorchVectorRegressionModel
from ..evaluation import RegressionEvaluationUtil
from ..evaluation import RegressionModelEvaluation
from ..evaluation.crossval import VectorModelCrossValidationData, VectorRegressionModelCrossValidationData
from ..evaluation.eval_util import EvaluationResultCollector
from ..evaluation.evaluator import VectorModelEvaluationData, VectorRegressionModelEvaluationData


class TorchVectorRegressionModelEvaluationUtil(RegressionEvaluationUtil):
class TorchVectorRegressionModelEvaluationUtil(RegressionModelEvaluation):

def _create_plots(self,
data: Union[VectorRegressionModelEvaluationData, VectorRegressionModelCrossValidationData],
Expand Down

0 comments on commit b42bb0b

Please sign in to comment.