From f24e2b5f27b3bcfe912daadc5264684c917a2612 Mon Sep 17 00:00:00 2001 From: Vyacheslav Morov Date: Tue, 7 Jan 2025 13:04:18 +0100 Subject: [PATCH] Support add_descriptor in PandasDataset. --- src/evidently/future/datasets.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/evidently/future/datasets.py b/src/evidently/future/datasets.py index ece6b071ba..9ac348ea7c 100644 --- a/src/evidently/future/datasets.py +++ b/src/evidently/future/datasets.py @@ -240,15 +240,7 @@ def from_pandas( ) -> "Dataset": dataset = PandasDataset(data, data_definition) for descriptor in descriptors or []: - key = _determine_desccriptor_column_name(descriptor.alias, data.columns.tolist()) - new_column = descriptor.generate_data(dataset) - if isinstance(new_column, DatasetColumn): - dataset.add_column(key, new_column) - elif len(new_column) > 1: - for col, value in new_column.items(): - dataset.add_column(f"{key}.{col}", value) - else: - dataset.add_column(key, list(new_column.values())[0]) + dataset.add_descriptor(descriptor) return dataset @abstractmethod @@ -314,6 +306,17 @@ def add_column(self, key: str, data: DatasetColumn): if data.type == ColumnType.Categorical: self._data_definition.categorical_descriptors.append(key) + def add_descriptor(self, descriptor: Descriptor): + key = _determine_desccriptor_column_name(descriptor.alias, self._data.columns.tolist()) + new_column = descriptor.generate_data(self) + if isinstance(new_column, DatasetColumn): + self.add_column(key, new_column) + elif len(new_column) > 1: + for col, value in new_column.items(): + self.add_column(f"{key}.{col}", value) + else: + self.add_column(key, list(new_column.values())[0]) + def _collect_stats(self, column_type: ColumnType, data: pd.Series): numerical_stats = None if column_type == ColumnType.Numerical: