Skip to content

Commit

Permalink
Support add_descriptors in PandasDataset.
Browse files Browse the repository at this point in the history
  • Loading branch information
Liraim committed Jan 7, 2025
1 parent 7cd182e commit f24e2b5
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions src/evidently/future/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,15 +240,7 @@ def from_pandas(
) -> "Dataset":
dataset = PandasDataset(data, data_definition)
for descriptor in descriptors or []:
key = _determine_desccriptor_column_name(descriptor.alias, data.columns.tolist())
new_column = descriptor.generate_data(dataset)
if isinstance(new_column, DatasetColumn):
dataset.add_column(key, new_column)
elif len(new_column) > 1:
for col, value in new_column.items():
dataset.add_column(f"{key}.{col}", value)
else:
dataset.add_column(key, list(new_column.values())[0])
dataset.add_descriptor(descriptor)
return dataset

@abstractmethod
Expand Down Expand Up @@ -314,6 +306,17 @@ def add_column(self, key: str, data: DatasetColumn):
if data.type == ColumnType.Categorical:
self._data_definition.categorical_descriptors.append(key)

def add_descriptor(self, descriptor: Descriptor):
    """Generate ``descriptor``'s data for this dataset and store it as column(s).

    The base column name is resolved from ``descriptor.alias`` and the names
    of the columns currently present in ``self._data``.

    The value returned by ``descriptor.generate_data(self)`` is handled as:

    * a single ``DatasetColumn`` -- added under the base name;
    * a mapping with more than one entry -- each entry is added under
      ``"<base name>.<entry key>"``;
    * any other mapping -- its first value is added under the base name
      (an empty mapping fails on the ``[0]`` lookup).
    """
    alias = descriptor.alias
    existing_names = self._data.columns.tolist()
    base_name = _determine_desccriptor_column_name(alias, existing_names)
    generated = descriptor.generate_data(self)

    # Single-column result: register it directly under the resolved name.
    if isinstance(generated, DatasetColumn):
        self.add_column(base_name, generated)
        return

    # Multi-column result: namespace every produced column under the base name.
    if len(generated) > 1:
        for suffix, column in generated.items():
            self.add_column(f"{base_name}.{suffix}", column)
        return

    # Mapping with one entry: drop the inner key and use the base name as-is.
    self.add_column(base_name, list(generated.values())[0])

def _collect_stats(self, column_type: ColumnType, data: pd.Series):
numerical_stats = None
if column_type == ColumnType.Numerical:
Expand Down

0 comments on commit f24e2b5

Please sign in to comment.