Skip to content

Commit

Permalink
rename scorers->descriptors, add showcase.ipynb and add sql descriptor
Browse files Browse the repository at this point in the history
  • Loading branch information
mike0sv committed Dec 17, 2024
1 parent d451be5 commit 6a52347
Show file tree
Hide file tree
Showing 8 changed files with 250 additions and 86 deletions.
2 changes: 1 addition & 1 deletion src/evidently/features/text_contains_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition)
def _as_column(self) -> ColumnName:
return self._create_column(
self._feature_column_name(),
default_display_name=f"Text Contains of {self.mode} [{', '.join(self.items)}] for {self.column_name}",
default_display_name=f"Text Does Not Contain of {self.mode} [{', '.join(self.items)}] for {self.column_name}",
)

def comparison(self, item: str, string: str):
Expand Down
14 changes: 7 additions & 7 deletions src/evidently/v2/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self, type: ColumnType, data: pd.Series) -> None:
self.data = data


class Scorer:
class Descriptor:
def __init__(self, alias: str):
self._alias = alias

Expand All @@ -50,7 +50,7 @@ def alias(self) -> str:
return self._alias


class FeatureScorer(Scorer):
class FeatureDescriptor(Descriptor):
def __init__(self, feature: GeneratedFeatures, alias: Optional[str] = None):
super().__init__(alias or f"{feature.as_column().display_name}")
self._feature = feature
Expand All @@ -67,7 +67,7 @@ def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, Da
return DatasetColumn(type=self._feature.get_type(), data=feature[feature.columns[0]])


def _determine_scorer_column_name(alias: str, columns: List[str]):
def _determine_desccriptor_column_name(alias: str, columns: List[str]):
index = 1
key = alias
while key in columns:
Expand All @@ -84,12 +84,12 @@ def from_pandas(
cls,
data: pd.DataFrame,
data_definition: Optional[DataDefinition] = None,
scorers: Optional[List[Scorer]] = None,
descriptors: Optional[List[Descriptor]] = None,
) -> "Dataset":
dataset = PandasDataset(data, data_definition)
for scorer in scorers or []:
key = _determine_scorer_column_name(scorer.alias, data.columns)
new_column = scorer.generate_data(dataset)
for descriptor in descriptors or []:
key = _determine_desccriptor_column_name(descriptor.alias, data.columns)
new_column = descriptor.generate_data(dataset)
if isinstance(new_column, DatasetColumn):
data[key] = new_column.data
elif len(new_column) > 1:
Expand Down
72 changes: 37 additions & 35 deletions src/evidently/v2/scorers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
from ._custom_scorers import CustomColumnScorer
from ._custom_scorers import CustomScorer
from ._custom_descriptors import CustomColumnDescriptor
from ._custom_descriptors import CustomDescriptor
from ._text_length import TextLength
from .generated_scorers import begins_with
from .generated_scorers import bert_score
from .generated_scorers import contains
from .generated_scorers import contains_link
from .generated_scorers import does_not_contain
from .generated_scorers import ends_with
from .generated_scorers import exact_match
from .generated_scorers import excludes_words
from .generated_scorers import hugging_face
from .generated_scorers import hugging_face_toxicity
from .generated_scorers import includes_words
from .generated_scorers import is_valid_json
from .generated_scorers import is_valid_python
from .generated_scorers import item_match
from .generated_scorers import item_no_match
from .generated_scorers import json_match
from .generated_scorers import json_schema_match
from .generated_scorers import llm_judge
from .generated_scorers import non_letter_character_percentage
from .generated_scorers import oov_words_percentage
from .generated_scorers import openai
from .generated_scorers import reg_exp
from .generated_scorers import semantic_similarity
from .generated_scorers import sentence_count
from .generated_scorers import sentiment
from .generated_scorers import text_length
from .generated_scorers import trigger_words_present
from .generated_scorers import word_count
from .generated_scorers import word_match
from .generated_scorers import word_no_match
from .generated_scorers import words_presence
from .generated_descriptors import begins_with
from .generated_descriptors import bert_score
from .generated_descriptors import contains
from .generated_descriptors import contains_link
from .generated_descriptors import does_not_contain
from .generated_descriptors import ends_with
from .generated_descriptors import exact_match
from .generated_descriptors import excludes_words
from .generated_descriptors import hugging_face
from .generated_descriptors import hugging_face_toxicity
from .generated_descriptors import includes_words
from .generated_descriptors import is_valid_json
from .generated_descriptors import is_valid_python
from .generated_descriptors import is_valid_sql
from .generated_descriptors import item_match
from .generated_descriptors import item_no_match
from .generated_descriptors import json_match
from .generated_descriptors import json_schema_match
from .generated_descriptors import llm_judge
from .generated_descriptors import non_letter_character_percentage
from .generated_descriptors import oov_words_percentage
from .generated_descriptors import openai
from .generated_descriptors import reg_exp
from .generated_descriptors import semantic_similarity
from .generated_descriptors import sentence_count
from .generated_descriptors import sentiment
from .generated_descriptors import text_length
from .generated_descriptors import trigger_words_present
from .generated_descriptors import word_count
from .generated_descriptors import word_match
from .generated_descriptors import word_no_match
from .generated_descriptors import words_presence

__all__ = [
"CustomColumnScorer",
"CustomScorer",
"CustomColumnDescriptor",
"CustomDescriptor",
"TextLength",
"bert_score",
"begins_with",
Expand All @@ -50,6 +51,7 @@
"includes_words",
"is_valid_json",
"is_valid_python",
"is_valid_sql",
"item_match",
"item_no_match",
"json_match",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

from evidently.v2.datasets import Dataset
from evidently.v2.datasets import DatasetColumn
from evidently.v2.datasets import Scorer
from evidently.v2.datasets import Descriptor


class CustomColumnScorer(Scorer):
class CustomColumnDescriptor(Descriptor):
def __init__(self, column_name: str, func: Callable[[DatasetColumn], DatasetColumn], alias: Optional[str] = None):
super().__init__(alias)
self._column_name = column_name
Expand All @@ -19,7 +19,7 @@ def generate_data(self, dataset: Dataset) -> Union[DatasetColumn, Dict[str, Data
return self._func(column_data)


class CustomScorer(Scorer):
class CustomDescriptor(Descriptor):
def __init__(
self, func: Callable[[Dataset], Union[DatasetColumn, Dict[str, DatasetColumn]]], alias: Optional[str] = None
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@
from evidently.features.generated_features import GeneratedFeatures
from evidently.features.llm_judge import BaseLLMPromptTemplate
from evidently.pydantic_utils import TYPE_ALIASES
from evidently.v2.datasets import FeatureScorer
from evidently.v2.datasets import FeatureDescriptor

SOURCE_FILE = "generated_scorers.py"
SOURCE_FILE = "generated_descriptors.py"

REPLACES = {
"pandas.core.frame.DataFrame": "DataFrame",
"evidently.utils.data_preprocessing.DataDefinition": "DataDefinition",
"pandas.core.series.Series": "Series",
}

NAME_MAPPING = {"open_a_i_feature": "openai_feature", "is_valid_j_s_o_n": "is_valid_json"}
NAME_MAPPING = {
"open_a_i_feature": "openai_feature",
"is_valid_j_s_o_n": "is_valid_json",
"is_valid_s_q_l": "is_valid_sql",
}

SKIP_CLASSES = {CustomFeature, CustomPairColumnFeature, CustomSingleColumnFeature}

Expand Down Expand Up @@ -80,7 +84,7 @@ def get_args_kwargs(feature_class: Type[GeneratedFeatures]) -> Tuple[Dict[str, s
return args, kwargs


def create_scorer_function(feature_class: Type[GeneratedFeatures]):
def create_descriptor_function(feature_class: Type[GeneratedFeatures]):
class_name = feature_class.__name__
cmpx = os.path.commonprefix([class_name, class_name.upper()])[:-2]
name = cmpx.lower() + re.sub(r"(?<!^)(?=[A-Z])", "_", class_name[len(cmpx) :]).lower()
Expand Down Expand Up @@ -116,7 +120,7 @@ def main():
srcs = []
fnames = []
imports: List[Type] = [
FeatureScorer,
FeatureDescriptor,
ColumnType,
BaseLLMPromptTemplate,
Any,
Expand All @@ -129,7 +133,7 @@ def main():
continue
if feature_class in SKIP_CLASSES:
continue
src, fname = create_scorer_function(feature_class)
src, fname = create_descriptor_function(feature_class)
fnames.append(fname)
srcs.append(src)
imports.append(feature_class)
Expand Down
4 changes: 2 additions & 2 deletions src/evidently/v2/scorers/_text_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from evidently import ColumnType
from evidently.v2.datasets import Dataset
from evidently.v2.datasets import DatasetColumn
from evidently.v2.datasets import Scorer
from evidently.v2.datasets import Descriptor


class TextLength(Scorer):
class TextLength(Descriptor):
def __init__(self, column_name: str, alias: Optional[str] = None):
super().__init__(alias)
self._column_name: str = column_name
Expand Down
Loading

0 comments on commit 6a52347

Please sign in to comment.