Skip to content

Commit

Permalink
codegen scorers (#1394)
Browse files Browse the repository at this point in the history
* codegen scorers

* rename and skip

* add alias

* imports

* remove display name
  • Loading branch information
mike0sv authored Dec 13, 2024
1 parent e142196 commit ccab7fc
Show file tree
Hide file tree
Showing 3 changed files with 470 additions and 0 deletions.
62 changes: 62 additions & 0 deletions src/evidently/v2/scorers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,71 @@
from ._custom_scorers import CustomColumnScorer
from ._custom_scorers import CustomScorer
from ._text_length import TextLength
from .generated_scorers import begins_with
from .generated_scorers import bert_score
from .generated_scorers import contains
from .generated_scorers import contains_link
from .generated_scorers import does_not_contain
from .generated_scorers import ends_with
from .generated_scorers import exact_match
from .generated_scorers import excludes_words
from .generated_scorers import hugging_face
from .generated_scorers import hugging_face_toxicity
from .generated_scorers import includes_words
from .generated_scorers import is_valid_json
from .generated_scorers import is_valid_python
from .generated_scorers import item_match
from .generated_scorers import item_no_match
from .generated_scorers import json_match
from .generated_scorers import json_schema_match
from .generated_scorers import llm_judge
from .generated_scorers import non_letter_character_percentage
from .generated_scorers import oov_words_percentage
from .generated_scorers import openai
from .generated_scorers import reg_exp
from .generated_scorers import semantic_similarity
from .generated_scorers import sentence_count
from .generated_scorers import sentiment
from .generated_scorers import text_length
from .generated_scorers import trigger_words_present
from .generated_scorers import word_count
from .generated_scorers import word_match
from .generated_scorers import word_no_match
from .generated_scorers import words_presence

# Public API of the scorers package: the custom scorer classes and TextLength
# imported from the private modules above, plus the scorer factory functions
# imported from ``generated_scorers``.
# NOTE(review): the function names presumably mirror the ``__all__`` snippet
# printed by ``_generate_scorers.main()`` - confirm when regenerating.
__all__ = [
"CustomColumnScorer",
"CustomScorer",
"TextLength",
"bert_score",
"begins_with",
"contains",
"contains_link",
"does_not_contain",
"ends_with",
"exact_match",
"excludes_words",
"hugging_face",
"hugging_face_toxicity",
"includes_words",
"is_valid_json",
"is_valid_python",
"item_match",
"item_no_match",
"json_match",
"json_schema_match",
"llm_judge",
"non_letter_character_percentage",
"oov_words_percentage",
"openai",
"reg_exp",
"semantic_similarity",
"sentence_count",
"sentiment",
"text_length",
"trigger_words_present",
"word_count",
"word_match",
"word_no_match",
"words_presence",
]
147 changes: 147 additions & 0 deletions src/evidently/v2/scorers/_generate_scorers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import inspect
import os.path
import re
from itertools import chain
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type

from pydantic.utils import import_string

from evidently import ColumnType
from evidently.features.custom_feature import CustomFeature
from evidently.features.custom_feature import CustomPairColumnFeature
from evidently.features.custom_feature import CustomSingleColumnFeature
from evidently.features.generated_features import GeneratedFeatures
from evidently.features.llm_judge import BaseLLMPromptTemplate
from evidently.pydantic_utils import TYPE_ALIASES
from evidently.v2.datasets import FeatureScorer

# File name of the generated module; main() writes it into the directory that
# contains this script.
SOURCE_FILE = "generated_scorers.py"

# Fully-qualified type names that are replaced by their short names in the
# generated source (applied as plain substring replacement).
REPLACES = {
"pandas.core.frame.DataFrame": "DataFrame",
"evidently.utils.data_preprocessing.DataDefinition": "DataDefinition",
"pandas.core.series.Series": "Series",
}

# Overrides for snake_case names that the automatic CamelCase conversion
# produces with acronyms split letter by letter.
NAME_MAPPING = {"open_a_i_feature": "openai_feature", "is_valid_j_s_o_n": "is_valid_json"}

# Feature classes for which main() does not generate a scorer function.
SKIP_CLASSES = {CustomFeature, CustomPairColumnFeature, CustomSingleColumnFeature}


def _get_type_name(tp: Type):
    """Return the source-code spelling of a type annotation.

    Args:
        tp: An annotation object, or a string annotation (forward reference).

    Returns:
        For objects defined in a module whose name starts with ``typing``,
        ``str(tp)`` with every ``typing.`` prefix removed (for example
        ``List[int]``). For other objects, their ``__name__``. String
        annotations are returned as text with ``typing.`` prefixes removed.
    """
    if isinstance(tp, str):
        # String annotations have no __module__; they are already source text.
        return tp.replace("typing.", "")
    module = getattr(tp, "__module__", None) or ""
    if module.startswith("typing") or not hasattr(tp, "__name__"):
        # Also covers annotation objects without a __name__ (e.g. ``int | None``),
        # which would otherwise raise AttributeError.
        return str(tp).replace("typing.", "")
    return tp.__name__


def _get_value_str(value):
    """Render a default value as Python source text for the generated module.

    Args:
        value: The default value of a parameter.

    Returns:
        For strings, a double-quoted literal with quotes, backslashes and
        control characters escaped, so the generated code stays syntactically
        valid. For any other value, ``str(value)``.
    """
    if isinstance(value, str):
        # Function-scope import keeps this helper self-contained.
        import json

        # A JSON string literal is also a valid Python string literal, and it
        # keeps the double-quote style that plain strings were rendered with.
        return json.dumps(value, ensure_ascii=False)
    return str(value)


def get_args_kwargs(feature_class: Type[GeneratedFeatures]) -> Tuple[Dict[str, str], Dict[str, Tuple[str, str]]]:
    """Collect the constructor parameters of a feature class as source text.

    Args:
        feature_class: The feature class to inspect.

    Returns:
        A pair ``(args, kwargs)``. ``args`` maps each required parameter name
        to its type name. ``kwargs`` maps each optional parameter name to a
        ``(type name, default value source)`` tuple.
    """
    if feature_class.__dict__.get("__init__") is None:
        # The class does not define its own __init__: describe it through its
        # declared fields instead. The "type" field is left out of kwargs.
        fields = feature_class.__fields__
        args = {key: _get_type_name(field.annotation) for key, field in fields.items() if field.required}
        kwargs = {
            key: (_get_type_name(field.annotation), _get_value_str(field.default))
            for key, field in fields.items()
            if not field.required and key != "type"
        }
        return args, kwargs

    # The class defines its own __init__: read the parameters from its signature.
    sig = inspect.getfullargspec(feature_class.__init__)

    defaults = sig.defaults or ()
    # Index of the first positional parameter that has a default. An explicit
    # index is required: slicing with ``-len(defaults)`` yields an empty list
    # when there are no defaults (``-0 == 0``), dropping every required arg.
    first_default = len(sig.args) - len(defaults)
    # sig.args[0] is ``self`` and is skipped.
    args = {a: _get_type_name(sig.annotations.get(a, Any)) for a in sig.args[1:first_default]}
    kwargs = {
        a: (_get_type_name(sig.annotations.get(a, Any)), _get_value_str(d))
        for a, d in zip(sig.args[first_default:], defaults)
    }

    # Keyword-only parameters: required ones go to args, defaulted ones to kwargs.
    kwonlydefaults = sig.kwonlydefaults or {}
    for k in sig.kwonlyargs:
        type_name = _get_type_name(sig.annotations.get(k, Any))
        if k in kwonlydefaults:
            kwargs[k] = (type_name, _get_value_str(kwonlydefaults[k]))
        else:
            args[k] = type_name
    return args, kwargs


def create_scorer_function(feature_class: Type[GeneratedFeatures]) -> Tuple[str, str]:
    """Generate the source code of a scorer factory function for a feature class.

    The generated function accepts the feature's constructor parameters plus an
    ``alias`` keyword, instantiates the feature and wraps it in ``FeatureScorer``.
    ``display_name`` is removed from the optional parameters and is not passed on.

    Args:
        feature_class: The feature class to generate a function for.

    Returns:
        A pair ``(source, name)``: the function source text and its name.
    """
    class_name = feature_class.__name__
    # Keep a leading acronym together: the common prefix of the name and its
    # upper-cased form is the leading upper-case run; all but its last two
    # characters are lower-cased as is, and underscores are inserted before
    # the capitals of the remainder (e.g. "BERTScore" -> "bert_score").
    cmpx = os.path.commonprefix([class_name, class_name.upper()])[:-2]
    name = cmpx.lower() + re.sub(r"(?<!^)(?=[A-Z])", "_", class_name[len(cmpx) :]).lower()
    name = NAME_MAPPING.get(name, name)
    if name.endswith("_feature"):
        name = name[: -len("_feature")]

    args, kwargs = get_args_kwargs(feature_class)
    kwargs["alias"] = ("Optional[str]", "None")
    kwargs.pop("display_name", None)

    # Build one parameter list and join it once, so that a class without
    # required parameters does not yield a signature starting with a comma.
    params = [f"{a}: {t}" for a, t in args.items()]
    params.extend(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
    params_str = ", ".join(params)

    # "alias" belongs to FeatureScorer, not to the feature constructor.
    class_args = ", ".join(f"{k}={k}" for k in chain(args, kwargs) if k != "alias")
    # The body lines of the template are indented so that the generated
    # function is valid Python.
    res = f"""
def {name}({params_str}):
    feature = {class_name}({class_args})
    return FeatureScorer(feature, alias=alias)"""
    for substr, repl in REPLACES.items():
        res = res.replace(substr, repl)
    return res, name


def main():
    """Regenerate the scorers module and print the snippet for ``__init__``.

    Writes one scorer function per concrete, non-skipped ``GeneratedFeatures``
    subtype into ``SOURCE_FILE`` next to this script, preceded by the import
    lines the generated code needs. Then prints an import statement and an
    ``__all__`` list naming the generated functions.
    """
    # Import every class path registered for GeneratedFeatures (presumably so
    # the subclasses exist before __subtypes__() is queried - confirm).
    registered = sorted(TYPE_ALIASES.items(), key=lambda item: item[0][1])
    for (base_class, _), classpath in registered:
        if base_class is not GeneratedFeatures:
            continue
        import_string(classpath)

    # Concrete feature classes in name order, without the explicitly skipped ones.
    feature_classes = [
        cls
        for cls in sorted(GeneratedFeatures.__subtypes__(), key=lambda cls: cls.__name__)
        if not inspect.isabstract(cls) and cls not in SKIP_CLASSES
    ]
    generated = [create_scorer_function(cls) for cls in feature_classes]
    srcs = [src for src, _ in generated]
    fnames = [fname for _, fname in generated]

    # Names the generated module has to import: fixed helpers first, then features.
    imports: List[Type] = [
        FeatureScorer,
        ColumnType,
        BaseLLMPromptTemplate,
        Any,
        List,
        Optional,
        Dict,
        *feature_classes,
    ]

    target = Path(__file__).parent / SOURCE_FILE
    with open(target, "w") as f:
        import_lines = [f"from {t.__module__} import {t.__name__}" for t in imports]
        f.write("\n".join(import_lines) + "\n\n")
        f.write("\n\n".join(srcs))

    # Snippet to paste into the package __init__.
    module_name = SOURCE_FILE[:-3]
    print(f"from .{module_name} import ({', '.join(fnames)})")
    print("__all__ = [")
    print("\n".join(f'"{fname}",' for fname in fnames))
    print("]")


if __name__ == "__main__":
    main()
Loading

0 comments on commit ccab7fc

Please sign in to comment.