-
Notifications
You must be signed in to change notification settings - Fork 622
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* codegen scorers * rename and skip * add alias * imports * remove display name
- Loading branch information
Showing
3 changed files
with
470 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,71 @@ | ||
from ._custom_scorers import CustomColumnScorer | ||
from ._custom_scorers import CustomScorer | ||
from ._text_length import TextLength | ||
from .generated_scorers import begins_with | ||
from .generated_scorers import bert_score | ||
from .generated_scorers import contains | ||
from .generated_scorers import contains_link | ||
from .generated_scorers import does_not_contain | ||
from .generated_scorers import ends_with | ||
from .generated_scorers import exact_match | ||
from .generated_scorers import excludes_words | ||
from .generated_scorers import hugging_face | ||
from .generated_scorers import hugging_face_toxicity | ||
from .generated_scorers import includes_words | ||
from .generated_scorers import is_valid_json | ||
from .generated_scorers import is_valid_python | ||
from .generated_scorers import item_match | ||
from .generated_scorers import item_no_match | ||
from .generated_scorers import json_match | ||
from .generated_scorers import json_schema_match | ||
from .generated_scorers import llm_judge | ||
from .generated_scorers import non_letter_character_percentage | ||
from .generated_scorers import oov_words_percentage | ||
from .generated_scorers import openai | ||
from .generated_scorers import reg_exp | ||
from .generated_scorers import semantic_similarity | ||
from .generated_scorers import sentence_count | ||
from .generated_scorers import sentiment | ||
from .generated_scorers import text_length | ||
from .generated_scorers import trigger_words_present | ||
from .generated_scorers import word_count | ||
from .generated_scorers import word_match | ||
from .generated_scorers import word_no_match | ||
from .generated_scorers import words_presence | ||
|
||
# Names re-exported by this package.
__all__ = [
    # Classes imported from ``_custom_scorers`` and ``_text_length``.
    "CustomColumnScorer",
    "CustomScorer",
    "TextLength",
    # Functions imported from ``generated_scorers``.
    "bert_score",
    "begins_with",
    "contains",
    "contains_link",
    "does_not_contain",
    "ends_with",
    "exact_match",
    "excludes_words",
    "hugging_face",
    "hugging_face_toxicity",
    "includes_words",
    "is_valid_json",
    "is_valid_python",
    "item_match",
    "item_no_match",
    "json_match",
    "json_schema_match",
    "llm_judge",
    "non_letter_character_percentage",
    "oov_words_percentage",
    "openai",
    "reg_exp",
    "semantic_similarity",
    "sentence_count",
    "sentiment",
    "text_length",
    "trigger_words_present",
    "word_count",
    "word_match",
    "word_no_match",
    "words_presence",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import inspect | ||
import os.path | ||
import re | ||
from itertools import chain | ||
from pathlib import Path | ||
from typing import Any | ||
from typing import Dict | ||
from typing import List | ||
from typing import Optional | ||
from typing import Tuple | ||
from typing import Type | ||
|
||
from pydantic.utils import import_string | ||
|
||
from evidently import ColumnType | ||
from evidently.features.custom_feature import CustomFeature | ||
from evidently.features.custom_feature import CustomPairColumnFeature | ||
from evidently.features.custom_feature import CustomSingleColumnFeature | ||
from evidently.features.generated_features import GeneratedFeatures | ||
from evidently.features.llm_judge import BaseLLMPromptTemplate | ||
from evidently.pydantic_utils import TYPE_ALIASES | ||
from evidently.v2.datasets import FeatureScorer | ||
|
||
# File name (resolved next to this script) that receives the generated code.
SOURCE_FILE = "generated_scorers.py"

# Fully-qualified type names and the short names substituted for them
# in the generated source.
REPLACES = {
    "pandas.core.frame.DataFrame": "DataFrame",
    "evidently.utils.data_preprocessing.DataDefinition": "DataDefinition",
    "pandas.core.series.Series": "Series",
}

# Overrides for function names that the automatic CamelCase -> snake_case
# conversion splits letter by letter.
NAME_MAPPING = {
    "open_a_i_feature": "openai_feature",
    "is_valid_j_s_o_n": "is_valid_json",
}
|
||
# Feature classes for which no scorer function is generated.
SKIP_CLASSES = {
    CustomFeature,
    CustomPairColumnFeature,
    CustomSingleColumnFeature,
}
|
||
|
||
def _get_type_name(tp: Type): | ||
if tp.__module__.startswith("typing"): | ||
return str(tp).replace("typing.", "") | ||
return tp.__name__ | ||
# return str(tp) | ||
|
||
|
||
def _get_value_str(value): | ||
if isinstance(value, str): | ||
return f'"{value}"' | ||
return str(value) | ||
|
||
|
||
def get_args_kwargs(feature_class: Type[GeneratedFeatures]) -> Tuple[Dict[str, str], Dict[str, Tuple[str, str]]]:
    """Collect the constructor parameters of ``feature_class``.

    When the class does not define its own ``__init__``, parameters are read
    from ``__fields__``; otherwise they are read from the ``__init__``
    signature.

    Args:
        feature_class: The feature class to inspect.

    Returns:
        A pair ``(args, kwargs)``. ``args`` maps each required parameter name
        to its type name. ``kwargs`` maps each optional parameter name to a
        ``(type name, default value source text)`` tuple.
    """
    if feature_class.__dict__.get("__init__") is None:
        # get from fields
        args = {
            key: _get_type_name(field.annotation) for key, field in feature_class.__fields__.items() if field.required
        }
        kwargs = {
            key: (_get_type_name(field.annotation), _get_value_str(field.default))
            for key, field in feature_class.__fields__.items()
            if not field.required and key != "type"
        }
        return args, kwargs

    # get from constructor
    sig = inspect.getfullargspec(feature_class.__init__)

    defaults = sig.defaults or ()
    positional = sig.args[1:]  # drop ``self``
    # Split on an explicit index. Slicing with ``-len(defaults)`` is wrong when
    # there are no defaults, because ``-0 == 0`` turns the required-args slice
    # into an empty one.
    split = len(positional) - len(defaults)
    args = {a: _get_type_name(sig.annotations.get(a, Any)) for a in positional[:split]}
    kwargs = {
        a: (_get_type_name(sig.annotations.get(a, Any)), _get_value_str(d))
        for a, d in zip(positional[split:], defaults)
    }

    # Keyword-only parameters: those without a default are required.
    kwonlydefaults = sig.kwonlydefaults or {}
    for k in sig.kwonlyargs:
        type_name = _get_type_name(sig.annotations.get(k, Any))
        if k in kwonlydefaults:
            kwargs[k] = (type_name, _get_value_str(kwonlydefaults[k]))
        else:
            args[k] = type_name
    return args, kwargs
|
||
|
||
def create_scorer_function(feature_class: Type[GeneratedFeatures]):
    """Generate the source of a scorer function wrapping ``feature_class``.

    The generated function accepts the constructor parameters of the feature
    (without ``display_name``) plus an optional ``alias``, instantiates the
    feature and returns it wrapped in a ``FeatureScorer``.

    Args:
        feature_class: The feature class to wrap.

    Returns:
        A pair ``(source, name)``: the function source text and the
        snake_case function name.
    """
    class_name = feature_class.__name__
    # The common prefix of the name and its upper-cased form is the leading
    # run of capitals. Dropping its last two characters keeps a leading
    # acronym together while still letting the regex insert "_" before the
    # next word (BERTScore -> bert_score, LLMJudge -> llm_judge).
    cmpx = os.path.commonprefix([class_name, class_name.upper()])[:-2]
    name = cmpx.lower() + re.sub(r"(?<!^)(?=[A-Z])", "_", class_name[len(cmpx) :]).lower()
    name = NAME_MAPPING.get(name, name)
    if name.endswith("_feature"):
        name = name[: -len("_feature")]

    args, kwargs = get_args_kwargs(feature_class)
    kwargs["alias"] = ("Optional[str]", "None")
    kwargs.pop("display_name", None)

    # Join required and optional parameters in one pass so that a feature
    # without required parameters does not produce a signature starting
    # with ", ".
    params = [f"{a}: {t}" for a, t in args.items()]
    params.extend(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
    signature = ", ".join(params)

    # ``alias`` belongs to FeatureScorer, not to the feature constructor.
    class_args = ", ".join(f"{k}={k}" for k in chain(args, kwargs) if k != "alias")
    res = f"""
def {name}({signature}):
    feature = {class_name}({class_args})
    return FeatureScorer(feature, alias=alias)"""
    for substr, repl in REPLACES.items():
        res = res.replace(substr, repl)
    return res, name
|
||
|
||
def main():
    """Regenerate the scorer module and print the matching package snippet.

    Writes one scorer function per concrete ``GeneratedFeatures`` subtype
    (excluding ``SKIP_CLASSES``) to ``SOURCE_FILE`` next to this script, then
    prints an import statement and an ``__all__`` list naming the generated
    functions.
    """
    # Import every class path registered for GeneratedFeatures before asking
    # for its subtypes. Presumably the import is what makes a class visible
    # to __subtypes__() - confirm against pydantic_utils.
    # Sorted by the second element of the key tuple for a stable order.
    for (base_class, _), classpath in sorted(TYPE_ALIASES.items(), key=lambda x: x[0][1]):
        if base_class is GeneratedFeatures:
            import_string(classpath)
    subtypes__ = GeneratedFeatures.__subtypes__()

    srcs = []
    fnames = []
    # Names the generated module always imports; feature classes are appended below.
    imports: List[Type] = [
        FeatureScorer,
        ColumnType,
        BaseLLMPromptTemplate,
        Any,
        List,
        Optional,
        Dict,
    ]
    for feature_class in sorted(subtypes__, key=lambda x: x.__name__):
        if inspect.isabstract(feature_class):
            continue
        if feature_class in SKIP_CLASSES:
            continue
        src, fname = create_scorer_function(feature_class)
        fnames.append(fname)
        srcs.append(src)
        imports.append(feature_class)

    # Explicit encoding keeps the generated file identical across platforms.
    with open(Path(__file__).parent / SOURCE_FILE, "w", encoding="utf-8") as f:
        f.write("\n".join(f"from {t.__module__} import {t.__name__}" for t in imports) + "\n\n")
        f.write("\n\n".join(srcs))

    # Snippet to paste into the package ``__init__``.
    print(f"from .{SOURCE_FILE[:-3]} import ({', '.join(fnames)})")
    print("__all__ = [")
    print("\n".join(f'"{fname}",' for fname in fnames))
    print("]")
|
||
|
||
if __name__ == "__main__":
    # Run the generator only when this file is executed as a script.
    main()
Oops, something went wrong.