Skip to content

Commit

Permalink
Add IsValidPython descriptor (#1351)
Browse files Browse the repository at this point in the history
* Add feature and descriptor for IsValidPython

* Add tests for IsValidPython descriptor feature

* Update documentation to include IsValidPython descriptor

---------

Co-authored-by: Emeli Dral <[email protected]>
  • Loading branch information
trey-capps and emeli-dral authored Oct 27, 2024
1 parent 08d1502 commit 421630f
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/book/reference/all-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ Check for regular expression matches.
| **JSONSchemaMatch()** <ul><li>Checks if the text contains a JSON object matching the **expected_schema**. Supports exact (**exact_match=True**) or minimal (**exact_match=False**) matching, with optional strict type validation (**validate_types=True**). </li><li>Returns True/False for each row. </li></ul> Example use:<br> `JSONSchemaMatch(expected_schema={"name": str, "age": int}, exact_match=False, validate_types=True)`| **Required:** <br>`expected_schema: Dict[str, type]`<br><br>**Optional:**<ul><li>`exact_match = True` or `False`</li><li>`validate_types = True` or `False`</li></ul> |
| **JSONMatch()** <ul><li>Compares two columns of a dataframe and checks whether the two values in each row are matching JSON objects. </li><li>Returns True/False for every input. </li></ul> Example use:<br> `JSONMatch(with_column="column_2")`| **Required:** <br> `with_column : str` <br><br>**Optional:**<ul><li>`display_name`</li></ul> |
| **ContainsLink()** <ul><li>Checks if the text contains at least one valid URL. </li><li>Returns True/False for each row. </li></ul> Example use:<br> `ContainsLink(column_name='column_1')`| **Required:** <br>`column_name: str`<br><br>**Optional:**<ul><li>`display_name`</li></ul> |
| **IsValidPython()** <ul><li>Checks if the text is valid Python code without syntax errors.</li><li>Returns True/False for every input. </li></ul> Example use:<br> `IsValidPython()`| **Required:** <br>n/a<br><br>**Optional:**<ul><li>`display_name`</li></ul> |

## Descriptors: Text stats

Expand Down
2 changes: 2 additions & 0 deletions src/evidently/descriptors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .hf_descriptor import HuggingFaceModel
from .hf_descriptor import HuggingFaceToxicityModel
from .is_valid_json_descriptor import IsValidJSON
from .is_valid_python_descriptor import IsValidPython
from .json_match_descriptor import JSONMatch
from .json_schema_match_descriptor import JSONSchemaMatch
from .llm_judges import BiasLLMEval
Expand Down Expand Up @@ -72,6 +73,7 @@
"WordNoMatch",
"IsValidJSON",
"JSONSchemaMatch",
"IsValidPython",
"_registry",
"JSONMatch",
]
5 changes: 5 additions & 0 deletions src/evidently/descriptors/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
"evidently.descriptors.hf_descriptor.HuggingFaceToxicityModel",
"evidently:descriptor:HuggingFaceToxicityModel",
)
# Register the IsValidPython descriptor: associates the alias string
# "evidently:descriptor:IsValidPython" with the dotted path of its class under
# the FeatureDescriptor base. The class is referenced by path string here
# (presumably so the registry does not need to import it eagerly - confirm
# against register_type_alias).
register_type_alias(
FeatureDescriptor,
"evidently.descriptors.is_valid_python_descriptor.IsValidPython",
"evidently:descriptor:IsValidPython",
)
register_type_alias(
FeatureDescriptor,
"evidently.descriptors.json_schema_match_descriptor.JSONSchemaMatch",
Expand Down
11 changes: 11 additions & 0 deletions src/evidently/descriptors/is_valid_python_descriptor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from evidently.features import is_valid_python_feature
from evidently.features.generated_features import FeatureDescriptor
from evidently.features.generated_features import GeneratedFeature


class IsValidPython(FeatureDescriptor):
    """Descriptor that marks whether a text column holds parseable Python code.

    The actual check lives in ``is_valid_python_feature.IsValidPython``; this
    class only builds that feature for the requested column.
    """

    class Config:
        type_alias = "evidently:descriptor:IsValidPython"

    def feature(self, column_name: str) -> GeneratedFeature:
        """Return the generated feature for ``column_name``.

        The descriptor's ``display_name`` is forwarded unchanged to the
        feature.
        """
        feature_cls = is_valid_python_feature.IsValidPython
        return feature_cls(column_name, self.display_name)
5 changes: 5 additions & 0 deletions src/evidently/features/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
"evidently.features.hf_feature.HuggingFaceToxicityFeature",
"evidently:feature:HuggingFaceToxicityFeature",
)
# Register the IsValidPython generated feature: associates the alias string
# "evidently:feature:IsValidPython" with the dotted path of its class under
# the GeneratedFeatures base. The class is referenced by path string here
# (presumably so the registry does not need to import it eagerly - confirm
# against register_type_alias).
register_type_alias(
GeneratedFeatures,
"evidently.features.is_valid_python_feature.IsValidPython",
"evidently:feature:IsValidPython",
)
register_type_alias(
GeneratedFeatures,
"evidently.features.json_schema_match_feature.JSONSchemaMatch",
Expand Down
28 changes: 28 additions & 0 deletions src/evidently/features/is_valid_python_feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import ast
from typing import Any
from typing import ClassVar
from typing import Optional

from evidently import ColumnType
from evidently.features.generated_features import ApplyColumnGeneratedFeature


class IsValidPython(ApplyColumnGeneratedFeature):
    """Generated feature telling whether each value parses as Python source.

    A value counts as valid when ``ast.parse`` accepts it. The code is only
    parsed, never executed.
    """

    class Config:
        type_alias = "evidently:feature:IsValidPython"

    __feature_type__: ClassVar = ColumnType.Categorical
    display_name_template: ClassVar = "Valid Python for {column_name}"
    column_name: str

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Store the source column name and the optional display name."""
        self.column_name = column_name
        self.display_name = display_name
        super().__init__()

    def apply(self, value: Any) -> bool:
        """Return True if ``value`` can be parsed by ``ast.parse``, else False.

        Args:
            value: A single cell value. Values that ``ast.parse`` rejects by
                type (e.g. ``None`` or a float NaN) yield False.

        Returns:
            True when parsing succeeds, False when it fails.
        """
        try:
            ast.parse(value)
        except (SyntaxError, TypeError, ValueError):
            # SyntaxError: malformed code. TypeError: value is not source text.
            # ValueError: compile() is documented to raise it for source
            # containing null bytes; without catching it, one such cell would
            # abort the whole feature computation instead of yielding False.
            return False
        return True
54 changes: 54 additions & 0 deletions tests/features/test_is_valid_python_feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from typing import Any

import numpy as np
import pandas as pd
import pytest

from evidently.features.is_valid_python_feature import IsValidPython


@pytest.mark.parametrize(
    ("column_value", "expected"),
    [
        ("print('Hello')", True),
        ("x = 5 + 3", True),
        ("def foo():\n return 'bar'", True),
        ("for i in range(10): print(i)", True),
        ("print('Hello'", False),
        ("for i in range(5) print(i)", False),
        ("def foo(\n return 'bar'", False),
        ("if True print('yes')", False),
        (None, False),
        ("12", True),
        ("Sorry I can't answer this", False),
        ("{'name': 'test', 'age': 13}", True),
    ],
)
def test_is_valid_python_apply(column_value: Any, expected: bool):
    """Check ``IsValidPython.apply`` on one value against its expected verdict."""
    feature = IsValidPython("TestColumnName")
    assert feature.apply(column_value) == expected


# Single-column fixture: two valid snippets, one snippet with an unclosed
# parenthesis, a None, a dict literal, and a NaN.
test_data = pd.DataFrame(
    data=dict(
        TestColumnName=[
            "print('Hello')",
            "def foo():\n return 'bar'",
            "def foo(\n return 'bar'",
            None,
            "{'name': 'test', 'age': 13}",
            np.nan,
        ],
    ),
)


@pytest.mark.parametrize(
    "expected",
    [[True, True, False, False, True, False]],
)
def test_is_valid_python(expected: list):
    """Check the feature column generated for the whole ``test_data`` frame.

    ``expected`` holds one boolean per row of ``test_data``, in row order
    (hence a list, not a single bool).
    """
    is_python = IsValidPython("TestColumnName")
    generated = is_python.generate_feature(test_data, None)
    assert generated[is_python._feature_column_name()].tolist() == expected

0 comments on commit 421630f

Please sign in to comment.