-
Notifications
You must be signed in to change notification settings - Fork 621
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
35038bf
commit 08d1502
Showing
7 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from evidently.features import json_match_feature | ||
from evidently.features.generated_features import FeatureDescriptor | ||
from evidently.features.generated_features import GeneratedFeature | ||
|
||
|
||
class JSONMatch(FeatureDescriptor):
    """Descriptor that pairs a column with ``with_column`` for a JSON match feature."""

    class Config:
        type_alias = "evidently:descriptor:JSONMatch"

    # Name of the second column handed to the JSON match feature.
    with_column: str

    def feature(self, column_name: str) -> GeneratedFeature:
        """Build the JSON match feature for ``column_name`` against ``with_column``.

        Args:
            column_name: Column passed to the feature as ``first_column``.

        Returns:
            A ``json_match_feature.JSONMatch`` configured with both column names.
        """
        column_pair = {
            "first_column": column_name,
            "second_column": self.with_column,
        }
        return json_match_feature.JSONMatch(**column_pair)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import json | ||
|
||
import pandas as pd | ||
|
||
from evidently import ColumnType | ||
from evidently.base_metric import ColumnName | ||
from evidently.features.generated_features import FeatureTypeFieldMixin | ||
from evidently.features.generated_features import GeneratedFeature | ||
from evidently.utils.data_preprocessing import DataDefinition | ||
|
||
|
||
def _json_strings_match(first_json_object, second_json_object) -> bool:
    """Return True when both arguments parse as JSON and the parsed values are equal."""
    try:
        first_json = json.loads(first_json_object)
        second_json = json.loads(second_json_object)
    except (ValueError, TypeError):
        # ValueError covers malformed JSON (json.JSONDecodeError subclasses it);
        # TypeError covers non-string cells such as None or NaN.
        return False
    # Parsed objects become dicts, so key order in the text does not matter.
    return first_json == second_json


class JSONMatch(FeatureTypeFieldMixin, GeneratedFeature):
    """Generated feature flagging rows where two columns hold equal JSON documents."""

    class Config:
        type_alias = "evidently:feature:JSONMatch"

    # Names of the two columns whose JSON contents are compared row by row.
    first_column: str
    second_column: str
    feature_type: ColumnType = ColumnType.Categorical

    def generate_feature(self, data: pd.DataFrame, data_definition: DataDefinition) -> pd.DataFrame:
        """Compare ``first_column`` and ``second_column`` of ``data`` row by row.

        Args:
            data: Frame containing both configured columns. It is not modified.
            data_definition: Not used by this feature.

        Returns:
            A single-column boolean frame, indexed like ``data``, that is True
            where both cells are valid JSON and parse to equal values.
        """
        matches = [
            _json_strings_match(first, second)
            for first, second in zip(data[self.first_column], data[self.second_column])
        ]
        # Build the result as a fresh frame instead of writing a column into the
        # caller's ``data``; an explicit dtype keeps an empty input boolean too.
        return pd.DataFrame({self._feature_column_name(): pd.Series(matches, index=data.index, dtype=bool)})

    def _as_column(self) -> "ColumnName":
        """Return the column reference for this feature with its default display name."""
        return self._create_column(
            self._feature_column_name(),
            default_display_name=f"JSON match for columns {self.first_column} and {self.second_column}",
        )

    def _feature_column_name(self) -> str:
        """Return the label of the column produced by ``generate_feature``."""
        return f"JSON match for {self.first_column} and {self.second_column}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import pandas as pd | ||
|
||
from evidently.features.json_match_feature import JSONMatch | ||
from evidently.pipeline.column_mapping import ColumnMapping | ||
from evidently.utils.data_preprocessing import create_data_definition | ||
|
||
|
||
def test_is_valid_sql_feature():
    """Check the JSONMatch feature on matching, invalid and mismatching JSON pairs.

    Despite its name, this test exercises the ``JSONMatch`` generated feature.
    """
    feature_generator = JSONMatch(
        first_column="col_1", second_column="col_2", display_name="Json Match", feature_type="num", name="is_json_match"
    )

    # Define JSON strings for each scenario
    scenarios = [
        # Scenario 1 - Matching JSONs
        ('{"name": "Alice", "age": 25, "city": "London"}', '{"city": "London", "age": 25, "name": "Alice"}'),
        # Scenario 2 - Different whitespace (still matching)
        ('{ "name" : "Bob" , "age" : 22 , "city" : "Paris" }', '{"city": "Paris", "name": "Bob", "age": 22}'),
        # Scenario 3 - Invalid JSON in one column
        (
            '{"name": "Eve", "age": 28, "city": "Berlin"}',
            '{"city": "Berlin", "age": 28, "name": Eve}',
        ),  # Missing quotes around "Eve"
        # Scenario 4 - Keys mismatch
        (
            '{"name": "Charlie", "age": 30, "country": "USA"}',
            '{"name": "Charlie", "age": 30, "city": "USA"}',
        ),  # 'country' vs 'city'
        # Scenario 5 - Values mismatch
        (
            '{"name": "David", "age": 35, "city": "Tokyo"}',
            '{"city": "Tokyo", "age": 35, "name": "Daniel"}',
        ),  # 'David' vs 'Daniel'
    ]
    expected_matches = [True, True, False, False, False]

    # Create DataFrame
    data = pd.DataFrame(scenarios, columns=["col_1", "col_2"])

    result = feature_generator.generate_feature(
        data=data,
        data_definition=create_data_definition(None, data, ColumnMapping()),
    )

    # Assert directly so a mismatch fails the test; catching AssertionError and
    # returning a bool would let pytest report success no matter the outcome.
    assert result.shape == (len(scenarios), 1)
    # Compare values positionally: the column label is chosen by the feature,
    # and DataFrame.equals would also compare labels.
    assert result.iloc[:, 0].tolist() == expected_matches


# Allow running this file directly without executing the test on import.
if __name__ == "__main__":
    test_is_valid_sql_feature()