Skip to content

Commit 381ccf6

Browse files
authored
Clean up experimental orchestration implementation and alert features from it (#6948)
Clean up experimental orchestration implementation and alert features from it
1 parent 9e66ed1 commit 381ccf6

File tree

77 files changed

+18
-24646
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+18
-24646
lines changed

build/BUILD

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ sh_binary(
2424
"//tfx/examples/custom_components/presto_example_gen/proto:presto_config_pb2.py",
2525
"//tfx/extensions/experimental/kfp_compatibility/proto:kfp_component_spec_pb2.py",
2626
"//tfx/extensions/google_cloud_big_query/experimental/elwc_example_gen/proto:elwc_config_pb2.py",
27-
"//tfx/orchestration/experimental/core:component_generated_alert_pb2.py",
2827
"//tfx/proto:bulk_inferrer_pb2.py",
2928
"//tfx/proto:distribution_validator_pb2.py",
3029
"//tfx/proto:evaluator_pb2.py",

setup.py

-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ def run(self):
223223
# These are the subpackages of `tfx.orchestration` necessary.
224224
'tfx.orchestration',
225225
'tfx.orchestration.config',
226-
'tfx.orchestration.experimental.core',
227226
'tfx.orchestration.launcher',
228227
'tfx.orchestration.local',
229228
'tfx.orchestration.local.legacy',

tfx/components/distribution_validator/executor.py

+1-69
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
from tfx.components.distribution_validator import utils
2525
from tfx.components.statistics_gen import stats_artifact_utils
2626
from tfx.dsl.components.base import base_executor
27-
from tfx.orchestration.experimental.core import component_generated_alert_pb2
28-
from tfx.orchestration.experimental.core import constants
2927
from tfx.proto import distribution_validator_pb2
3028
from tfx.proto.orchestration import execution_result_pb2
3129
from tfx.types import artifact_utils
@@ -34,7 +32,6 @@
3432
from tfx.utils import monitoring_utils
3533
from tfx.utils import writer_utils
3634

37-
from google.protobuf import any_pb2
3835
from tensorflow_metadata.proto.v0 import anomalies_pb2
3936
from tensorflow_metadata.proto.v0 import schema_pb2
4037
from tensorflow_metadata.proto.v0 import statistics_pb2
@@ -176,55 +173,6 @@ def _add_anomalies_for_missing_comparisons(
176173
return anomalies
177174

178175

179-
def _create_anomalies_alerts(
180-
anomalies: anomalies_pb2.Anomalies,
181-
split_pair: str,
182-
span: str,
183-
) -> list[component_generated_alert_pb2.ComponentGeneratedAlertInfo]:
184-
"""Creates an alert for each anomaly in the anomalies artifact.
185-
186-
Args:
187-
anomalies: The Anomalies proto.
188-
split_pair: The tuple name of the data split, like (train, eval).
189-
span: The span of the Anomalies.
190-
191-
Returns:
192-
A list of component generated alerts, if any.
193-
"""
194-
results = []
195-
# Information about dataset-level anomalies, such as "High num examples in
196-
# current dataset versus the previous span."
197-
if anomalies.HasField('dataset_anomaly_info'):
198-
for reason in anomalies.dataset_anomaly_info.reason:
199-
results.append(
200-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
201-
alert_name=(
202-
f'[{split_pair}][span {span}] {reason.short_description}'
203-
),
204-
alert_body=(
205-
f'[{split_pair}][span {span}] {reason.description}'
206-
),
207-
)
208-
)
209-
# Information about feature-level anomalies. Generates a single alert for all
210-
# anomalous features.
211-
features_with_anomalies = ', '.join(anomalies.anomaly_info.keys())
212-
if features_with_anomalies:
213-
results.append(
214-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
215-
alert_name=(
216-
f'[{split_pair}][span {span}] Feature-level anomalies present'
217-
),
218-
alert_body=(
219-
f'[{split_pair}][span {span}] Feature(s) '
220-
f'{features_with_anomalies} contain(s) anomalies. '
221-
f'See Anomalies artifact for more details.'
222-
),
223-
)
224-
)
225-
return results
226-
227-
228176
def _get_distribution_validator_config(
229177
input_dict: Dict[str, list[types.Artifact]], exec_properties: Dict[str, Any]
230178
) -> Optional[distribution_validator_pb2.DistributionValidatorConfig]:
@@ -282,8 +230,7 @@ def Do(
282230
exec_properties: A dict of execution properties.
283231
284232
Returns:
285-
ExecutionResult proto with anomalies and the component generated alerts
286-
execution property set with anomalies alerts, if any.
233+
ExecutionResult proto with anomalies
287234
"""
288235
self._log_startup(input_dict, output_dict, exec_properties)
289236

@@ -379,7 +326,6 @@ def Do(
379326
)
380327
)
381328
current_stats_span = test_statistics.span
382-
alerts = component_generated_alert_pb2.ComponentGeneratedAlertList()
383329
for test_split, baseline_split in split_pairs:
384330
split_pair = '%s_%s' % (test_split, baseline_split)
385331
logging.info('Processing split pair %s', split_pair)
@@ -420,11 +366,6 @@ def Do(
420366
current_stats_span,
421367
validation_metrics_artifact,
422368
)
423-
alerts.component_generated_alert_list.extend(
424-
_create_anomalies_alerts(
425-
anomalies, split_pair, anomalies_artifact.span
426-
)
427-
)
428369

429370
# Set blessed custom property for Anomalies Artifact
430371
anomalies_artifact.set_json_value_custom_property(
@@ -435,13 +376,4 @@ def Do(
435376
standard_component_specs.ANOMALIES_KEY
436377
].artifacts.append(anomalies_artifact.mlmd_artifact)
437378

438-
# Set component generated alerts execution property in ExecutorOutput if
439-
# any anomalies alerts exist.
440-
if alerts.component_generated_alert_list:
441-
any_proto = any_pb2.Any()
442-
any_proto.Pack(alerts)
443-
executor_output.execution_properties[
444-
constants.COMPONENT_GENERATED_ALERTS_KEY
445-
].proto_value.CopyFrom(any_proto)
446-
447379
return executor_output

tfx/components/distribution_validator/executor_test.py

+4-145
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
from tensorflow_data_validation.anomalies.proto import custom_validation_config_pb2
2323
from tfx.components.distribution_validator import executor
2424
from tfx.dsl.io import fileio
25-
from tfx.orchestration.experimental.core import component_generated_alert_pb2
26-
from tfx.orchestration.experimental.core import constants
2725
from tfx.proto import distribution_validator_pb2
2826
from tfx.types import artifact_utils
2927
from tfx.types import standard_artifacts
@@ -215,23 +213,6 @@ def testSplitPairs(self, split_pairs, expected_split_pair_names,
215213
}
216214
""",
217215
'anomalies_blessed_value': 0,
218-
'expected_alerts': (
219-
component_generated_alert_pb2.ComponentGeneratedAlertList(
220-
component_generated_alert_list=[
221-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
222-
alert_name=(
223-
'[train_eval][span 2] Feature-level anomalies '
224-
'present'
225-
),
226-
alert_body=(
227-
'[train_eval][span 2] Feature(s) company, '
228-
'dropoff_census_tract contain(s) anomalies. See '
229-
'Anomalies artifact for more details.'
230-
),
231-
),
232-
]
233-
)
234-
)
235216
},
236217
{
237218
'testcase_name': 'dataset_constraint',
@@ -255,24 +236,6 @@ def testSplitPairs(self, split_pairs, expected_split_pair_names,
255236
}
256237
}""",
257238
'anomalies_blessed_value': 0,
258-
'expected_alerts': (
259-
component_generated_alert_pb2.ComponentGeneratedAlertList(
260-
component_generated_alert_list=[
261-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
262-
alert_name=(
263-
'[train_eval][span 2] High num examples in '
264-
'current dataset versus the previous span.'
265-
),
266-
alert_body=(
267-
'[train_eval][span 2] The ratio of num examples '
268-
'in the current dataset versus the previous span '
269-
'is 2.02094 (up to six significant digits), '
270-
'which is above the threshold 1.'
271-
),
272-
),
273-
]
274-
)
275-
)
276239
},
277240
{
278241
'testcase_name': 'no_anomalies',
@@ -305,9 +268,6 @@ def testSplitPairs(self, split_pairs, expected_split_pair_names,
305268
}
306269
""",
307270
'anomalies_blessed_value': 1,
308-
'expected_alerts': (
309-
component_generated_alert_pb2.ComponentGeneratedAlertList()
310-
),
311271
},
312272
{
313273
'testcase_name': 'custom_anomalies',
@@ -367,23 +327,6 @@ def testSplitPairs(self, split_pairs, expected_split_pair_names,
367327
}
368328
""",
369329
'anomalies_blessed_value': 0,
370-
'expected_alerts': (
371-
component_generated_alert_pb2.ComponentGeneratedAlertList(
372-
component_generated_alert_list=[
373-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
374-
alert_name=(
375-
'[train_eval][span 2] Feature-level anomalies '
376-
'present'
377-
),
378-
alert_body=(
379-
'[train_eval][span 2] Feature(s) company '
380-
'contain(s) anomalies. See Anomalies artifact '
381-
'for more details.'
382-
),
383-
)
384-
]
385-
)
386-
)
387330
},
388331
)
389332
def testAnomaliesGenerated(
@@ -392,7 +335,6 @@ def testAnomaliesGenerated(
392335
custom_validation_config,
393336
expected_anomalies,
394337
anomalies_blessed_value,
395-
expected_alerts,
396338
):
397339
source_data_dir = os.path.join(
398340
os.path.dirname(os.path.dirname(__file__)), 'testdata')
@@ -438,7 +380,7 @@ def testAnomaliesGenerated(
438380
}
439381

440382
distribution_validator_executor = executor.Executor()
441-
executor_output = distribution_validator_executor.Do(
383+
distribution_validator_executor.Do(
442384
input_dict, output_dict, exec_properties
443385
)
444386

@@ -465,27 +407,6 @@ def testAnomaliesGenerated(
465407
),
466408
{'train_eval': anomalies_blessed_value},
467409
)
468-
actual_alerts = (
469-
component_generated_alert_pb2.ComponentGeneratedAlertList()
470-
)
471-
executor_output.execution_properties[
472-
constants.COMPONENT_GENERATED_ALERTS_KEY
473-
].proto_value.Unpack(actual_alerts)
474-
for alert in expected_alerts.component_generated_alert_list:
475-
self.assertEqual(
476-
alert.alert_name,
477-
actual_alerts.component_generated_alert_list[0].alert_name
478-
)
479-
if 'Feature-level anomalies present' in alert.alert_name:
480-
self.assertIn(
481-
'See Anomalies artifact for more details.',
482-
actual_alerts.component_generated_alert_list[0].alert_body,
483-
)
484-
else:
485-
self.assertEqual(
486-
alert.alert_body,
487-
actual_alerts.component_generated_alert_list[0].alert_body
488-
)
489410

490411
def testMissBaselineStats(self):
491412

@@ -682,19 +603,6 @@ def testStructData(self):
682603
}
683604
}""", anomalies_pb2.Anomalies())
684605

685-
expected_alerts = component_generated_alert_pb2.ComponentGeneratedAlertList(
686-
component_generated_alert_list=[
687-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
688-
alert_name=(
689-
'[train_eval][span 3] Feature-level anomalies present'),
690-
alert_body=(
691-
'[train_eval][span 3] Feature(s) '
692-
'parent_feature.value_feature contain(s) anomalies. See '
693-
'Anomalies artifact for more details.'),
694-
)
695-
],
696-
)
697-
698606
# Create stats artifacts with a struct feature.
699607
for split_dir in ['Split-eval', 'Split-train']:
700608
full_split_dir = os.path.join(stats_artifact.uri, split_dir)
@@ -733,7 +641,7 @@ def testStructData(self):
733641
}
734642

735643
distribution_validator_executor = executor.Executor()
736-
executor_output = distribution_validator_executor.Do(
644+
distribution_validator_executor.Do(
737645
input_dict, output_dict, exec_properties
738646
)
739647

@@ -752,14 +660,6 @@ def testStructData(self):
752660
distribution_anomalies.ParseFromString(distribution_anomalies_bytes)
753661
self.assertEqualExceptBaseline(expected_anomalies, distribution_anomalies)
754662

755-
actual_alerts = (
756-
component_generated_alert_pb2.ComponentGeneratedAlertList()
757-
)
758-
executor_output.execution_properties[
759-
constants.COMPONENT_GENERATED_ALERTS_KEY
760-
].proto_value.Unpack(actual_alerts)
761-
self.assertEqual(actual_alerts, expected_alerts)
762-
763663
@parameterized.named_parameters(
764664
{
765665
'testcase_name':
@@ -1076,7 +976,7 @@ def testEmptyData(self, stats_train, stats_eval, expected_anomalies):
1076976
}
1077977

1078978
distribution_validator_executor = executor.Executor()
1079-
executor_output = distribution_validator_executor.Do(
979+
distribution_validator_executor.Do(
1080980
input_dict, output_dict, exec_properties
1081981
)
1082982

@@ -1099,26 +999,6 @@ def testEmptyData(self, stats_train, stats_eval, expected_anomalies):
1099999
distribution_anomalies.ParseFromString(distribution_anomalies_bytes)
11001000
self.assertEqualExceptBaseline(expected_anomalies, distribution_anomalies)
11011001

1102-
expected_alerts = component_generated_alert_pb2.ComponentGeneratedAlertList(
1103-
component_generated_alert_list=[
1104-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
1105-
alert_name=(
1106-
'[train_eval][span 4] Feature-level anomalies present'
1107-
),
1108-
alert_body=(
1109-
'[train_eval][span 4] Feature(s) first_feature contain(s) '
1110-
'anomalies. See Anomalies artifact for more details.'
1111-
),
1112-
),
1113-
]
1114-
)
1115-
actual_alerts = (
1116-
component_generated_alert_pb2.ComponentGeneratedAlertList()
1117-
)
1118-
executor_output.execution_properties[
1119-
constants.COMPONENT_GENERATED_ALERTS_KEY
1120-
].proto_value.Unpack(actual_alerts)
1121-
self.assertEqual(actual_alerts, expected_alerts)
11221002

11231003
def testAddOutput(self):
11241004
source_data_dir = os.path.join(
@@ -1184,7 +1064,7 @@ def testAddOutput(self):
11841064
}
11851065

11861066
distribution_validator_executor = executor.Executor()
1187-
executor_output = distribution_validator_executor.Do(
1067+
distribution_validator_executor.Do(
11881068
input_dict, output_dict, exec_properties
11891069
)
11901070

@@ -1193,27 +1073,6 @@ def testAddOutput(self):
11931073
)
11941074
self.assertTrue(fileio.exists(distribution_anomalies_path))
11951075

1196-
expected_alerts = component_generated_alert_pb2.ComponentGeneratedAlertList(
1197-
component_generated_alert_list=[
1198-
component_generated_alert_pb2.ComponentGeneratedAlertInfo(
1199-
alert_name=(
1200-
'[train_eval][span 5] Feature-level anomalies present'
1201-
),
1202-
alert_body=(
1203-
'[train_eval][span 5] Feature(s) '
1204-
'parent_feature.value_feature contain(s) anomalies. See '
1205-
'Anomalies artifact for more details.'
1206-
),
1207-
),
1208-
]
1209-
)
1210-
actual_alerts = (
1211-
component_generated_alert_pb2.ComponentGeneratedAlertList()
1212-
)
1213-
executor_output.execution_properties[
1214-
constants.COMPONENT_GENERATED_ALERTS_KEY
1215-
].proto_value.Unpack(actual_alerts)
1216-
self.assertEqual(actual_alerts, expected_alerts)
12171076

12181077
def testUseArtifactDVConfig(self):
12191078
source_data_dir = os.path.join(

0 commit comments

Comments
 (0)