From 6e58c85ec68e58e1e6a84bd09501cc9c1990b699 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Fri, 23 Aug 2024 09:46:12 -0700 Subject: [PATCH] Update DAG parsing tests, add tests for ensuring new DAGs are added (#4797) * Add a check to make sure new DAG files get added to the parsing tests * Update tests with new DAGs and expected DAG counts * Fix deprecation warning --- catalog/tests/dags/test_dag_parsing.py | 48 ++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/catalog/tests/dags/test_dag_parsing.py b/catalog/tests/dags/test_dag_parsing.py index 800fe9ac3fa..a3d217034f7 100644 --- a/catalog/tests/dags/test_dag_parsing.py +++ b/catalog/tests/dags/test_dag_parsing.py @@ -16,26 +16,39 @@ # DAG paths to test DAG_PATHS = [ - "providers/provider_workflow_dag_factory.py", + "data_refresh/dag_factory.py", + "database/batched_update/batched_update_dag.py", + "database/catalog_cleaner/catalog_cleaner.py", + "database/delete_records/delete_records_dag.py", + "database/report_pending_reported_media.py", + "database/staging_database_restore/staging_database_restore_dag.py", + "elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py", + "elasticsearch_cluster/create_proportional_by_source_staging_index/create_proportional_by_source_staging_index_dag.py", # noqa: E501 + "elasticsearch_cluster/healthcheck_dag.py", + "elasticsearch_cluster/point_es_alias/point_es_alias_dag.py", + "elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py", + "legacy_data_refresh/create_filtered_index_dag.py", + "legacy_data_refresh/dag_factory.py", + "maintenance/add_license_url.py", "maintenance/airflow_log_cleanup_workflow.py", + "maintenance/check_silenced_dags.py", + "maintenance/decode_and_deduplicate_image_tags.py", + "maintenance/flickr_audit_sub_provider_workflow.py", "maintenance/pr_review_reminders/pr_review_reminders_dag.py", "maintenance/rotate_db_snapshots.py", - "popularity/recreate_popularity_calculation_dag_factory.py", - "popularity/popularity_refresh_dag_factory.py", - "legacy_data_refresh/dag_factory.py", - "legacy_data_refresh/create_filtered_index_dag.py", - "elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py", - "elasticsearch_cluster/healthcheck_dag.py", "oauth2/authorize_dag.py", "oauth2/token_refresh_dag.py", - "database/delete_records/delete_records_dag.py", + "popularity/popularity_refresh_dag_factory.py", + "popularity/recreate_popularity_calculation_dag_factory.py", + "providers/provider_reingestion_workflow_dag_factory.py", + "providers/provider_workflow_dag_factory.py", ] # Expected count from the DagBag once a file has been parsed # (this will likely not need to be edited for new providers) EXPECTED_COUNT = { "providers/provider_workflow_dag_factory.py": len(PROVIDER_WORKFLOW_CONFIGS), - "providers/provider_ingestion_workflow_dag_factory.py": len( + "providers/provider_reingestion_workflow_dag_factory.py": len( REINGESTION_WORKFLOW_CONFIGS ), "popularity/recreate_popularity_calculation_dag_factory.py": len(MEDIA_TYPES), @@ -43,6 +56,12 @@ "legacy_data_refresh/dag_factory.py": len(MEDIA_TYPES), "legacy_data_refresh/create_filtered_index_dag.py": len(MEDIA_TYPES), "elasticsearch_cluster/healthcheck_dag.py": len(ENVIRONMENTS), + "data_refresh/dag_factory.py": len(MEDIA_TYPES) * len(ENVIRONMENTS), + "database/batched_update/batched_update_dag.py": 2, + "elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py": len( + ENVIRONMENTS + ), + "elasticsearch_cluster/point_es_alias/point_es_alias_dag.py": len(ENVIRONMENTS), } @@ -60,6 +79,13 @@ def test_dag_import_errors(): len(dagbag.import_errors) == 0 ), f"Errors found during DAG import for files: {error_string}" + all_paths = {str(dag.relative_fileloc) for dag in dagbag.dags.values()} + missing_paths = all_paths - set(DAG_PATHS) + assert len(missing_paths) == 0, ( + f"The following DAG files are unaccounted for in the DAG parse testing, " + f"please add them to `DAG_PATHS` in `test_dag_parsing.py`: {missing_paths}" + ) + # relative_path represents the path from the DAG folder to the file @pytest.mark.parametrize("relative_path", DAG_PATHS) @@ -69,8 +95,10 @@ def test_dags_loads_correct_number_with_no_errors(relative_path, tmpdir): expected_count = EXPECTED_COUNT.get(relative_path, 1) dag_bag = DagBag(dag_folder=tmpdir, include_examples=False) dag_bag.process_file(str(DAG_FOLDER / relative_path)) + assert len(dag_bag.dags), "No DAGs found in file" assert len(dag_bag.import_errors) == 0, "Errors found during DAG import" - assert len(dag_bag.dags) == expected_count, "An unexpected # of DAGs was found" + found = len(dag_bag.dags) + assert found == expected_count, f"An unexpected # of DAGs ({found}) were found" def test_dag_uses_default_args():