diff --git a/backend/cellguide/pipeline/__init__.py b/backend/cellguide/pipeline/__init__.py
index b07a960e2126e..f768b0355a897 100644
--- a/backend/cellguide/pipeline/__init__.py
+++ b/backend/cellguide/pipeline/__init__.py
@@ -17,6 +17,12 @@
 from backend.cellguide.pipeline.ontology_tree import run as run_ontology_tree_pipeline
 from backend.cellguide.pipeline.source_collections import run as run_source_collections_pipeline
 from backend.common.utils.cloudfront import create_invalidation_for_cellguide_data
+from backend.common.utils.result_notification import (
+    format_failed_batch_issue_slack_alert,
+    gen_cg_pipeline_failure_message,
+    gen_cg_pipeline_success_message,
+    notify_slack,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -55,8 +61,8 @@ def run_cellguide_pipeline():
         gpt_seo_output_directory=GPT_SEO_OUTPUT_DIRECTORY_FOLDERNAME,
     )
 
-    upload_cellguide_pipeline_output_to_s3(output_directory=output_directory)
-    upload_gpt_descriptions_to_s3(
+    output_path = upload_cellguide_pipeline_output_to_s3(output_directory=output_directory)
+    description_output_path = upload_gpt_descriptions_to_s3(
         gpt_output_directory=GPT_OUTPUT_DIRECTORY_FOLDERNAME,
         gpt_seo_output_directory=GPT_SEO_OUTPUT_DIRECTORY_FOLDERNAME,
     )
@@ -67,6 +73,8 @@ def run_cellguide_pipeline():
     # cleanup
     cleanup(output_directory=output_directory)
 
+    return output_path, description_output_path
+
 
 def upload_cellguide_pipeline_output_to_s3(*, output_directory: str):
     """
@@ -97,6 +105,10 @@ def upload_cellguide_pipeline_output_to_s3(*, output_directory: str):
     # this is used for custom cloudfront error handling
     s3_provider.upload_file("404", bucket, "404", {})
 
+    output_path = f"{bucket_path}{output_directory}"
+
+    return output_path
+
 
-def upload_gpt_descriptions_to_s3(*, gpt_output_directory: str, gpt_seo_output_directory: str) -> None:
+def upload_gpt_descriptions_to_s3(*, gpt_output_directory: str, gpt_seo_output_directory: str) -> str:
     bucket_path = get_bucket_path()
@@ -113,6 +125,10 @@ def upload_gpt_descriptions_to_s3(*, gpt_output_directory: str, gpt_seo_output_d
         num_descriptions = len(glob(f"{src_directory}/*.json"))
         logger.info(f"Uploaded {num_descriptions} GPT descriptions to {bucket_path}{dst_directory}/")
 
+    description_output_path = f"{bucket_path}{dst_directory}/"
+
+    return description_output_path
+
 
 def cleanup(*, output_directory: str):
     logger.info(f"Cleaning up {output_directory} and other CellGuide pipeline outputs")
@@ -129,5 +145,16 @@ def cleanup(*, output_directory: str):
 
 
 if __name__ == "__main__":
-    run_cellguide_pipeline()
+    try:
+        output_path, description_output_path = run_cellguide_pipeline()
+        success_message = gen_cg_pipeline_success_message(output_path, description_output_path)
+        notify_slack(success_message)
+    except Exception as e:
+        logger.exception("Cell Guide Pipeline failed")
+        failure_message = format_failed_batch_issue_slack_alert(
+            gen_cg_pipeline_failure_message(f"Issue with Cell Guide pipeline run: {e}. See logs for more detail.")
+        )
+        notify_slack(failure_message)
+        # Exit non-zero: the bare sys.exit() below reports status 0, which would mark a failed run as successful.
+        sys.exit(1)
     sys.exit()
diff --git a/backend/common/providers/crossref_provider.py b/backend/common/providers/crossref_provider.py
index 79c356729290c..7b996d93d9359 100644
--- a/backend/common/providers/crossref_provider.py
+++ b/backend/common/providers/crossref_provider.py
@@ -75,7 +75,8 @@ def _fetch_crossref_payload(self, doi):
             res.raise_for_status()
         except requests.RequestException as e:
             if e.response is not None and e.response.status_code == 404:
-                raise CrossrefDOINotFoundException from e
+                logging.warning(f"DOI {doi} not found, skipping.")
+                return None
             else:
                 raise CrossrefFetchException("Cannot fetch metadata from Crossref") from e
 
diff --git a/backend/common/utils/result_notification.py b/backend/common/utils/result_notification.py
index 3ca3d5c4bf3b1..c690be06a7d7a 100644
--- a/backend/common/utils/result_notification.py
+++ b/backend/common/utils/result_notification.py
@@ -106,3 +106,50 @@ def gen_wmg_pipeline_success_message(snapshot_path: str, dataset_count: int, cel
             },
         ]
     }
+
+
+def gen_cg_pipeline_success_message(output_path: str, description_output_path: str) -> dict:
+    """Build the Slack message payload reporting a successful CellGuide pipeline run and its two output paths."""
+    return {
+        "blocks": [
+            {
+                "type": "header",
+                "text": {
+                    "type": "plain_text",
+                    "text": "CellGuide Pipeline Run Succeeded:tada: ",
+                    "emoji": True,
+                },
+            },
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": f"\n* CellGuide snapshot stored in {output_path}"
+                    f"\n* GPT Descriptions can be found in {description_output_path}.",
+                },
+            },
+        ]
+    }
+
+
+def gen_cg_pipeline_failure_message(failure_info: str) -> dict:
+    """Build the Slack message payload reporting a failed CellGuide pipeline run, embedding ``failure_info``."""
+    return {
+        "blocks": [
+            {
+                "type": "header",
+                "text": {
+                    "type": "plain_text",
+                    "text": "CellGuide Pipeline Run FAILED:alert:",
+                    "emoji": True,
+                },
+            },
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": f"CellGuide Pipeline failure @sc-oncall-eng \n{failure_info}",
+                },
+            },
+        ]
+    }
diff --git a/tests/unit/backend/common/test_crossref_provider.py b/tests/unit/backend/common/test_crossref_provider.py
index 20506dca8a111..980a966f03a2e 100644
--- a/tests/unit/backend/common/test_crossref_provider.py
+++ b/tests/unit/backend/common/test_crossref_provider.py
@@ -7,7 +7,6 @@
 from requests.models import HTTPError, Response
 
 from backend.common.providers.crossref_provider import (
-    CrossrefDOINotFoundException,
     CrossrefException,
     CrossrefFetchException,
     CrossrefParseException,
@@ -305,9 +304,9 @@ def test__provider_throws_exception_if_request_fails(self, mock_config, mock_get
 
     @patch("backend.common.providers.crossref_provider.requests.get")
     @patch("backend.common.providers.crossref_provider.CorporaConfig")
-    def test__provider_throws_exception_if_request_fails_with_404(self, mock_config, mock_get):
+    def test__provider_returns_none_on_404(self, mock_config, mock_get):
         """
-        Asserts a CrossrefFetchException if the GET request fails for any reason
+        Asserts that the function returns (None, None, None) if the GET request fails with a 404 status.
         """
         response_404 = Response()
         response_404.status_code = 404
@@ -315,8 +314,9 @@ def test__provider_throws_exception_if_request_fails_with_404(self, mock_config,
 
         provider = CrossrefProvider()
 
-        with self.assertRaises(CrossrefDOINotFoundException):
-            provider.fetch_metadata("test_doi")
+        result = provider.fetch_metadata("test_doi")
+
+        self.assertEqual(result, (None, None, None), "Expected None for a 404 response")
 
     @patch("backend.common.providers.crossref_provider.requests.get")
     @patch("backend.common.providers.crossref_provider.CorporaConfig")