Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(cellguide): Add slack notification to pipeline and skip if DOI returns 404 #7359

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions backend/cellguide/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
from backend.cellguide.pipeline.ontology_tree import run as run_ontology_tree_pipeline
from backend.cellguide.pipeline.source_collections import run as run_source_collections_pipeline
from backend.common.utils.cloudfront import create_invalidation_for_cellguide_data
from backend.common.utils.result_notification import (
format_failed_batch_issue_slack_alert,
gen_cg_pipeline_failure_message,
gen_cg_pipeline_success_message,
notify_slack,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,8 +61,8 @@ def run_cellguide_pipeline():
gpt_seo_output_directory=GPT_SEO_OUTPUT_DIRECTORY_FOLDERNAME,
)

upload_cellguide_pipeline_output_to_s3(output_directory=output_directory)
upload_gpt_descriptions_to_s3(
output_path = upload_cellguide_pipeline_output_to_s3(output_directory=output_directory)
description_output_path = upload_gpt_descriptions_to_s3(
gpt_output_directory=GPT_OUTPUT_DIRECTORY_FOLDERNAME,
gpt_seo_output_directory=GPT_SEO_OUTPUT_DIRECTORY_FOLDERNAME,
)
Expand All @@ -67,6 +73,8 @@ def run_cellguide_pipeline():
# cleanup
cleanup(output_directory=output_directory)

return output_path, description_output_path


def upload_cellguide_pipeline_output_to_s3(*, output_directory: str):
"""
Expand Down Expand Up @@ -97,6 +105,10 @@ def upload_cellguide_pipeline_output_to_s3(*, output_directory: str):
# this is used for custom cloudfront error handling
s3_provider.upload_file("404", bucket, "404", {})

output_path = f"{bucket_path}{output_directory}"

return output_path


def upload_gpt_descriptions_to_s3(*, gpt_output_directory: str, gpt_seo_output_directory: str) -> None:
bucket_path = get_bucket_path()
Expand All @@ -113,6 +125,10 @@ def upload_gpt_descriptions_to_s3(*, gpt_output_directory: str, gpt_seo_output_d
num_descriptions = len(glob(f"{src_directory}/*.json"))
logger.info(f"Uploaded {num_descriptions} GPT descriptions to {bucket_path}{dst_directory}/")

description_output_path = f"{bucket_path}{dst_directory}/"

return description_output_path


def cleanup(*, output_directory: str):
logger.info(f"Cleaning up {output_directory} and other CellGuide pipeline outputs")
Expand All @@ -129,5 +145,14 @@ def cleanup(*, output_directory: str):


if __name__ == "__main__":
    # Script entry point: run the CellGuide pipeline once and report the outcome to Slack.
    try:
        output_path, description_output_path = run_cellguide_pipeline()
        success_message = gen_cg_pipeline_success_message(output_path, description_output_path)
        notify_slack(success_message)
    except Exception as e:
        # Top-level boundary: log the full traceback, then alert via Slack.
        logger.exception("Cell Guide Pipeline failed")
        failure_message = format_failed_batch_issue_slack_alert(
            gen_cg_pipeline_failure_message(f"Issue with Cell Guide pipeline run: {e}. See logs for more detail.")
        )
        notify_slack(failure_message)
        # A bare sys.exit() exits with status 0 (success); use a non-zero status so
        # whatever launched this process can tell that the run failed.
        sys.exit(1)
3 changes: 2 additions & 1 deletion backend/common/providers/crossref_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def _fetch_crossref_payload(self, doi):
res.raise_for_status()
except requests.RequestException as e:
if e.response is not None and e.response.status_code == 404:
raise CrossrefDOINotFoundException from e
logging.warning(f"DOI {doi} not found, skipping.")
return None
else:
raise CrossrefFetchException("Cannot fetch metadata from Crossref") from e

Expand Down
45 changes: 45 additions & 0 deletions backend/common/utils/result_notification.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,48 @@
},
]
}


def gen_cg_pipeline_success_message(output_path: str, description_output_path: str) -> dict:
    """
    Build the Slack message payload announcing a successful CellGuide pipeline run.

    :param output_path: location of the stored CellGuide snapshot, interpolated into the message text
    :param description_output_path: location of the GPT descriptions, interpolated into the message text
    :return: dict with a single "blocks" key holding a header block and a mrkdwn section block
    """
    # NOTE(review): Codecov reports this return statement as not covered by tests.
    return {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "CellGuide Pipeline Run Succeeded:tada: ",
                    "emoji": True,
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"\n* CellGuide snapshot stored in {output_path}"
                    f"\n* GPT Descriptions can be found in {description_output_path}.",
                },
            },
        ]
    }


def gen_cg_pipeline_failure_message(failure_info: str) -> dict:
    """
    Build the Slack message payload announcing a failed CellGuide pipeline run.

    :param failure_info: description of the failure, appended to the section text after the on-call mention
    :return: dict with a single "blocks" key holding a header block and a mrkdwn section block
    """
    # NOTE(review): Codecov reports this return statement as not covered by tests.
    return {
        "blocks": [
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": "CellGuide Pipeline Run FAILED:alert:",
                    "emoji": True,
                },
            },
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"CellGuide Pipeline failure @sc-oncall-eng \n{failure_info}",
                },
            },
        ]
    }
10 changes: 5 additions & 5 deletions tests/unit/backend/common/test_crossref_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from requests.models import HTTPError, Response

from backend.common.providers.crossref_provider import (
CrossrefDOINotFoundException,
CrossrefException,
CrossrefFetchException,
CrossrefParseException,
Expand Down Expand Up @@ -305,18 +304,19 @@ def test__provider_throws_exception_if_request_fails(self, mock_config, mock_get

@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
def test__provider_returns_none_on_404(self, mock_config, mock_get):
    """
    Asserts that fetch_metadata returns (None, None, None) if the GET request fails with a 404 status.
    """
    # Make the patched requests.get raise an HTTPError that carries a 404 response
    response_404 = Response()
    response_404.status_code = 404
    mock_get.side_effect = HTTPError(response=response_404)

    provider = CrossrefProvider()

    # A missing DOI is expected to be skipped rather than raised
    result = provider.fetch_metadata("test_doi")

    self.assertEqual(result, (None, None, None), "Expected None for a 404 response")

@patch("backend.common.providers.crossref_provider.requests.get")
@patch("backend.common.providers.crossref_provider.CorporaConfig")
Expand Down
Loading