diff --git a/catalog/dags/providers/provider_api_scripts/stocksnap.py b/catalog/dags/providers/provider_api_scripts/stocksnap.py index a5d5f415627..a7a1da39977 100644 --- a/catalog/dags/providers/provider_api_scripts/stocksnap.py +++ b/catalog/dags/providers/provider_api_scripts/stocksnap.py @@ -15,6 +15,9 @@ import json import logging +import backoff +from requests.exceptions import HTTPError + from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -157,6 +160,17 @@ def _get_title(item): img_title = " ".join(tags) return img_title.title() + # Add a backoff on every request + # for 5XX error codes. + # See: https://github.com/WordPress/openverse/issues/4878 + @backoff.on_exception( + backoff.expo, + HTTPError, + # 30 minutes + max_time=60 * 30, + # Only retry for 5xx errors + giveup=lambda e: e.response.status_code not in {502, 503, 504}, + ) def _get_filesize(self, image_url): """Get the size of the image in bytes.""" resp = self.delayed_requester.head(image_url)