Skip to content

Commit

Permalink
Merge branch 'main' into add/failed-deployment-tips
Browse files Browse the repository at this point in the history
  • Loading branch information
sarayourfriend authored Jun 25, 2023
2 parents 1f4b454 + 9dcb4e3 commit 06260d5
Show file tree
Hide file tree
Showing 39 changed files with 1,011 additions and 364 deletions.
22 changes: 19 additions & 3 deletions .github/workflows/ci_cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -565,13 +565,13 @@ jobs:
name:
- playwright_vr
- playwright_e2e
- storybook_vr
- storybook
include:
- name: playwright_vr
script: "test:playwright visual-regression"
- name: playwright_e2e
script: "test:playwright e2e"
- name: storybook_vr
- name: storybook
script: "test:storybook"

steps:
Expand Down Expand Up @@ -609,7 +609,7 @@ jobs:
name:
- playwright_vr
- playwright_e2e
- storybook_vr
- storybook

steps:
- name: Pass
Expand Down Expand Up @@ -949,6 +949,22 @@ jobs:
wait_time: 60 # check every minute
max_time: 1800 # allow up to 30 minutes for a deployment

- name: Deploy staging thumbnails
uses: felixp8/dispatch-and-wait@v0.1.0
with:
owner: WordPress
repo: openverse-infrastructure
token: ${{ secrets.ACCESS_TOKEN }}
event_type: deploy_staging_api_thumbnails
client_payload: |
{
"actor": "${{ github.actor }}",
"tag": "${{ needs.get-image-tag.outputs.image_tag }}",
"run_name": "${{ steps.commit.outputs.commit_message }}"
}
wait_time: 60 # check every minute
max_time: 1800 # allow up to 30 minutes for a deployment

################
# Notification #
################
Expand Down
18 changes: 17 additions & 1 deletion api/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,26 @@ def _post_process_results(
end = 90 + 45
```
"""
if end >= search_results.hits.total.value:
# Total available hits already exhausted in previous iteration
return results

end += int(end / 2)
if start + end > ELASTICSEARCH_MAX_RESULT_WINDOW:
query_size = start + end
if query_size > ELASTICSEARCH_MAX_RESULT_WINDOW:
return results

# subtract start to account for the records skipped
# and which should not count towards the total
# available hits for the query
total_available_hits = search_results.hits.total.value - start
if query_size > total_available_hits:
# Clamp the query size to last available hit. On the next
# iteration, if results are still insufficient, the check
# to compare previous_query_size and total_available_hits
# will prevent further query attempts
end = search_results.hits.total.value

s = s[start:end]
search_response = s.execute()

Expand Down
6 changes: 3 additions & 3 deletions api/api/utils/dead_link_mask.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import django_redis
from deepdiff import DeepHash
from django_redis import get_redis_connection
from elasticsearch_dsl import Search


Expand Down Expand Up @@ -32,7 +32,7 @@ def get_query_mask(query_hash: str) -> list[int]:
:param query_hash: Unique value for a particular query.
:return: Boolean mask as a list of integers (0 or 1).
"""
redis = get_redis_connection("default")
redis = django_redis.get_redis_connection("default")
key = f"{query_hash}:dead_link_mask"
return list(map(int, redis.lrange(key, 0, -1)))

Expand All @@ -44,7 +44,7 @@ def save_query_mask(query_hash: str, mask: list):
:param mask: Boolean mask as a list of integers (0 or 1).
:param query_hash: Unique value to be used as key.
"""
redis_pipe = get_redis_connection("default").pipeline()
redis_pipe = django_redis.get_redis_connection("default").pipeline()
key = f"{query_hash}:dead_link_mask"

redis_pipe.delete(key)
Expand Down
128 changes: 128 additions & 0 deletions api/api/utils/image_proxy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import logging
from typing import Literal
from urllib.parse import urlparse

from django.conf import settings
from django.http import HttpResponse
from rest_framework.exceptions import UnsupportedMediaType

import django_redis
import requests
import sentry_sdk

from api.utils.image_proxy.exception import UpstreamThumbnailException
from api.utils.image_proxy.extension import get_image_extension
from api.utils.image_proxy.photon import get_photon_request_params
from api.utils.tallies import get_monthly_timestamp


# Module-level logger; `get` derives its own child logger from this one.
parent_logger = logging.getLogger(__name__)

# Base headers merged into every upstream thumbnail request made by `get`.
# The User-Agent comes from the project-wide template with this module's
# purpose filled in.
HEADERS = {
"User-Agent": settings.OUTBOUND_USER_AGENT_TEMPLATE.format(
purpose="ThumbnailGeneration"
)
}

# Extensions that are proxied through Photon.
PHOTON_TYPES = {"gif", "jpg", "jpeg", "png", "webp"}
# Extensions whose original URL is requested as-is, without Photon.
ORIGINAL_TYPES = {"svg"}

# Names for the two thumbnail strategies.
PHOTON = "photon"
ORIGINAL = "original"
# NOTE(review): the first literal here is "photon_proxy" while PHOTON above is
# "photon" — confirm which spelling is intended before relying on this type.
THUMBNAIL_STRATEGY = Literal["photon_proxy", "original"]


def get_request_params_for_extension(
    ext: str,
    headers: dict[str, str],
    image_url: str,
    parsed_image_url: urlparse,
    is_full_size: bool,
    is_compressed: bool,
) -> tuple[str, dict[str, str], dict[str, str]]:
    """
    Get the request params (url, params, headers) for the thumbnail proxy.

    If the image type is supported by Photon, the Photon request params are
    computed. If the file can be requested and returned as is (SVG), the
    original URL is used with no extra query params. Any other extension is
    rejected.

    :param ext: Image file extension, without the leading dot.
    :param headers: Headers to send with the upstream request.
    :param image_url: The original image URL.
    :param parsed_image_url: ``image_url`` as parsed by ``urlparse``.
    :param is_full_size: Forwarded to the Photon param builder.
    :param is_compressed: Forwarded to the Photon param builder.
    :return: Tuple of ``(upstream_url, params, headers)``.
    :raises UnsupportedMediaType: If ``ext`` is in neither supported set.
    """
    if ext in PHOTON_TYPES:
        return get_photon_request_params(
            parsed_image_url, is_full_size, is_compressed, headers
        )
    if ext in ORIGINAL_TYPES:
        return image_url, {}, headers

    # DRF's UnsupportedMediaType takes the *media type* as its first positional
    # argument and interpolates it into its default message. The custom message
    # must therefore be passed as ``detail`` to reach the client unmodified.
    raise UnsupportedMediaType(
        ext,
        detail=f"Image extension {ext} is not supported by the thumbnail proxy.",
    )


def get(
    image_url: str,
    media_identifier: str,
    accept_header: str = "image/*",
    is_full_size: bool = False,
    is_compressed: bool = True,
) -> HttpResponse:
    """
    Proxy an image through Photon if its file type is supported, else return the
    original image if the file type is SVG. Otherwise, raise an exception.

    Each upstream response status code is tallied in the "tallies" Redis
    connection, both globally and per domain, keyed by the monthly timestamp.
    Failures are tallied per exception type and domain; they are reported to
    Sentry only for the first few occurrences and then periodically, as
    configured by the ``THUMBNAIL_ERROR_*`` settings.

    :param image_url: URL of the image to proxy.
    :param media_identifier: Identifier of the media item; used when looking up
        the image extension.
    :param accept_header: Value of the ``Accept`` header sent upstream.
    :param is_full_size: Passed through to the request param builder.
    :param is_compressed: Passed through to the request param builder.
    :return: Response carrying the upstream body, status and content type.
    :raises UnsupportedMediaType: If the image extension is not supported.
    :raises UpstreamThumbnailException: If the upstream request fails or
        returns an error status.
    """
    logger = parent_logger.getChild("get")
    tallies = django_redis.get_redis_connection("tallies")
    month = get_monthly_timestamp()

    image_extension = get_image_extension(image_url, media_identifier)

    # HEADERS is on the right-hand side so its keys win on conflict.
    headers = {"Accept": accept_header} | HEADERS

    parsed_image_url = urlparse(image_url)
    domain = parsed_image_url.netloc

    upstream_url, params, headers = get_request_params_for_extension(
        image_extension,
        headers,
        image_url,
        parsed_image_url,
        is_full_size,
        is_compressed,
    )

    try:
        upstream_response = requests.get(
            upstream_url,
            timeout=15,
            params=params,
            headers=headers,
        )
        # Tally before raise_for_status so error statuses are counted too.
        tallies.incr(f"thumbnail_response_code:{month}:{upstream_response.status_code}")
        tallies.incr(
            f"thumbnail_response_code_by_domain:{domain}:"
            f"{month}:{upstream_response.status_code}"
        )
        upstream_response.raise_for_status()
    except Exception as exc:
        exception_name = f"{exc.__class__.__module__}.{exc.__class__.__name__}"
        key = f"thumbnail_error:{exception_name}:{domain}:{month}"
        count = tallies.incr(key)
        # Report the first occurrences, then only every Nth one, so a failing
        # provider does not flood Sentry.
        if count <= settings.THUMBNAIL_ERROR_INITIAL_ALERT_THRESHOLD or (
            count % settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY == 0
        ):
            sentry_sdk.capture_exception(exc)
        if isinstance(exc, requests.exceptions.HTTPError):
            # NOTE(review): the upstream response body is part of this key, so
            # key length and cardinality are controlled by the upstream —
            # confirm this is intended.
            tallies.incr(
                f"thumbnail_http_error:{domain}:{month}:"
                f"{exc.response.status_code}:{exc.response.text}"
            )
        # Chain explicitly so the original failure is recorded as the cause.
        raise UpstreamThumbnailException(
            f"Failed to render thumbnail. {exc}"
        ) from exc

    res_status = upstream_response.status_code
    content_type = upstream_response.headers.get("Content-Type")
    logger.debug(
        f"Image proxy response status: {res_status}, content-type: {content_type}"
    )

    return HttpResponse(
        upstream_response.content,
        status=res_status,
        content_type=content_type,
    )
8 changes: 8 additions & 0 deletions api/api/utils/image_proxy/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from rest_framework import status
from rest_framework.exceptions import APIException


class UpstreamThumbnailException(APIException):
    """
    API error raised when a thumbnail cannot be produced because a request to
    an upstream server failed.

    It is rendered as an HTTP 424 (Failed Dependency) response.
    """

    default_code = "upstream_photon_failure"
    default_detail = "Could not render thumbnail due to upstream provider error."
    status_code = status.HTTP_424_FAILED_DEPENDENCY
58 changes: 58 additions & 0 deletions api/api/utils/image_proxy/extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from os.path import splitext
from urllib.parse import urlparse

import django_redis
import requests
import sentry_sdk

from api.utils.image_proxy.exception import UpstreamThumbnailException


def get_image_extension(image_url: str, media_identifier: str) -> str | None:
    """
    Determine the file extension of an image.

    Sources are tried in order: the URL path, the value cached in Redis for
    this media item, and finally the ``Content-Type`` of a ``HEAD`` request to
    the image URL. The outcome of the ``HEAD`` lookup is cached; when no
    extension could be derived, the string ``"unknown"`` is cached instead, and
    later calls return that string from the cache.

    :param image_url: URL of the image.
    :param media_identifier: Identifier used to build the cache key.
    :return: The extension without a leading dot, or ``None`` when the ``HEAD``
        response gave no usable content type.
    :raises UpstreamThumbnailException: If the ``HEAD`` request fails or
        returns an error status.
    """
    cache = django_redis.get_redis_connection("default")
    key = f"media:{media_identifier}:thumb_type"

    ext = _get_file_extension_from_url(image_url)

    if not ext:
        # If the extension is not present in the URL, try to get it from the
        # redis cache. Redis returns bytes, hence the decode.
        ext = cache.get(key)
        ext = ext.decode("utf-8") if ext else None

    if not ext:
        # If the extension is still not present, try getting it from the
        # content type
        try:
            response = requests.head(image_url, timeout=10)
            response.raise_for_status()
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            # Chain explicitly so the original failure is recorded as the cause.
            raise UpstreamThumbnailException(
                "Failed to render thumbnail due to inability to check media "
                f"type. {exc}"
            ) from exc
        else:
            if response.headers and "Content-Type" in response.headers:
                content_type = response.headers["Content-Type"]
                ext = _get_file_extension_from_content_type(content_type)
            else:
                ext = None

            # Cache misses as well, so the HEAD request is not repeated for
            # media whose type cannot be determined.
            cache.set(key, ext if ext else "unknown")
    return ext


def _get_file_extension_from_url(image_url: str) -> str:
"""Return the image extension if present in the URL."""
parsed = urlparse(image_url)
_, ext = splitext(parsed.path)
return ext[1:].lower() # remove the leading dot


def _get_file_extension_from_content_type(content_type: str) -> str | None:
"""
Return the image extension if present in the Response's content type
header.
"""
if content_type and "/" in content_type:
return content_type.split("/")[1]
return None
43 changes: 43 additions & 0 deletions api/api/utils/image_proxy/photon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from django.conf import settings


def get_photon_request_params(
    parsed_image_url,
    is_full_size: bool,
    is_compressed: bool,
    headers: dict,
):
    """
    Build the upstream URL, query params and headers for a Photon request.

    Photon options documented here:
    https://developer.wordpress.com/docs/photon/api/

    :param parsed_image_url: The image URL as parsed by ``urlparse``.
    :param is_full_size: When false, the configured thumbnail width is
        requested.
    :param is_compressed: When true, the configured thumbnail quality is
        requested.
    :param headers: Request headers; updated in place with the Photon
        authentication header when an auth key is configured.
    :return: Tuple of ``(upstream_url, params, headers)``.
    """
    photon_params = {}

    if not is_full_size:
        photon_params["w"] = settings.THUMBNAIL_WIDTH_PX

    if is_compressed:
        photon_params["quality"] = settings.THUMBNAIL_QUALITY

    original_query = parsed_image_url.query
    if original_query:
        # Passed through unencoded: requests URL-encodes everything in
        # ``params`` when it builds the final request URL.
        photon_params["q"] = original_query

    if parsed_image_url.scheme == "https":
        # Without this flag Photon fetches over plain HTTP, which fails for
        # providers that serve HTTPS only and do not redirect.
        photon_params["ssl"] = "true"

    # Photon expects the image location without its protocol, so the host
    # (with port) and path are appended directly to the Photon endpoint.
    upstream_url = (
        f"{settings.PHOTON_ENDPOINT}"
        f"{parsed_image_url.netloc}{parsed_image_url.path}"
    )

    if settings.PHOTON_AUTH_KEY:
        headers["X-Photon-Authentication"] = settings.PHOTON_AUTH_KEY

    return upstream_url, photon_params, headers
Loading

0 comments on commit 06260d5

Please sign in to comment.