Skip to content

Commit

Permalink
Use Content-Encoding to identify if data is binary
Browse files Browse the repository at this point in the history
When using _whitenoise_ for caching, which provides compression, binary types
may include mimetypes, "text/", "application/json":

- response.mimetype.startswith("text/")
- response.mimetype == "application/json"

Assuming that Content-Encoding will be set (as whitenoise apparently does)
this allows compression to be applied by the application for "text/" and
"application/json".

About Content-Encoding:
developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding

---

The above commit message and functional code change to zappa/handler.py was
written by GH user @monkut and a PR was provided with
Miserlou/Zappa#2170.

That PR was not merged in before the fork from Miserlou/zappa to Zappa/zappa.

This commit copies the code from that PR, adds a comment line referencing the
new migrated issue, and additionally adds tests to confirm behavior.

Co-authored-by: monkut <[email protected]>
  • Loading branch information
2 people authored and Jonathan Demirgian committed Nov 29, 2021
1 parent 0125954 commit eb2b50f
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 23 deletions.
104 changes: 84 additions & 20 deletions tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,10 @@ def test_exception_handler_on_web_request(self):
self.assertEqual(response["statusCode"], 500)
mocked_exception_handler.assert_called()

def test_wsgi_script_binary_support_base64_behavior(self):
"""
With Binary Support enabled, response mimetypes that are not text/* or application/json will be base64 encoded
def test_wsgi_script_binary_support_with_content_encoding(self):
"""
Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present.
""" # don't linebreak so that whole line is shown during nosetest readout
lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
Expand All @@ -243,48 +243,112 @@ def test_wsgi_script_binary_support_base64_behavior(self):
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/textplain_mimetype_response1",
"path": "/content_encoding_header_json1",
}

# A likely scenario is that the application would be gzip compressing some json response. That's checked first.
response = lh.handler(text_plain_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertNotIn("isBase64Encoded", response)
self.assertFalse(is_base64(response["body"]))
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

# We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves
# bytes in the response.

text_arbitrary_event = {
**text_plain_event,
**{"path": "/textarbitrary_mimetype_response1"},
**{"path": "/content_encoding_header_textarbitrary1"},
}

response = lh.handler(text_arbitrary_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertNotIn("isBase64Encoded", response)
self.assertFalse(is_base64(response["body"]))
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

# This route is similar to the above, but it serves its response as text and not bytes. That the response
# isn't bytes shouldn't matter because it still has a Content-Encoding header.

application_json_event = {
**text_plain_event,
**{"path": "/json_mimetype_response1"},
**{"path": "/content_encoding_header_textarbitrary2"},
}

response = lh.handler(application_json_event, None)

self.assertEqual(response["statusCode"], 200)
self.assertNotIn("isBase64Encoded", response)
self.assertFalse(is_base64(response["body"]))
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

arbitrary_binary_event = {
**text_plain_event,
**{"path": "/arbitrarybinary_mimetype_response1"},
def test_wsgi_script_binary_support_without_content_encoding_edgecases(
self,
):
"""
Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/".
""" # don't linebreak so that whole line is shown during nosetest readout

lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
"body": "",
"resource": "/{proxy+}",
"requestContext": {},
"queryStringParameters": {},
"headers": {
"Host": "1234567890.execute-api.us-east-1.amazonaws.com",
},
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/textplain_mimetype_response1",
}

response = lh.handler(arbitrary_binary_event, None)
for path in [
"/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64
"/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64
"/json_mimetype_response1", # application/json mimetype should not be turned to base64
]:
event = {**text_plain_event, "path": path}
response = lh.handler(event, None)

self.assertEqual(response["statusCode"], 200)
self.assertNotIn("isBase64Encoded", response)
self.assertFalse(is_base64(response["body"]))

def test_wsgi_script_binary_support_without_content_encoding(
self,
):
"""
Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent.
""" # don't linebreak so that whole line is shown during nosetest readout

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(response["isBase64Encoded"])
self.assertTrue(is_base64(response["body"]))
lh = LambdaHandler("tests.test_binary_support_settings")

text_plain_event = {
"body": "",
"resource": "/{proxy+}",
"requestContext": {},
"queryStringParameters": {},
"headers": {
"Host": "1234567890.execute-api.us-east-1.amazonaws.com",
},
"pathParameters": {"proxy": "return/request/url"},
"httpMethod": "GET",
"stageVariables": {},
"path": "/textplain_mimetype_response1",
}

for path in [
"/arbitrarybinary_mimetype_response1",
"/arbitrarybinary_mimetype_response2",
]:
event = {**text_plain_event, "path": path}
response = lh.handler(event, None)

self.assertEqual(response["statusCode"], 200)
self.assertIn("isBase64Encoded", response)
self.assertTrue(is_base64(response["body"]))

def test_wsgi_script_on_cognito_event_request(self):
"""
Expand Down
34 changes: 33 additions & 1 deletion tests/test_wsgi_binary_support_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# _responses_ when Binary Support is enabled.
###

import io
import gzip
import json

from flask import Flask, Response, send_file
Expand All @@ -29,3 +29,35 @@ def json_mimetype_response_1():
@app.route("/arbitrarybinary_mimetype_response1", methods=["GET"])
def arbitrary_mimetype_response_1():
return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype")


@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"])
def arbitrary_mimetype_response_3():
return Response(response="doesnt_matter", mimetype="definitely_not_text")


@app.route("/content_encoding_header_json1", methods=["GET"])
def response_with_content_encoding_1():
return Response(
response=gzip.compress(json.dumps({"some": "data"}).encode()),
mimetype="application/json",
headers={"Content-Encoding": "gzip"},
)


@app.route("/content_encoding_header_textarbitrary1", methods=["GET"])
def response_with_content_encoding_2():
return Response(
response=b"OK",
mimetype="text/arbitrary",
headers={"Content-Encoding": "something_arbitrarily_binary"},
)


@app.route("/content_encoding_header_textarbitrary2", methods=["GET"])
def response_with_content_encoding_3():
return Response(
response="OK",
mimetype="text/arbitrary",
headers={"Content-Encoding": "with_content_type_but_not_bytes_response"},
)
18 changes: 16 additions & 2 deletions zappa/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,14 +583,28 @@ def handler(self, event, context):
)

if response.data:
if (
# We base64 encode for two reasons when BINARY_SUPPORT is enabled:
# - Content-Encoding is present, which is commonly used by compression mechanisms to indicate
# that the content is in br/gzip/deflate/etc encoding
# (Related: https://github.com/zappa/Zappa/issues/908). Content like this must be
# transmitted as b64.
# - The response is assumed to be some binary format (since BINARY_SUPPORT is enabled and it
# isn't application/json or text/)
if settings.BINARY_SUPPORT and response.headers.get(
"Content-Encoding"
):
zappa_returndict["body"] = base64.b64encode(
response.data
).decode()
zappa_returndict["isBase64Encoded"] = True
elif (
settings.BINARY_SUPPORT
and not response.mimetype.startswith("text/")
and response.mimetype != "application/json"
):
zappa_returndict["body"] = base64.b64encode(
response.data
).decode("utf-8")
).decode()
zappa_returndict["isBase64Encoded"] = True
else:
zappa_returndict["body"] = response.get_data(as_text=True)
Expand Down

0 comments on commit eb2b50f

Please sign in to comment.