diff --git a/tests/test_binary_support_settings.py b/tests/test_binary_support_settings.py new file mode 100644 index 000000000..484fc74dd --- /dev/null +++ b/tests/test_binary_support_settings.py @@ -0,0 +1,12 @@ +API_STAGE = "dev" +APP_FUNCTION = "app" +APP_MODULE = "tests.test_wsgi_binary_support_app" +BINARY_SUPPORT = True +CONTEXT_HEADER_MAPPINGS = {} +DEBUG = "True" +DJANGO_SETTINGS = None +DOMAIN = "api.example.com" +ENVIRONMENT_VARIABLES = {} +LOG_LEVEL = "DEBUG" +PROJECT_NAME = "binary_support_settings" +COGNITO_TRIGGER_MAPPING = {} \ No newline at end of file diff --git a/tests/test_handler.py b/tests/test_handler.py index cc0590128..6beea38fd 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -1,11 +1,11 @@ -import sys import unittest - from mock import Mock from zappa.handler import LambdaHandler from zappa.utilities import merge_headers +from .utils import is_base64 + def no_args(): return @@ -223,6 +223,130 @@ def test_exception_handler_on_web_request(self): self.assertEqual(response["statusCode"], 500) mocked_exception_handler.assert_called() + def test_wsgi_script_binary_support_with_content_encoding(self): + """ + Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present. + """ # don't linebreak so that whole line is shown during nosetest readout + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/content_encoding_header_json1", + } + + # A likely scenario is that the application would be gzip compressing some json response. That's checked first. + response = lh.handler(text_plain_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves + # bytes in the response. + + text_arbitrary_event = { + **text_plain_event, + **{"path": "/content_encoding_header_textarbitrary1"}, + } + + response = lh.handler(text_arbitrary_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # This route is similar to the above, but it serves its response as text and not bytes. That the response + # isn't bytes shouldn't matter because it still has a Content-Encoding header. + + application_json_event = { + **text_plain_event, + **{"path": "/content_encoding_header_textarbitrary2"}, + } + + response = lh.handler(application_json_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding_edgecases( + self, + ): + """ + Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/". + """ # don't linebreak so that whole line is shown during nosetest readout + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64 + "/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64 + "/json_mimetype_response1", # application/json mimetype should not be turned to base64 + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding( + self, + ): + """ + Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent. + """ # don't linebreak so that whole line is shown during nosetest readout + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/arbitrarybinary_mimetype_response1", + "/arbitrarybinary_mimetype_response2", + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + def test_wsgi_script_on_cognito_event_request(self): """ Ensure that requests sent by cognito behave sensibly diff --git a/tests/test_wsgi_binary_support_app.py b/tests/test_wsgi_binary_support_app.py new file mode 100644 index 000000000..0fbdd9a5f --- /dev/null +++ b/tests/test_wsgi_binary_support_app.py @@ -0,0 +1,63 @@ +### +# This test application exists to confirm how Zappa handles WSGI application +# _responses_ when Binary Support is enabled. +### + +import gzip +import json + +from flask import Flask, Response, send_file + +app = Flask(__name__) + + +@app.route("/textplain_mimetype_response1", methods=["GET"]) +def text_mimetype_response_1(): + return Response(response="OK", mimetype="text/plain") + + +@app.route("/textarbitrary_mimetype_response1", methods=["GET"]) +def text_mimetype_response_2(): + return Response(response="OK", mimetype="text/arbitary") + + +@app.route("/json_mimetype_response1", methods=["GET"]) +def json_mimetype_response_1(): + return Response(response=json.dumps({"some": "data"}), mimetype="application/json") + + +@app.route("/arbitrarybinary_mimetype_response1", methods=["GET"]) +def arbitrary_mimetype_response_1(): + return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype") + + +@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"]) +def arbitrary_mimetype_response_3(): + return Response(response="doesnt_matter", mimetype="definitely_not_text") + + +@app.route("/content_encoding_header_json1", methods=["GET"]) +def response_with_content_encoding_1(): + return Response( + response=gzip.compress(json.dumps({"some": "data"}).encode()), + mimetype="application/json", + headers={"Content-Encoding": "gzip"}, + ) + + +@app.route("/content_encoding_header_textarbitrary1", methods=["GET"]) +def response_with_content_encoding_2(): + return Response( + response=b"OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "something_arbitrarily_binary"}, + ) + + +@app.route("/content_encoding_header_textarbitrary2", methods=["GET"]) +def response_with_content_encoding_3(): + return Response( + response="OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "with_content_type_but_not_bytes_response"}, + ) \ No newline at end of file diff --git a/tests/utils.py b/tests/utils.py index 91e588cc4..8a2a93fad 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,4 @@ +import base64 import functools import os from contextlib import contextmanager @@ -6,10 +7,7 @@ import placebo from mock import MagicMock, patch -try: - file -except NameError: # builtin 'file' was removed in Python 3 - from io import IOBase as file +from io import IOBase as file PLACEBO_DIR = os.path.join(os.path.dirname(__file__), "placebo") @@ -72,3 +70,11 @@ def stub_open(*args, **kwargs): with patch("__builtin__.open", stub_open): yield mock_open, mock_file + + +def is_base64(test_string: str) -> bool: + # Taken from https://stackoverflow.com/a/45928164/3200002 + try: + return base64.b64encode(base64.b64decode(test_string)).decode() == test_string + except Exception: + return False \ No newline at end of file diff --git a/zappa/handler.py b/zappa/handler.py index ed0cc9835..f44274c3b 100644 --- a/zappa/handler.py +++ b/zappa/handler.py @@ -557,12 +557,28 @@ def handler(self, event, context): zappa_returndict.setdefault("statusDescription", response.status) if response.data: - if ( - settings.BINARY_SUPPORT - and not response.mimetype.startswith("text/") - and response.mimetype != "application/json" + # We base64 encode for two reasons when BINARY_SUPPORT is enabled: + # - Content-Encoding is present, which is commonly used by compression mechanisms to indicate + # that the content is in br/gzip/deflate/etc encoding + # (Related: https://github.com/zappa/Zappa/issues/908). Content like this must be + # transmitted as b64. + # - The response is assumed to be some binary format (since BINARY_SUPPORT is enabled and it + # isn't application/json or text/) + if settings.BINARY_SUPPORT and response.headers.get( + "Content-Encoding" ): - zappa_returndict["body"] = base64.b64encode(response.data).decode("utf-8") + zappa_returndict["body"] = base64.b64encode( + response.data + ).decode() + zappa_returndict["isBase64Encoded"] = True + elif ( + settings.BINARY_SUPPORT + and not response.mimetype.startswith("text/") + and response.mimetype != "application/json" + ): + zappa_returndict["body"] = base64.b64encode( + response.data + ).decode("utf8") zappa_returndict["isBase64Encoded"] = True else: zappa_returndict["body"] = response.get_data(as_text=True)