diff --git a/README.md b/README.md index b9684219b..ab723b523 100644 --- a/README.md +++ b/README.md @@ -854,6 +854,7 @@ to change Zappa's behavior. Use these at your own risk! ```javascript { "dev": { + "additional_text_mimetypes": [], // allows you to provide additional mimetypes to be handled as text when binary_support is true. "alb_enabled": false, // enable provisioning of application load balancing resources. If set to true, you _must_ fill out the alb_vpc_config option as well. "alb_vpc_config": { "CertificateArn": "your_acm_certificate_arn", // ACM certificate ARN for ALB diff --git a/test_settings.json b/test_settings.json index f7b29bfc2..2ee126a1f 100644 --- a/test_settings.json +++ b/test_settings.json @@ -120,5 +120,12 @@ "lambda_concurrency_enabled": { "extends": "ttt888", "lambda_concurrency": 6 - } + }, + "addtextmimetypes": { + "s3_bucket": "lmbda", + "app_function": "tests.test_app.hello_world", + "delete_local_zip": true, + "binary_support": true, + "additional_text_mimetypes": ["application/custommimetype"] + } } diff --git a/tests/test_bad_additional_text_mimetypes_settings.json b/tests/test_bad_additional_text_mimetypes_settings.json new file mode 100644 index 000000000..b3a09f57f --- /dev/null +++ b/tests/test_bad_additional_text_mimetypes_settings.json @@ -0,0 +1,9 @@ +{ + "nobinarysupport": { + "s3_bucket": "lmbda", + "app_function": "tests.test_app.hello_world", + "delete_local_zip": true, + "binary_support": false, + "additional_text_mimetypes": ["application/custommimetype"] + } +} \ No newline at end of file diff --git a/tests/test_binary_support_additional_text_mimetypes_settings.py b/tests/test_binary_support_additional_text_mimetypes_settings.py new file mode 100644 index 000000000..27c70c1bc --- /dev/null +++ b/tests/test_binary_support_additional_text_mimetypes_settings.py @@ -0,0 +1,14 @@ +API_STAGE = "dev" +APP_FUNCTION = "app" +APP_MODULE = "tests.test_wsgi_binary_support_app" +BINARY_SUPPORT = True +CONTEXT_HEADER_MAPPINGS = 
{} +DEBUG = "True" +DJANGO_SETTINGS = None +DOMAIN = "api.example.com" +ENVIRONMENT_VARIABLES = {} +LOG_LEVEL = "DEBUG" +PROJECT_NAME = "binary_support_settings" +COGNITO_TRIGGER_MAPPING = {} +EXCEPTION_HANDLER = None +ADDITIONAL_TEXT_MIMETYPES = ["application/vnd.oai.openapi"] diff --git a/tests/test_binary_support_settings.py b/tests/test_binary_support_settings.py new file mode 100644 index 000000000..84713e889 --- /dev/null +++ b/tests/test_binary_support_settings.py @@ -0,0 +1,13 @@ +API_STAGE = "dev" +APP_FUNCTION = "app" +APP_MODULE = "tests.test_wsgi_binary_support_app" +BINARY_SUPPORT = True +CONTEXT_HEADER_MAPPINGS = {} +DEBUG = "True" +DJANGO_SETTINGS = None +DOMAIN = "api.example.com" +ENVIRONMENT_VARIABLES = {} +LOG_LEVEL = "DEBUG" +PROJECT_NAME = "binary_support_settings" +COGNITO_TRIGGER_MAPPING = {} +EXCEPTION_HANDLER = None diff --git a/tests/test_handler.py b/tests/test_handler.py index cc0590128..c15159567 100644 --- a/tests/test_handler.py +++ b/tests/test_handler.py @@ -1,4 +1,3 @@ -import sys import unittest from mock import Mock @@ -6,6 +5,8 @@ from zappa.handler import LambdaHandler from zappa.utilities import merge_headers +from .utils import is_base64 + def no_args(): return @@ -223,6 +224,188 @@ def test_exception_handler_on_web_request(self): self.assertEqual(response["statusCode"], 500) mocked_exception_handler.assert_called() + def test_wsgi_script_binary_support_with_content_encoding(self): + """ + Ensure that response body is base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding header is present. 
+ """ + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/content_encoding_header_json1", + } + + # A likely scenario is that the application would be gzip compressing some json response. That's checked first. + response = lh.handler(text_plain_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # We also verify that some unknown mimetype with a Content-Encoding also encodes to b64. This route serves + # bytes in the response. + + text_arbitrary_event = { + **text_plain_event, + **{"path": "/content_encoding_header_textarbitrary1"}, + } + + response = lh.handler(text_arbitrary_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + # This route is similar to the above, but it serves its response as text and not bytes. That the response + # isn't bytes shouldn't matter because it still has a Content-Encoding header. + + application_json_event = { + **text_plain_event, + **{"path": "/content_encoding_header_textarbitrary2"}, + } + + response = lh.handler(application_json_event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding_edgecases( + self, + ): + """ + Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is enabled and the mimetype is "application/json" or starts with "text/". 
+ """ + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/textplain_mimetype_response1", # text/plain mimetype should not be turned to base64 + "/textarbitrary_mimetype_response1", # text/arbitrary mimetype should not be turned to base64 + "/json_mimetype_response1", # application/json mimetype should not be turned to base64 + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + def test_wsgi_script_binary_support_without_content_encoding( + self, + ): + """ + Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is enabled and Content-Encoding is absent. 
+ """ + + lh = LambdaHandler("tests.test_binary_support_settings") + + text_plain_event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/textplain_mimetype_response1", + } + + for path in [ + "/arbitrarybinary_mimetype_response1", + "/arbitrarybinary_mimetype_response2", + ]: + event = {**text_plain_event, "path": path} + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + + def test_wsgi_script_binary_support_userdefined_additional_text_mimetypes__defined( + self, + ): + """ + Ensure zappa response bodies are NOT base64 encoded when BINARY_SUPPORT is True, and additional_text_mimetypes are defined + """ + lh = LambdaHandler("tests.test_binary_support_additional_text_mimetypes_settings") + expected_additional_mimetypes = ["application/vnd.oai.openapi"] + self.assertEqual(lh.settings.ADDITIONAL_TEXT_MIMETYPES, expected_additional_mimetypes) + + event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/userdefined_additional_mimetype_response1", + } + + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertNotIn("isBase64Encoded", response) + self.assertFalse(is_base64(response["body"])) + + def test_wsgi_script_binary_support_userdefined_additional_text_mimetypes__undefined( + self, + ): + """ + Ensure zappa response bodies are base64 encoded when BINARY_SUPPORT is True and mimetype not defined in additional_text_mimetypes + 
""" + lh = LambdaHandler("tests.test_binary_support_settings") + + event = { + "body": "", + "resource": "/{proxy+}", + "requestContext": {}, + "queryStringParameters": {}, + "headers": { + "Host": "1234567890.execute-api.us-east-1.amazonaws.com", + }, + "pathParameters": {"proxy": "return/request/url"}, + "httpMethod": "GET", + "stageVariables": {}, + "path": "/userdefined_additional_mimetype_response1", + } + + response = lh.handler(event, None) + + self.assertEqual(response["statusCode"], 200) + self.assertIn("isBase64Encoded", response) + self.assertTrue(is_base64(response["body"])) + def test_wsgi_script_on_cognito_event_request(self): """ Ensure that requests sent by cognito behave sensibly diff --git a/tests/test_wsgi_binary_support_app.py b/tests/test_wsgi_binary_support_app.py new file mode 100644 index 000000000..b4d9bfb50 --- /dev/null +++ b/tests/test_wsgi_binary_support_app.py @@ -0,0 +1,71 @@ +""" +This test application exists to confirm how Zappa handles WSGI application +_responses_ when Binary Support is enabled. 
+""" + +import gzip +import json + +from flask import Flask, Response + +app = Flask(__name__) + + +@app.route("/textplain_mimetype_response1", methods=["GET"]) +def text_mimetype_response_1(): + return Response(response="OK", mimetype="text/plain") + + +@app.route("/textarbitrary_mimetype_response1", methods=["GET"]) +def text_mimetype_response_2(): + return Response(response="OK", mimetype="text/arbitary") + + +@app.route("/json_mimetype_response1", methods=["GET"]) +def json_mimetype_response_1(): + return Response(response=json.dumps({"some": "data"}), mimetype="application/json") + + +@app.route("/arbitrarybinary_mimetype_response1", methods=["GET"]) +def arbitrary_mimetype_response_1(): + return Response(response=b"some binary data", mimetype="arbitrary/binary_mimetype") + + +@app.route("/arbitrarybinary_mimetype_response2", methods=["GET"]) +def arbitrary_mimetype_response_3(): + return Response(response="doesnt_matter", mimetype="definitely_not_text") + + +@app.route("/content_encoding_header_json1", methods=["GET"]) +def response_with_content_encoding_1(): + return Response( + response=gzip.compress(json.dumps({"some": "data"}).encode()), + mimetype="application/json", + headers={"Content-Encoding": "gzip"}, + ) + + +@app.route("/content_encoding_header_textarbitrary1", methods=["GET"]) +def response_with_content_encoding_2(): + return Response( + response=b"OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "something_arbitrarily_binary"}, + ) + + +@app.route("/content_encoding_header_textarbitrary2", methods=["GET"]) +def response_with_content_encoding_3(): + return Response( + response="OK", + mimetype="text/arbitrary", + headers={"Content-Encoding": "with_content_type_but_not_bytes_response"}, + ) + + +@app.route("/userdefined_additional_mimetype_response1", methods=["GET"]) +def response_with_userdefined_addtional_mimetype(): + return Response( + response="OK", + mimetype="application/vnd.oai.openapi", + ) diff --git a/tests/tests.py 
b/tests/tests.py index e66e8ea9e..563e158f5 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1165,6 +1165,20 @@ def test_load_settings_toml(self): zappa_cli.load_settings("tests/test_settings.toml") self.assertEqual(False, zappa_cli.stage_config["touch"]) + def test_load_settings_bad_additional_text_mimetypes(self): + zappa_cli = ZappaCLI() + zappa_cli.api_stage = "nobinarysupport" + with self.assertRaises(ClickException): + zappa_cli.load_settings("tests/test_bad_additional_text_mimetypes_settings.json") + + def test_load_settings_additional_text_mimetypes(self): + zappa_cli = ZappaCLI() + zappa_cli.api_stage = "addtextmimetypes" + zappa_cli.load_settings("test_settings.json") + expected_additional_text_mimetypes = ["application/custommimetype"] + self.assertEqual(expected_additional_text_mimetypes, zappa_cli.stage_config["additional_text_mimetypes"]) + self.assertEqual(True, zappa_cli.stage_config["binary_support"]) + def test_settings_extension(self): """ Make sure Zappa uses settings in the proper order: JSON, TOML, YAML. 
diff --git a/tests/utils.py b/tests/utils.py index db48b0db5..725947c84 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,17 +1,14 @@ +import base64 import functools import os from collections import namedtuple from contextlib import contextmanager +from io import IOBase as file import boto3 import placebo from mock import MagicMock, patch -try: - file -except NameError: # builtin 'file' was removed in Python 3 - from io import IOBase as file - PLACEBO_DIR = os.path.join(os.path.dirname(__file__), "placebo") @@ -75,6 +72,14 @@ def stub_open(*args, **kwargs): yield mock_open, mock_file +def is_base64(test_string: str) -> bool: + # Taken from https://stackoverflow.com/a/45928164/3200002 + try: + return base64.b64encode(base64.b64decode(test_string)).decode() == test_string + except Exception: + return False + + def get_unsupported_sys_versioninfo() -> tuple: """Mock used to test the python unsupported version testcase""" invalid_versioninfo = namedtuple("version_info", ["major", "minor", "micro", "releaselevel", "serial"]) diff --git a/zappa/cli.py b/zappa/cli.py index 305e74e30..0e4ac6272 100755 --- a/zappa/cli.py +++ b/zappa/cli.py @@ -67,6 +67,7 @@ BOTO3_CONFIG_DOCS_URL = "https://boto3.readthedocs.io/en/latest/guide/quickstart.html#configuration" + ## # Main Input Processing ## @@ -117,6 +118,7 @@ class ZappaCLI: xray_tracing = False aws_kms_key_arn = "" context_header_mappings = None + additional_text_mimetypes = None tags = [] layers = None @@ -2290,6 +2292,11 @@ def load_settings(self, settings_file=None, session=None): self.xray_tracing = self.stage_config.get("xray_tracing", False) self.desired_role_arn = self.stage_config.get("role_arn") self.layers = self.stage_config.get("layers", None) + self.additional_text_mimetypes = self.stage_config.get("additional_text_mimetypes", None) + + # check that BINARY_SUPPORT is True if additional_text_mimetypes is provided + if self.additional_text_mimetypes and not self.binary_support: + raise 
ClickException("zappa_settings.json has additional_text_mimetypes defined, but binary_support is False!") # Load ALB-related settings self.use_alb = self.stage_config.get("alb_enabled", False) @@ -2622,6 +2629,10 @@ def get_zappa_settings_string(self): # async response async_response_table = self.stage_config.get("async_response_table", "") settings_s += "ASYNC_RESPONSE_TABLE='{0!s}'\n".format(async_response_table) + + # additional_text_mimetypes + additional_text_mimetypes = self.stage_config.get("additional_text_mimetypes", []) + settings_s += f"ADDITIONAL_TEXT_MIMETYPES={additional_text_mimetypes}\n" return settings_s def remove_local_zip(self): diff --git a/zappa/handler.py b/zappa/handler.py index 2cccb46ce..ec911fd67 100644 --- a/zappa/handler.py +++ b/zappa/handler.py @@ -10,6 +10,8 @@ import tarfile import traceback from builtins import str +from types import ModuleType +from typing import Tuple import boto3 from werkzeug.wrappers import Response @@ -18,11 +20,11 @@ # so handle both scenarios. 
try: from zappa.middleware import ZappaWSGIMiddleware - from zappa.utilities import merge_headers, parse_s3_url + from zappa.utilities import DEFAULT_TEXT_MIMETYPES, merge_headers, parse_s3_url from zappa.wsgi import common_log, create_wsgi_request except ImportError: # pragma: no cover from .middleware import ZappaWSGIMiddleware - from .utilities import merge_headers, parse_s3_url + from .utilities import DEFAULT_TEXT_MIMETYPES, merge_headers, parse_s3_url from .wsgi import common_log, create_wsgi_request @@ -265,6 +267,47 @@ def _process_exception(cls, exception_handler, event, context, exception): print(cex) return exception_processed + @staticmethod + def _process_response_body(response: Response, settings: ModuleType) -> Tuple[str, bool]: + """ + Perform Response body encoding/decoding + + Related: https://github.com/zappa/Zappa/issues/908 + API Gateway requires binary data be base64 encoded: + https://aws.amazon.com/blogs/compute/handling-binary-data-using-amazon-api-gateway-http-apis/ + When BINARY_SUPPORT is enabled the body is base64 encoded in the following cases: + + - Content-Encoding defined, commonly used to specify compression (br/gzip/deflate/etc) + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding + Content like this must be transmitted as b64. 
+ + - Response assumed binary when Response.mimetype does + not start with an entry defined in 'handle_as_text_mimetypes' + """ + encode_body_as_base64 = False + if settings.BINARY_SUPPORT: + handle_as_text_mimetypes = DEFAULT_TEXT_MIMETYPES + additional_text_mimetypes = getattr(settings, "ADDITIONAL_TEXT_MIMETYPES", None) + if additional_text_mimetypes: + handle_as_text_mimetypes += tuple(additional_text_mimetypes) + + if response.headers.get("Content-Encoding"): # Assume br/gzip/deflate/etc encoding + encode_body_as_base64 = True + + # werkzeug Response.mimetype: lowercase without parameters + # https://werkzeug.palletsprojects.com/en/2.2.x/wrappers/#werkzeug.wrappers.Request.mimetype + elif not response.mimetype.startswith(handle_as_text_mimetypes): + encode_body_as_base64 = True + + if encode_body_as_base64: + body = base64.b64encode(response.data).decode("utf8") + else: + # response.data decoded by werkzeug + # https://werkzeug.palletsprojects.com/en/2.2.x/wrappers/#werkzeug.wrappers.Request.get_data + body = response.get_data(as_text=True) + + return body, encode_body_as_base64 + @staticmethod def run_function(app_function, event, context): """ @@ -560,21 +603,11 @@ def handler(self, event, context): ##### # TODO: Update to match black rules if response.data: - if settings.BINARY_SUPPORT and response.headers.get("Content-Encoding"): - # We could have a text response that's gzip - # encoded. Therefore, we base-64 encode it. 
- zappa_returndict['body'] = base64.b64encode(response.data).decode('utf-8') - zappa_returndict["isBase64Encoded"] = True - elif settings.BINARY_SUPPORT and \ - not response.mimetype.startswith("text/") \ - and response.mimetype != "application/json": - zappa_returndict['body'] = base64.b64encode(response.data).decode('utf-8') - - zappa_returndict["isBase64Encoded"] = True - else: - zappa_returndict["body"] = response.get_data(as_text=True) - # End TODO - ##### + processed_body, is_base64_encoded = self._process_response_body(response, settings=settings) + zappa_returndict["body"] = processed_body + if is_base64_encoded: + zappa_returndict["isBase64Encoded"] = is_base64_encoded + zappa_returndict["statusCode"] = response.status_code if "headers" in event: zappa_returndict["headers"] = {} diff --git a/zappa/utilities.py b/zappa/utilities.py index 9a514b2f5..cffbd0ae0 100644 --- a/zappa/utilities.py +++ b/zappa/utilities.py @@ -28,6 +28,19 @@ class UnserializableJsonError(TypeError): # Settings / Packaging ## +# mimetypes starting with entries defined here are considered as TEXT when BINARY_SUPPORT is True. +# - Additional TEXT mimetypes may be defined with the 'ADDITIONAL_TEXT_MIMETYPES' setting. +DEFAULT_TEXT_MIMETYPES = ( + "text/", + "application/json", # RFC 4627 + "application/javascript", # RFC 4329 + "application/ecmascript", # RFC 4329 + "application/xml", # RFC 3023 + "application/xml-external-parsed-entity", # RFC 3023 + "application/xml-dtd", # RFC 3023 + "image/svg+xml", # RFC 3023 +) + def copytree(src, dst, metadata=True, symlinks=False, ignore=None): """