Skip to content

Commit

Permalink
Handle spaces in x-forwared-for/remove six (zappa#1127) (zappa#1163)
Browse files Browse the repository at this point in the history
* ✨ Handle spaces in x-forwared-for (zappa#1127)

* 🔥 remove `six` (no longer needed as 2.x is no longer supported)

* ✅ add testcase for SUPPORTED_VERSION check to increase code coverage.

* 🎨 run black/isort

* 🔧 rename function for clarity

* 🔥 remove unnecessary import

* 🔧 change name of unused mock instance

* ✨ (zappa#879) Fix url decoding for query string

* 🔧 change docstring type multi-line comment to standard multiline comment
✨ handle case where "requestContext" is not provided by the event.

> Systems calling the Lambda (other than API Gateway) may not provide the field requestContext in the event.

✅ add testcases

* 🔥 remove duplicate comment

* 📝 add docstring to util function, `get_unsupported_sys_versioninfo()`

* 🔥 remove double-underscore
  • Loading branch information
monkut authored and Ian288 committed Jul 11, 2023
1 parent 0d93f68 commit 7fe267f
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 28 deletions.
1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ python-slugify = "*"
PyYAML = "*"
# previous versions don't work with urllib3 1.24
requests = ">=2.20.0"
six = "*"
toml = "*"
tqdm = "*"
troposphere = ">=3.0"
Expand Down
163 changes: 163 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
)
from zappa.wsgi import common_log, create_wsgi_request

from .utils import get_unsupported_sys_versioninfo


def random_string(length):
return "".join(random.choice(string.printable) for _ in range(length))
Expand Down Expand Up @@ -761,6 +763,62 @@ def test_wsgi_event(self):

request = create_wsgi_request(event)

def test_wsgi_event_handle_space_in_xforwardedfor(self):
event = {
"body": None,
"resource": "/",
"requestContext": {
"resourceId": "6cqjw9qu0b",
"apiId": "9itr2lba55",
"resourcePath": "/",
"httpMethod": "GET",
"requestId": "c17cb1bf-867c-11e6-b938-ed697406e3b5",
"accountId": "724336686645",
"identity": {
"apiKey": None,
"userArn": None,
"cognitoAuthenticationType": None,
"caller": None,
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0",
"user": None,
"cognitoIdentityPoolId": None,
"cognitoIdentityId": None,
"cognitoAuthenticationProvider": None,
"sourceIp": "50.191.225.98",
"accountId": None,
},
"stage": "devorr",
},
"queryStringParameters": None,
"httpMethod": "GET",
"pathParameters": None,
"headers": {
"Via": "1.1 6801928d54163af944bf854db8d5520e.cloudfront.net (CloudFront)",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"CloudFront-Is-SmartTV-Viewer": "false",
"CloudFront-Forwarded-Proto": "https",
"X-Forwarded-For": "50.191.225.98 , 204.246.168.101",
"CloudFront-Viewer-Country": "US",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Upgrade-Insecure-Requests": "1",
"Host": "9itr2lba55.execute-api.us-east-1.amazonaws.com",
"X-Forwarded-Proto": "https",
"X-Amz-Cf-Id": "qgNdqKT0_3RMttu5KjUdnvHI3OKm1BWF8mGD2lX8_rVrJQhhp-MLDw==",
"CloudFront-Is-Tablet-Viewer": "false",
"X-Forwarded-Port": "443",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0",
"CloudFront-Is-Mobile-Viewer": "false",
"CloudFront-Is-Desktop-Viewer": "true",
},
"stageVariables": None,
"path": "/",
}
expected = "50.191.225.98"
request = create_wsgi_request(event)
actual = request["REMOTE_ADDR"]
self.assertEqual(actual, expected)

def test_wsgi_path_info_unquoted(self):
event = {
"body": {},
Expand Down Expand Up @@ -973,6 +1031,89 @@ def test_wsgi_without_body(self):
response_tuple = collections.namedtuple("Response", ["status_code", "content"])
response = response_tuple(200, "hello")

def test_wsgi_without_requestcontext(self):
event = {
"body": None,
"resource": "/",
"queryStringParameters": None,
"pathParameters": None,
"headers": {
"Via": "1.1 38205a04d96d60185e88658d3185ccee.cloudfront.net (CloudFront)",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"CloudFront-Is-SmartTV-Viewer": "false",
"CloudFront-Forwarded-Proto": "https",
"X-Forwarded-For": "71.231.27.57, 104.246.180.51",
"CloudFront-Viewer-Country": "US",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0",
"Host": "xo2z7zafjh.execute-api.us-east-1.amazonaws.com",
"X-Forwarded-Proto": "https",
"Cookie": "zappa=AQ4",
"CloudFront-Is-Tablet-Viewer": "false",
"X-Forwarded-Port": "443",
"Referer": "https://xo8z7zafjh.execute-api.us-east-1.amazonaws.com/former/post",
"CloudFront-Is-Mobile-Viewer": "false",
"X-Amz-Cf-Id": "31zxcUcVyUxBOMk320yh5NOhihn5knqrlYQYpGGyOngKKwJb0J0BAQ==",
"CloudFront-Is-Desktop-Viewer": "true",
},
"stageVariables": None,
"path": "/",
"isBase64Encoded": True,
}
environ = create_wsgi_request(event, trailing_slash=False)
self.assertTrue(environ)

def test_wsgi_with_authorizer(self):
expected_remote_user = "remote-user"
authorizer = {
"principalId": expected_remote_user,
}
event = {
"body": None,
"resource": "/",
"requestContext": {
"resourceId": "6cqjw9qu0b",
"apiId": "9itr2lba55",
"resourcePath": "/",
"httpMethod": "POST",
"requestId": "c17cb1bf-867c-11e6-b938-ed697406e3b5",
"accountId": "724336686645",
"authorizer": authorizer,
"stage": "devorr",
},
"queryStringParameters": None,
"httpMethod": "POST",
"pathParameters": None,
"headers": {
"Via": "1.1 38205a04d96d60185e88658d3185ccee.cloudfront.net (CloudFront)",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"CloudFront-Is-SmartTV-Viewer": "false",
"CloudFront-Forwarded-Proto": "https",
"X-Forwarded-For": "71.231.27.57, 104.246.180.51",
"CloudFront-Viewer-Country": "US",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0",
"Host": "xo2z7zafjh.execute-api.us-east-1.amazonaws.com",
"X-Forwarded-Proto": "https",
"Cookie": "zappa=AQ4",
"CloudFront-Is-Tablet-Viewer": "false",
"X-Forwarded-Port": "443",
"Referer": "https://xo8z7zafjh.execute-api.us-east-1.amazonaws.com/former/post",
"CloudFront-Is-Mobile-Viewer": "false",
"X-Amz-Cf-Id": "31zxcUcVyUxBOMk320yh5NOhihn5knqrlYQYpGGyOngKKwJb0J0BAQ==",
"CloudFront-Is-Desktop-Viewer": "true",
},
"stageVariables": None,
"path": "/",
"isBase64Encoded": True,
}

environ = create_wsgi_request(event, trailing_slash=False)
self.assertEqual(environ["REMOTE_USER"], expected_remote_user)
self.assertDictEqual(environ["API_GATEWAY_AUTHORIZER"], authorizer)

def test_wsgi_from_apigateway_testbutton(self):
"""
API Gateway resources have a "test bolt" button on methods.
Expand Down Expand Up @@ -2687,6 +2828,28 @@ def test_delete_lambda_concurrency(self, client):
FunctionName="abc",
)

@mock.patch("sys.version_info", new_callable=get_unsupported_sys_versioninfo)
def test_unsupported_version_error(self, *_):
from importlib import reload

with self.assertRaises(RuntimeError):
import zappa

reload(zappa)

def test_wsgi_query_string_unquoted(self):
event = {
"body": {},
"headers": {},
"pathParameters": {},
"path": "/path/path1",
"httpMethod": "GET",
"queryStringParameters": {"a": "A,B", "b": "C#D"},
"requestContext": {},
}
request = create_wsgi_request(event)
self.assertEqual(request["QUERY_STRING"], "a=A,B&b=C#D")


if __name__ == "__main__":
unittest.main()
7 changes: 7 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import functools
import os
from collections import namedtuple
from contextlib import contextmanager

import boto3
Expand Down Expand Up @@ -72,3 +73,9 @@ def stub_open(*args, **kwargs):

with patch("__builtin__.open", stub_open):
yield mock_open, mock_file


def get_unsupported_sys_versioninfo() -> tuple:
"""Mock used to test the python unsupported version testcase"""
invalid_versioninfo = namedtuple("version_info", ["major", "minor", "micro", "releaselevel", "serial"])
return invalid_versioninfo(3, 6, 1, "final", 0)
56 changes: 29 additions & 27 deletions zappa/wsgi.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import base64
import logging
import sys
from io import BytesIO
from urllib.parse import urlencode

import six
from requestlogger import ApacheFormatter
from werkzeug import urls

Expand All @@ -25,26 +25,26 @@ def create_wsgi_request(
Given some event_info via API Gateway,
create and return a valid WSGI request environ.
"""
method = event_info["httpMethod"]
method = event_info.get("httpMethod", None)
headers = merge_headers(event_info) or {} # Allow for the AGW console 'Test' button to work (Pull #735)

"""
API Gateway and ALB both started allowing for multi-value querystring
params in Nov. 2018. If there aren't multi-value params present, then
it acts identically to 'queryStringParameters', so we can use it as a
drop-in replacement.
The one caveat here is that ALB will only include _one_ of
queryStringParameters _or_ multiValueQueryStringParameters, which means
we have to check for the existence of one and then fall back to the
other.
"""
# API Gateway and ALB both started allowing for multi-value querystring
# params in Nov. 2018. If there aren't multi-value params present, then
# it acts identically to 'queryStringParameters', so we can use it as a
# drop-in replacement.
#
# The one caveat here is that ALB will only include _one_ of
# queryStringParameters _or_ multiValueQueryStringParameters, which means
# we have to check for the existence of one and then fall back to the
# other.

if "multiValueQueryStringParameters" in event_info:
query = event_info["multiValueQueryStringParameters"]
query_string = urlencode(query, doseq=True) if query else ""
else:
query = event_info.get("queryStringParameters", {})
query_string = urlencode(query) if query else ""
query_string = urls.url_unquote(query_string)

if context_header_mappings:
for key, value in context_header_mappings.items():
Expand All @@ -59,13 +59,6 @@ def create_wsgi_request(
if header_val is not None:
headers[key] = header_val

# Extract remote user from context if Authorizer is enabled
remote_user = None
if event_info["requestContext"].get("authorizer"):
remote_user = event_info["requestContext"]["authorizer"].get("principalId")
elif event_info["requestContext"].get("identity"):
remote_user = event_info["requestContext"]["identity"].get("userArn")

# Related: https://github.com/Miserlou/Zappa/issues/677
# https://github.com/Miserlou/Zappa/issues/683
# https://github.com/Miserlou/Zappa/issues/696
Expand All @@ -77,12 +70,12 @@ def create_wsgi_request(
body = base64.b64decode(encoded_body)
else:
body = event_info["body"]
if isinstance(body, six.string_types):
if isinstance(body, str):
body = body.encode("utf-8")

else:
body = event_info["body"]
if isinstance(body, six.string_types):
if isinstance(body, str):
body = body.encode("utf-8")

# Make header names canonical, e.g. content-type => Content-Type
Expand All @@ -100,7 +93,8 @@ def create_wsgi_request(
if "," in x_forwarded_for:
# The last one is the cloudfront proxy ip. The second to last is the real client ip.
# Everything else is user supplied and untrustworthy.
remote_addr = x_forwarded_for.split(", ")[-2]
addresses = [addr.strip() for addr in x_forwarded_for.split(",")]
remote_addr = addresses[-2]
else:
remote_addr = x_forwarded_for or "127.0.0.1"

Expand All @@ -122,13 +116,24 @@ def create_wsgi_request(
"wsgi.run_once": False,
}

# Systems calling the Lambda (other than API Gateway) may not provide the field requestContext
# Extract remote_user, authorizer if Authorizer is enabled
remote_user = None
if "requestContext" in event_info:
authorizer = event_info["requestContext"].get("authorizer", None)
if authorizer:
remote_user = authorizer.get("principalId")
environ["API_GATEWAY_AUTHORIZER"] = authorizer
elif event_info["requestContext"].get("identity"):
remote_user = event_info["requestContext"]["identity"].get("userArn")

# Input processing
if method in ["POST", "PUT", "PATCH", "DELETE"]:
if "Content-Type" in headers:
environ["CONTENT_TYPE"] = headers["Content-Type"]

# This must be Bytes or None
environ["wsgi.input"] = six.BytesIO(body)
environ["wsgi.input"] = BytesIO(body)
if body:
environ["CONTENT_LENGTH"] = str(len(body))
else:
Expand All @@ -148,9 +153,6 @@ def create_wsgi_request(
if remote_user:
environ["REMOTE_USER"] = remote_user

if event_info["requestContext"].get("authorizer"):
environ["API_GATEWAY_AUTHORIZER"] = event_info["requestContext"]["authorizer"]

return environ


Expand Down

0 comments on commit 7fe267f

Please sign in to comment.