From e421c779e2d70610c38cd9bf6c455037d403a6e9 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 28 Mar 2022 19:09:53 +0800
Subject: [PATCH 1/3] add from_bytes alternative constructor for
 HttpResponseHeaders

---
 tests/test_page_inputs.py | 18 +++++++++++++++++
 web_poet/page_inputs.py   | 42 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py
index 0ed6d246..462ceed4 100644
--- a/tests/test_page_inputs.py
+++ b/tests/test_page_inputs.py
@@ -95,6 +95,24 @@ def test_http_respose_headers():
         headers["user agent"]
 
 
+def test_http_response_headers_from_bytes():
+    raw_headers = {
+        b"Content-Length": [b"316"],
+        b"Content-Encoding": [b"gzip", b"br"],
+        b"server": b"sffe",
+        "X-string": "string",
+        "X-missing": None
+    }
+    headers = HttpResponseHeaders.from_bytes(raw_headers)
+
+    assert headers.get("content-length") == "316"
+    assert headers.get("content-encoding") == "gzip"
+    assert headers.getall("Content-Encoding") == ["gzip", "br"]
+    assert headers.get("server") == "sffe"
+    assert headers.get("x-string") == "string"
+    assert headers.get("X-missing") is None
+
+
 def test_http_response_headers_init_requests():
     requests_response = requests.Response()
     requests_response.headers['User-Agent'] = "mozilla"
diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py
index d265c456..db2d5632 100644
--- a/web_poet/page_inputs.py
+++ b/web_poet/page_inputs.py
@@ -1,5 +1,5 @@
 import json
-from typing import Optional, Dict, List, TypeVar, Type
+from typing import Optional, Dict, List, TypeVar, Type, Union
 
 import attrs
 from multidict import CIMultiDict
@@ -14,6 +14,7 @@
 from .utils import memoizemethod_noargs
 
 T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
+BytesDict = Dict[bytes, Union[bytes, List[bytes]]]
 
 
 class HttpResponseBody(bytes):
@@ -74,6 +75,45 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
         """
         return cls([(pair["name"], pair["value"]) for pair in arg])
 
+    @classmethod
+    def from_bytes(
+        cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8"
+    ) -> T_headers:
+        """An alternative constructor for instantiation where the header-value
+        pairs are in raw bytes form.
+
+        This supports multiple header values in the form of ``List[bytes]``
+        alongside a plain ``bytes`` value.
+
+        By default, it converts the ``bytes`` value using "utf-8". However, this
+        can easily be overridden using the ``encoding`` parameter.
+
+        >>> raw_values = {
+        ...     b"Content-Encoding": [b"gzip", b"br"],
+        ...     b"Content-Type": [b"text/html"],
+        ...     b"content-length": b"648",
+        ... }
+        >>> headers = HttpResponseHeaders.from_bytes(raw_values)
+        >>> headers
+        <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
+        """
+
+        def _norm(data):
+            if isinstance(data, str):
+                return data
+            elif isinstance(data, bytes):
+                return data.decode(encoding)
+
+        converted = []
+
+        for header, value in arg.items():
+            if isinstance(value, list):
+                converted.extend([(_norm(header), _norm(v)) for v in value])
+            else:
+                converted.append((_norm(header), _norm(value)))
+
+        return cls(converted)
+
     def declared_encoding(self) -> Optional[str]:
         """ Return encoding detected from the Content-Type header, or None
         if encoding is not found """

From eb184270ff8db4bccbb7e3f7317c0192a0fb6a29 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Thu, 31 Mar 2022 10:21:37 +0800
Subject: [PATCH 2/3] rename from_bytes() to from_bytes_dict()

---
 tests/test_page_inputs.py | 4 ++--
 web_poet/page_inputs.py   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py
index 462ceed4..d8606cd2 100644
--- a/tests/test_page_inputs.py
+++ b/tests/test_page_inputs.py
@@ -95,7 +95,7 @@ def test_http_respose_headers():
         headers["user agent"]
 
 
-def test_http_response_headers_from_bytes():
+def test_http_response_headers_from_bytes_dict():
     raw_headers = {
         b"Content-Length": [b"316"],
         b"Content-Encoding": [b"gzip", b"br"],
@@ -103,7 +103,7 @@ def test_http_response_headers_from_bytes():
         "X-string": "string",
         "X-missing": None
     }
-    headers = HttpResponseHeaders.from_bytes(raw_headers)
+    headers = HttpResponseHeaders.from_bytes_dict(raw_headers)
 
     assert headers.get("content-length") == "316"
     assert headers.get("content-encoding") == "gzip"
diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py
index db2d5632..d99f3635 100644
--- a/web_poet/page_inputs.py
+++ b/web_poet/page_inputs.py
@@ -76,7 +76,7 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
         return cls([(pair["name"], pair["value"]) for pair in arg])
 
     @classmethod
-    def from_bytes(
+    def from_bytes_dict(
         cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8"
     ) -> T_headers:
         """An alternative constructor for instantiation where the header-value
@@ -93,7 +93,7 @@ def from_bytes(
         ...     b"Content-Type": [b"text/html"],
         ...     b"content-length": b"648",
         ... }
-        >>> headers = HttpResponseHeaders.from_bytes(raw_values)
+        >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
         >>> headers
         <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
         """

From ddb7d203e76d2399fa108fcf43f08890ed731991 Mon Sep 17 00:00:00 2001
From: Kevin Lloyd Bernal <kevinoxy@gmail.com>
Date: Mon, 11 Apr 2022 12:57:23 +0800
Subject: [PATCH 3/3] update from_bytes_dict() to handle tuple and raise
 ValueError if non str or bytes

---
 tests/test_page_inputs.py | 16 ++++++++++++++--
 web_poet/page_inputs.py   | 18 ++++++++++--------
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py
index d8606cd2..ec65b774 100644
--- a/tests/test_page_inputs.py
+++ b/tests/test_page_inputs.py
@@ -101,7 +101,8 @@ def test_http_response_headers_from_bytes_dict():
         b"Content-Encoding": [b"gzip", b"br"],
         b"server": b"sffe",
         "X-string": "string",
-        "X-missing": None
+        "X-missing": None,
+        "X-tuple": (b"x", "y"),
     }
     headers = HttpResponseHeaders.from_bytes_dict(raw_headers)
 
@@ -110,7 +111,18 @@ def test_http_response_headers_from_bytes_dict():
     assert headers.getall("Content-Encoding") == ["gzip", "br"]
     assert headers.get("server") == "sffe"
     assert headers.get("x-string") == "string"
-    assert headers.get("X-missing") is None
+    assert headers.get("x-missing") is None
+    assert headers.get("x-tuple") == "x"
+    assert headers.getall("x-tuple") == ["x", "y"]
+
+
+def test_http_response_headers_from_bytes_dict_err():
+
+    with pytest.raises(ValueError):
+        HttpResponseHeaders.from_bytes_dict({b"Content-Length": [316]})
+
+    with pytest.raises(ValueError):
+        HttpResponseHeaders.from_bytes_dict({b"Content-Length": 316})
 
 
 def test_http_response_headers_init_requests():
diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py
index d99f3635..17cfaff4 100644
--- a/web_poet/page_inputs.py
+++ b/web_poet/page_inputs.py
@@ -1,5 +1,5 @@
 import json
-from typing import Optional, Dict, List, TypeVar, Type, Union
+from typing import Optional, Dict, List, TypeVar, Type, Union, Tuple, AnyStr
 
 import attrs
 from multidict import CIMultiDict
@@ -14,7 +14,7 @@
 from .utils import memoizemethod_noargs
 
 T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
-BytesDict = Dict[bytes, Union[bytes, List[bytes]]]
+AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
 
 
 class HttpResponseBody(bytes):
@@ -77,13 +77,14 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
 
     @classmethod
     def from_bytes_dict(
-        cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8"
+        cls: Type[T_headers], arg: AnyStrDict, encoding: str = "utf-8"
     ) -> T_headers:
         """An alternative constructor for instantiation where the header-value
-        pairs are in raw bytes form.
+        pairs may be in raw bytes form.
 
-        This supports multiple header values in the form of ``List[bytes]``
-        alongside a plain ``bytes`` value.
+        This supports multiple header values in the form of ``List[bytes]`` and
+        ``Tuple[bytes, ...]`` alongside a plain ``bytes`` value. A ``str`` value
+        is also accepted and is passed through without decoding.
 
         By default, it converts the ``bytes`` value using "utf-8". However, this
         can easily be overridden using the ``encoding`` parameter.
@@ -99,15 +100,16 @@ def from_bytes_dict(
         """
 
         def _norm(data):
-            if isinstance(data, str):
+            if isinstance(data, str) or data is None:
                 return data
             elif isinstance(data, bytes):
                 return data.decode(encoding)
+            raise ValueError(f"Expecting str or bytes. Received {type(data)}")
 
         converted = []
 
         for header, value in arg.items():
-            if isinstance(value, list):
+            if isinstance(value, list) or isinstance(value, tuple):
                 converted.extend([(_norm(header), _norm(v)) for v in value])
             else:
                 converted.append((_norm(header), _norm(value)))