From e421c779e2d70610c38cd9bf6c455037d403a6e9 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Mon, 28 Mar 2022 19:09:53 +0800 Subject: [PATCH 1/3] add from_bytes alternative constructor for HttpResponseHeaders --- tests/test_page_inputs.py | 18 +++++++++++++++++ web_poet/page_inputs.py | 42 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 0ed6d246..462ceed4 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -95,6 +95,24 @@ def test_http_respose_headers(): headers["user agent"] +def test_http_response_headers_from_bytes(): + raw_headers = { + b"Content-Length": [b"316"], + b"Content-Encoding": [b"gzip", b"br"], + b"server": b"sffe", + "X-string": "string", + "X-missing": None + } + headers = HttpResponseHeaders.from_bytes(raw_headers) + + assert headers.get("content-length") == "316" + assert headers.get("content-encoding") == "gzip" + assert headers.getall("Content-Encoding") == ["gzip", "br"] + assert headers.get("server") == "sffe" + assert headers.get("x-string") == "string" + assert headers.get("X-missing") is None + + def test_http_response_headers_init_requests(): requests_response = requests.Response() requests_response.headers['User-Agent'] = "mozilla" diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py index d265c456..db2d5632 100644 --- a/web_poet/page_inputs.py +++ b/web_poet/page_inputs.py @@ -1,5 +1,5 @@ import json -from typing import Optional, Dict, List, TypeVar, Type +from typing import Optional, Dict, List, TypeVar, Type, Union import attrs from multidict import CIMultiDict @@ -14,6 +14,7 @@ from .utils import memoizemethod_noargs T_headers = TypeVar("T_headers", bound="HttpResponseHeaders") +BytesDict = Dict[bytes, Union[bytes, List[bytes]]] class HttpResponseBody(bytes): @@ -74,6 +75,45 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers: """ return cls([(pair["name"], pair["value"]) for pair in arg]) + @classmethod + def from_bytes( + cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8" + ) -> T_headers: + """An alternative constructor for instantiation where the header-value + pairs are in raw bytes form. + + This supports multiple header values in the form of ``List[bytes]`` + alongside a plain ``bytes`` value. + + By default, it converts the ``bytes`` value using "utf-8". However, this + can easily be overridden using the ``encoding`` parameter. + + >>> raw_values = { + ... b"Content-Encoding": [b"gzip", b"br"], + ... b"Content-Type": [b"text/html"], + ... b"content-length": b"648", + ... } + >>> headers = HttpResponseHeaders.from_bytes(raw_values) + >>> headers + + """ + + def _norm(data): + if isinstance(data, str): + return data + elif isinstance(data, bytes): + return data.decode(encoding) + + converted = [] + + for header, value in arg.items(): + if isinstance(value, list): + converted.extend([(_norm(header), _norm(v)) for v in value]) + else: + converted.append((_norm(header), _norm(value))) + + return cls(converted) + def declared_encoding(self) -> Optional[str]: """ Return encoding detected from the Content-Type header, or None if encoding is not found """ From eb184270ff8db4bccbb7e3f7317c0192a0fb6a29 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Thu, 31 Mar 2022 10:21:37 +0800 Subject: [PATCH 2/3] rename from_bytes() to from_bytes_dict() --- tests/test_page_inputs.py | 4 ++-- web_poet/page_inputs.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 462ceed4..d8606cd2 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -95,7 +95,7 @@ def test_http_respose_headers(): headers["user agent"] -def test_http_response_headers_from_bytes(): +def test_http_response_headers_from_bytes_dict(): raw_headers = { b"Content-Length": [b"316"], b"Content-Encoding": [b"gzip", b"br"], @@ -103,7 +103,7 @@ def test_http_response_headers_from_bytes(): "X-string": "string", "X-missing": None } - headers = HttpResponseHeaders.from_bytes(raw_headers) + headers = HttpResponseHeaders.from_bytes_dict(raw_headers) assert headers.get("content-length") == "316" assert headers.get("content-encoding") == "gzip" diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py index db2d5632..d99f3635 100644 --- a/web_poet/page_inputs.py +++ b/web_poet/page_inputs.py @@ -76,7 +76,7 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers: return cls([(pair["name"], pair["value"]) for pair in arg]) @classmethod - def from_bytes( + def from_bytes_dict( cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8" ) -> T_headers: """An alternative constructor for instantiation where the header-value @@ -93,7 +93,7 @@ def from_bytes( ... b"Content-Type": [b"text/html"], ... b"content-length": b"648", ... } - >>> headers = HttpResponseHeaders.from_bytes(raw_values) + >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values) >>> headers """ From ddb7d203e76d2399fa108fcf43f08890ed731991 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Mon, 11 Apr 2022 12:57:23 +0800 Subject: [PATCH 3/3] update from_bytes_dict() to handle tuple and raise ValueError if non str or bytes --- tests/test_page_inputs.py | 16 ++++++++++++++-- web_poet/page_inputs.py | 18 ++++++++++-------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index d8606cd2..ec65b774 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -101,7 +101,8 @@ def test_http_response_headers_from_bytes_dict(): b"Content-Encoding": [b"gzip", b"br"], b"server": b"sffe", "X-string": "string", - "X-missing": None + "X-missing": None, + "X-tuple": (b"x", "y"), } headers = HttpResponseHeaders.from_bytes_dict(raw_headers) @@ -110,7 +111,18 @@ def test_http_response_headers_from_bytes_dict(): assert headers.getall("Content-Encoding") == ["gzip", "br"] assert headers.get("server") == "sffe" assert headers.get("x-string") == "string" - assert headers.get("X-missing") is None + assert headers.get("x-missing") is None + assert headers.get("x-tuple") == "x" + assert headers.getall("x-tuple") == ["x", "y"] + + +def test_http_response_headers_from_bytes_dict_err(): + + with pytest.raises(ValueError): + HttpResponseHeaders.from_bytes_dict({b"Content-Length": [316]}) + + with pytest.raises(ValueError): + HttpResponseHeaders.from_bytes_dict({b"Content-Length": 316}) def test_http_response_headers_init_requests(): diff --git a/web_poet/page_inputs.py b/web_poet/page_inputs.py index d99f3635..17cfaff4 100644 --- a/web_poet/page_inputs.py +++ b/web_poet/page_inputs.py @@ -1,5 +1,5 @@ import json -from typing import Optional, Dict, List, TypeVar, Type, Union +from typing import Optional, Dict, List, TypeVar, Type, Union, Tuple, AnyStr import attrs from multidict import CIMultiDict @@ -14,7 +14,7 @@ from .utils import memoizemethod_noargs T_headers = TypeVar("T_headers", bound="HttpResponseHeaders") -BytesDict = Dict[bytes, Union[bytes, List[bytes]]] +AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]] class HttpResponseBody(bytes): @@ -77,13 +77,14 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers: @classmethod def from_bytes_dict( - cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8" + cls: Type[T_headers], arg: AnyStrDict, encoding: str = "utf-8" ) -> T_headers: """An alternative constructor for instantiation where the header-value - pairs are in raw bytes form. + pairs could be in raw bytes form. - This supports multiple header values in the form of ``List[bytes]`` - alongside a plain ``bytes`` value. + This supports multiple header values in the form of ``List[bytes]`` and + ``Tuple[bytes]]`` alongside a plain ``bytes`` value. A value in ``str`` + also works and wouldn't break the decoding process at all. By default, it converts the ``bytes`` value using "utf-8". However, this can easily be overridden using the ``encoding`` parameter. @@ -99,15 +100,16 @@ def from_bytes_dict( """ def _norm(data): - if isinstance(data, str): + if isinstance(data, str) or data is None: return data elif isinstance(data, bytes): return data.decode(encoding) + raise ValueError(f"Expecting str or bytes. Received {type(data)}") converted = [] for header, value in arg.items(): - if isinstance(value, list): + if isinstance(value, list) or isinstance(value, tuple): converted.extend([(_norm(header), _norm(v)) for v in value]) else: converted.append((_norm(header), _norm(value)))