Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add from_bytes_dict alternative constructor for HttpResponseHeaders #33

Merged
merged 3 commits into from
Apr 11, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,24 @@ def test_http_respose_headers():
headers["user agent"]


def test_http_response_headers_from_bytes_dict():
    """Check ``HttpResponseHeaders.from_bytes_dict`` on a mixed input dict.

    The input mixes list-of-bytes values, a plain bytes value, a str
    key/value pair, and a ``None`` value. Most lookups below use a different
    letter case than the key that was supplied.
    """
    headers = HttpResponseHeaders.from_bytes_dict(
        {
            b"Content-Length": [b"316"],
            b"Content-Encoding": [b"gzip", b"br"],
            b"server": b"sffe",
            "X-string": "string",
            "X-missing": None
        }
    )

    # ``get`` is expected to return the first value of each header.
    first_values = (
        ("content-length", "316"),
        ("content-encoding", "gzip"),
        ("server", "sffe"),
        ("x-string", "string"),
    )
    for name, expected in first_values:
        assert headers.get(name) == expected

    # Every value of a multi-valued header is retrievable, in input order.
    assert headers.getall("Content-Encoding") == ["gzip", "br"]

    # A ``None`` value is expected to come back as ``None``.
    assert headers.get("X-missing") is None


def test_http_response_headers_init_requests():
requests_response = requests.Response()
requests_response.headers['User-Agent'] = "mozilla"
Expand Down
42 changes: 41 additions & 1 deletion web_poet/page_inputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Optional, Dict, List, TypeVar, Type
from typing import Optional, Dict, List, TypeVar, Type, Union

import attrs
from multidict import CIMultiDict
Expand All @@ -14,6 +14,7 @@
from .utils import memoizemethod_noargs

T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
BytesDict = Dict[bytes, Union[bytes, List[bytes]]]


class HttpResponseBody(bytes):
Expand Down Expand Up @@ -74,6 +75,45 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
"""
return cls([(pair["name"], pair["value"]) for pair in arg])

@classmethod
def from_bytes_dict(
    cls: Type[T_headers], arg: BytesDict, encoding: str = "utf-8"
) -> T_headers:
    """An alternative constructor for instantiation where the header-value
    pairs are in raw bytes form.

    This supports multiple header values in the form of ``List[bytes]``
    (or a tuple of ``bytes``) alongside a plain ``bytes`` value. Header
    names and values that are already ``str`` are kept as they are, and a
    ``None`` value is passed through unchanged.

    By default, it converts the ``bytes`` value using "utf-8". However, this
    can easily be overridden using the ``encoding`` parameter. The "utf-8"
    default was chosen to match the default Scrapy uses for its Headers.

    A ``TypeError`` is raised when a header name or value is neither
    ``bytes``, ``str`` nor ``None``, instead of silently turning it into
    ``None``. Decoding uses ``bytes.decode`` with its default strict error
    handling, so undecodable bytes raise ``UnicodeDecodeError``.

    >>> raw_values = {
    ...     b"Content-Encoding": [b"gzip", b"br"],
    ...     b"Content-Type": [b"text/html"],
    ...     b"content-length": b"648",
    ... }
    >>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
    >>> headers
    <HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
    """

    def _norm(data):
        # ``str`` and ``None`` need no decoding; hand them back untouched.
        if data is None or isinstance(data, str):
            return data
        if isinstance(data, bytes):
            return data.decode(encoding)
        # Anything else (int, dict, ...) used to fall through and become
        # ``None`` silently; fail loudly so the caller notices bad input.
        raise TypeError(
            f"Expecting str or bytes. Received {type(data).__name__}"
        )

    converted = []

    for header, value in arg.items():
        name = _norm(header)
        if isinstance(value, (list, tuple)):
            # One (name, value) pair per item; an empty sequence adds none.
            converted.extend((name, _norm(item)) for item in value)
        else:
            converted.append((name, _norm(value)))

    return cls(converted)

def declared_encoding(self) -> Optional[str]:
""" Return encoding detected from the Content-Type header, or None
if encoding is not found """
Expand Down