diff --git a/setup.py b/setup.py index 6b342aad..f661be6a 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ 'url-matcher', 'multidict', 'w3lib >= 1.22.0', + 'yarl', ], classifiers=[ 'Development Status :: 2 - Pre-Alpha', diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 65934a10..4b7adf0b 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -6,6 +6,8 @@ import parsel from web_poet.page_inputs import ( + ResponseURL, + RequestURL, HttpRequest, HttpResponse, HttpRequestBody, @@ -16,6 +18,22 @@ ) +@pytest.mark.parametrize("cls", [ResponseURL, RequestURL]) +def test_url(cls): + url_value = "https://example.com/category/product?query=123&id=xyz#frag1" + + url = cls(url_value) + + assert str(url) == url_value + assert url.scheme == "https" + assert url.host == "example.com" + assert url.path == "/category/product" + assert url.query_string == "query=123&id=xyz" + assert url.fragment == "frag1" + + new_url = cls(url) + + @pytest.mark.parametrize("body_cls", [HttpRequestBody, HttpResponseBody]) def test_http_body_hashable(body_cls): http_body = body_cls(b"content") diff --git a/web_poet/.overrides.py.swp b/web_poet/.overrides.py.swp new file mode 100644 index 00000000..d1928eae Binary files /dev/null and b/web_poet/.overrides.py.swp differ diff --git a/web_poet/mixins.py b/web_poet/mixins.py index faf6c0f6..78aae99d 100644 --- a/web_poet/mixins.py +++ b/web_poet/mixins.py @@ -67,7 +67,7 @@ def base_url(self) -> str: # FIXME: move it to HttpResponse if self._cached_base_url is None: text = self.html[:4096] - self._cached_base_url = get_base_url(text, self.url) + self._cached_base_url = get_base_url(text, str(self.url)) return self._cached_base_url def urljoin(self, url: str) -> str: diff --git a/web_poet/page_inputs/__init__.py b/web_poet/page_inputs/__init__.py index ddb3c65b..e5ff8d4f 100644 --- a/web_poet/page_inputs/__init__.py +++ b/web_poet/page_inputs/__init__.py @@ -1,6 +1,8 @@ from .meta import Meta from .client import HttpClient from .http import ( + ResponseURL, + RequestURL, HttpRequest, HttpResponse, HttpRequestHeaders, diff --git a/web_poet/page_inputs/http.py b/web_poet/page_inputs/http.py index a3df744d..81933f88 100644 --- a/web_poet/page_inputs/http.py +++ b/web_poet/page_inputs/http.py @@ -9,6 +9,7 @@ http_content_type_encoding ) +import yarl from web_poet._base import _HttpHeaders from web_poet.utils import memoizemethod_noargs from web_poet.mixins import SelectableMixin @@ -18,12 +19,46 @@ _AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]] -class ResponseURL(str): +class _URL: + def __init__(self, url: Union[str, yarl.URL]): + self._url = yarl.URL(str(url)) + + def __str__(self) -> str: + return str(self._url) + + def __repr__(self) -> str: + return str(self._url) + + def __eq__(self, other) -> bool: + return str(self._url) == str(other) + + @property + def scheme(self) -> str: + return self._url.scheme + + @property + def host(self) -> str: + return self._url.host + + @property + def path(self) -> str: + return self._url.path + + @property + def query_string(self) -> str: + return self._url.query_string + + @property + def fragment(self) -> str: + return self._url.fragment + + +class ResponseURL(_URL): """ URL of the response """ pass -class RequestURL(str): +class RequestURL(_URL): """ URL of the request """ pass