Skip to content

Commit

Permalink
Merge branch 'master' into additional-requests
Browse files Browse the repository at this point in the history
  • Loading branch information
BurnzZ committed Apr 11, 2022
2 parents 3310c1b + a29d86d commit 29f84c6
Show file tree
Hide file tree
Showing 21 changed files with 1,146 additions and 4 deletions.
5 changes: 2 additions & 3 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ Changelog
TBR
------------------

* Removed support for Python 3.6
* Added support for Python 3.10
* Added support for performing additional requests using
``web_poet.HttpClient``.
* Introduced ``web_poet.Meta`` to pass arbitrary information
inside a Page Object.
* added a ``PageObjectRegistry`` class which has the ``handle_urls`` decorator
to conveniently declare and collect ``OverrideRule``.
* removed support for Python 3.6
* added support for Python 3.10
* Backward Incompatible Change:
Expand All @@ -19,7 +19,6 @@ TBR
specific attribute types like ``HttpResponseBody`` and
``HttpResponseHeaders``.


0.1.1 (2021-06-02)
------------------

Expand Down
16 changes: 16 additions & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.. _`api-reference`:

=============
API Reference
=============
Expand Down Expand Up @@ -61,3 +63,17 @@ Exceptions

.. automodule:: web_poet.exceptions.http
:members:

.. _`api-overrides`:

Overrides
=========

See the tutorial section on :ref:`intro-overrides` for more context about its
use cases and some examples.

.. autofunction:: web_poet.handle_urls

.. automodule:: web_poet.overrides
:members:
:exclude-members: handle_urls
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
# Documentation roots of the external projects these docs cross-reference
# (consumed by Sphinx's intersphinx extension).
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None, ),
    'scrapy': ('https://docs.scrapy.org/en/latest', None, ),
    'url-matcher': ('https://url-matcher.readthedocs.io/en/stable/', None, ),
    'parsel': ('https://parsel.readthedocs.io/en/latest/', None, ),
    'multidict': ('https://multidict.readthedocs.io/en/latest/', None, ),
}
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ and the motivation behind ``web-poet``, start with :ref:`from-ground-up`.

intro/tutorial
intro/from-ground-up
intro/overrides

.. toctree::
:caption: Advanced
Expand Down
484 changes: 484 additions & 0 deletions docs/intro/overrides.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
install_requires=[
'attrs >= 21.3.0',
'parsel',
'url-matcher',
'multidict',
'w3lib >= 1.22.0',
],
Expand Down
39 changes: 39 additions & 0 deletions tests/po_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
This package is just for overrides testing purposes.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from .. import po_lib_sub # NOTE: this module contains a PO with @handle_urls
from web_poet import handle_urls, PageObjectRegistry


class POBase:
    """Base for the test Page Objects: declares the expectations each one states."""

    # Compared by the tests against the matching rule's ``instead_of``.
    expected_overrides: Callable
    # Compared by the tests against the matching rule's ``for_patterns``.
    expected_patterns: Patterns
    # Compared by the tests against the matching rule's ``meta``.
    expected_meta: Dict[str, Any]


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``POTopLevel1``.
class POTopLevelOverriden1:
    pass


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``POTopLevel2``.
class POTopLevelOverriden2:
    pass


# Two annotations are stacked on purpose. Only a single annotation per
# registry is allowed, so the first one listed here is ignored and the
# expectations below describe the second one.
@handle_urls("example.com", overrides=POTopLevelOverriden1)
@handle_urls(
    "example.com",
    overrides=POTopLevelOverriden1,
    exclude="/*.jpg|",
    priority=300,
)
class POTopLevel1(POBase):
    expected_overrides = POTopLevelOverriden1
    expected_patterns = Patterns(
        include=["example.com"],
        exclude=["/*.jpg|"],
        priority=300,
    )
    expected_meta = {}  # type: ignore


@handle_urls(
    "example.com",
    overrides=POTopLevelOverriden2,
)
class POTopLevel2(POBase):
    # Simplest case: one include pattern, no exclusions and no extra arguments.
    expected_overrides = POTopLevelOverriden2
    expected_patterns = Patterns(include=["example.com"])
    expected_meta = {}  # type: ignore
16 changes: 16 additions & 0 deletions tests/po_lib/a_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``POModule``.
class POModuleOverriden:
    pass


@handle_urls(
    "example.com",
    overrides=POModuleOverriden,
    extra_arg="foo",
)
class POModule(POBase):
    expected_overrides = POModuleOverriden
    expected_patterns = Patterns(include=["example.com"])
    # The extra keyword passed to @handle_urls is expected back in the rule's meta.
    expected_meta = {"extra_arg": "foo"}  # type: ignore

Empty file added tests/po_lib/an_empty_module.py
Empty file.
Empty file.
15 changes: 15 additions & 0 deletions tests/po_lib/nested_package/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``PONestedPkg``.
class PONestedPkgOverriden:
    pass


@handle_urls(
    include=["example.com", "example.org"],
    exclude=["/*.jpg|"],
    overrides=PONestedPkgOverriden,
)
class PONestedPkg(POBase):
    # Both include and exclude are given as lists in the annotation above.
    expected_overrides = PONestedPkgOverriden
    expected_patterns = Patterns(
        include=["example.com", "example.org"],
        exclude=["/*.jpg|"],
    )
    expected_meta = {}  # type: ignore
15 changes: 15 additions & 0 deletions tests/po_lib/nested_package/a_nested_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from url_matcher import Patterns

from tests.po_lib import POBase
from web_poet import handle_urls


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``PONestedModule``.
class PONestedModuleOverriden:
    pass


@handle_urls(
    include=["example.com", "example.org"],
    exclude=["/*.jpg|"],
    overrides=PONestedModuleOverriden,
)
class PONestedModule(POBase):
    # Expectations mirror the include/exclude lists given in the annotation above.
    expected_overrides = PONestedModuleOverriden
    expected_patterns = Patterns(
        include=["example.com", "example.org"],
        exclude=["/*.jpg|"],
    )
    expected_meta = {}  # type: ignore
25 changes: 25 additions & 0 deletions tests/po_lib_sub/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""This package is being used by tests/po_lib to validate some behaviors on
external dependencies.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from web_poet import handle_urls


class POBase:
    """Base for the Page Object in this package: declares the expectations it states."""

    # Compared by the tests against the matching rule's ``instead_of``.
    expected_overrides: Callable
    # Compared by the tests against the matching rule's ``for_patterns``.
    expected_patterns: Patterns
    # Compared by the tests against the matching rule's ``meta``.
    expected_meta: Dict[str, Any]


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``POLibSub``.
class POLibSubOverriden:
    pass


@handle_urls(
    "sub_example.com",
    overrides=POLibSubOverriden,
)
class POLibSub(POBase):
    # Single include pattern, no exclusions and no extra arguments.
    expected_overrides = POLibSubOverriden
    expected_patterns = Patterns(include=["sub_example.com"])
    expected_meta = {}  # type: ignore
112 changes: 112 additions & 0 deletions tests/test_overrides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import pytest
from url_matcher import Patterns

from tests.po_lib_sub import POLibSub
from tests.po_lib import (
POTopLevel1,
POTopLevel2,
POTopLevelOverriden2,
)
from tests.po_lib.a_module import POModule, POModuleOverriden
from tests.po_lib.nested_package import PONestedPkg
from tests.po_lib.nested_package.a_nested_module import PONestedModule
from web_poet import (
default_registry,
consume_modules,
OverrideRule,
PageObjectRegistry,
)


# Page Objects imported from ``tests.po_lib`` that the default registry is
# expected to report.
POS = {POTopLevel1, POTopLevel2, POModule, PONestedPkg, PONestedModule}


def test_override_rule_uniqueness():
    """Two ``OverrideRule`` objects that differ only in their ``meta`` must
    produce the same hash.
    """
    shared_fields = dict(
        for_patterns=Patterns(include=["example.com"], exclude=["example.com/blog"]),
        use=POTopLevel1,
        instead_of=POTopLevelOverriden2,
    )

    # Everything except ``meta`` is identical between the two rules.
    first = OverrideRule(meta={"key_1": 1}, **shared_fields)
    second = OverrideRule(meta={"key_2": 2}, **shared_fields)

    assert hash(first) == hash(second)


def test_list_page_objects_all():
    """The default registry reports every rule declared via ``@handle_urls`` in
    the modules imported by this test module, and none from a package that was
    never imported.
    """
    rules = default_registry.get_overrides()
    page_objects = {po.use for po in rules}

    # Note that the 'tests_extra.po_lib_sub_not_imported.POLibSubNotImported'
    # Page Object is not included here since it was never imported anywhere in
    # our test package. It would only be included if we ran any of the
    # following (note that they should run before `get_overrides` is called):
    #   - from tests_extra import po_lib_sub_not_imported
    #   - import tests_extra.po_lib_sub_not_imported
    #   - web_poet.consume_modules("tests_extra")
    # Merely having `import tests_extra` won't work since the subpackages and
    # modules need to be traversed and imported as well.
    assert all("po_lib_sub_not_imported" not in po.__module__ for po in page_objects)

    # Ensure that ALL Override Rules are returned as long as the given
    # registry's @handle_urls annotation was used.
    assert page_objects == POS.union({POLibSub})
    for rule in rules:
        # Each Page Object carries its own expectations; the trailing
        # ``rule.use`` names the offending class in the failure message.
        assert rule.instead_of == rule.use.expected_overrides, rule.use
        assert rule.for_patterns == rule.use.expected_patterns, rule.use
        assert rule.meta == rule.use.expected_meta, rule.use


def test_consume_module_not_existing():
    """Consuming a module name that does not exist raises ``ImportError``."""
    missing_module = "this_does_not_exist"
    with pytest.raises(ImportError):
        consume_modules(missing_module)


def test_list_page_objects_all_consume():
    """A test similar to ``test_list_page_objects_all`` but calls
    ``consume_modules()`` first, so that the @handle_urls annotations from
    the otherwise never-imported ``tests_extra`` package are loaded too.
    """
    consume_modules("tests_extra")
    rules = default_registry.get_overrides()
    page_objects = {po.use for po in rules}
    # A generator lets ``any`` stop at the first match instead of building a list.
    assert any("po_lib_sub_not_imported" in po.__module__ for po in page_objects)


def test_registry_search_overrides():
    """``search_overrides`` narrows the default registry's rules down to those
    matching the given ``use`` or ``instead_of`` value.
    """
    by_use = default_registry.search_overrides(use=POTopLevel2)
    assert len(by_use) == 1
    assert by_use[0].use == POTopLevel2

    by_instead_of = default_registry.search_overrides(instead_of=POTopLevelOverriden2)
    assert len(by_instead_of) == 1
    assert by_instead_of[0].instead_of == POTopLevelOverriden2

    # No such rule exists, so the search is expected to come back empty.
    unmatched = default_registry.search_overrides(use=POModuleOverriden)
    assert len(unmatched) == 0


def test_from_override_rules():
    """A registry built with ``from_override_rules`` returns exactly the rules
    it was given, which differ from what the default registry holds.
    """
    sample_rule = OverrideRule(
        for_patterns=Patterns(include=["sample.com"]),
        use=POTopLevel1,
        instead_of=POTopLevelOverriden2,
    )
    rules = [sample_rule]

    custom_registry = PageObjectRegistry.from_override_rules(rules)

    assert custom_registry.get_overrides() == rules
    assert default_registry.get_overrides() != rules
30 changes: 30 additions & 0 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,36 @@ def test_http_request_init_full():
assert req_1.body == req_2.body


def test_http_response_headers_from_bytes_dict():
    """``from_bytes_dict`` accepts bytes or str keys, and values given as a
    single item, a list, a tuple or ``None``.
    """
    mixed_input = {
        b"Content-Length": [b"316"],
        b"Content-Encoding": [b"gzip", b"br"],
        b"server": b"sffe",
        "X-string": "string",
        "X-missing": None,
        "X-tuple": (b"x", "y"),
    }
    parsed = HttpResponseHeaders.from_bytes_dict(mixed_input)

    # Lookups below deliberately use a different letter case than the input keys.
    assert parsed.get("content-length") == "316"

    # Multi-valued header: ``get`` yields the first value, ``getall`` all of them.
    assert parsed.get("content-encoding") == "gzip"
    assert parsed.getall("Content-Encoding") == ["gzip", "br"]

    assert parsed.get("server") == "sffe"
    assert parsed.get("x-string") == "string"
    assert parsed.get("x-missing") is None

    # A tuple mixing bytes and str comes back as plain strings.
    assert parsed.get("x-tuple") == "x"
    assert parsed.getall("x-tuple") == ["x", "y"]


def test_http_response_headers_from_bytes_dict_err():
    """An integer header value, inside a list or on its own, is rejected with
    ``ValueError``.
    """
    for invalid_value in ([316], 316):
        with pytest.raises(ValueError):
            HttpResponseHeaders.from_bytes_dict({b"Content-Length": invalid_value})


def test_http_response_headers_init_requests():
requests_response = requests.Response()
requests_response.headers['User-Agent'] = "mozilla"
Expand Down
5 changes: 5 additions & 0 deletions tests_extra/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
This test package was created separately to see the behavior of retrieving the
Override rules declared on a registry where @handle_urls is defined on another
package.
"""
28 changes: 28 additions & 0 deletions tests_extra/po_lib_sub_not_imported/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
This package is quite similar to tests/po_lib_sub in terms of code contents.
What we're ultimately trying to test here is to see if the `default_registry`
captures the rules annotated in this module if it was not imported.
"""
from typing import Dict, Any, Callable

from url_matcher import Patterns

from web_poet import handle_urls


class POBase:
    """Base for the Page Object in this package: declares the expectations it states."""

    # Presumably mirrors the rule's ``instead_of`` -- no test in view compares it.
    expected_overrides: Callable
    # Presumably mirrors the rule's ``for_patterns`` -- no test in view compares it.
    expected_patterns: Patterns
    # Presumably mirrors the rule's ``meta`` -- no test in view compares it.
    expected_meta: Dict[str, Any]


# Empty placeholder: exists only to be named as the ``overrides`` target of
# ``POLibSubNotImported``.
class POLibSubOverridenNotImported:
    pass


@handle_urls(
    "sub_example_not_imported.com",
    overrides=POLibSubOverridenNotImported,
)
class POLibSubNotImported(POBase):
    # Single include pattern, no exclusions and no extra arguments.
    expected_overrides = POLibSubOverridenNotImported
    expected_patterns = Patterns(include=["sub_example_not_imported.com"])
    expected_meta = {}  # type: ignore
5 changes: 5 additions & 0 deletions web_poet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,8 @@
HttpRequestBody,
HttpResponseBody,
)
from .overrides import PageObjectRegistry, consume_modules, OverrideRule


# Registry instance created once when the package is imported.
default_registry = PageObjectRegistry()
# Expose the default registry's decorator as ``web_poet.handle_urls``.
handle_urls = default_registry.handle_urls
Loading

0 comments on commit 29f84c6

Please sign in to comment.