diff --git a/scrapy_poet/_request_fingerprinter.py b/scrapy_poet/_request_fingerprinter.py index f6593a16..7073754f 100644 --- a/scrapy_poet/_request_fingerprinter.py +++ b/scrapy_poet/_request_fingerprinter.py @@ -13,6 +13,7 @@ from typing import Callable, Dict, List, Optional, get_args, get_origin from weakref import WeakKeyDictionary + from andi import CustomBuilder from scrapy import Request from scrapy.crawler import Crawler from scrapy.settings.default_settings import REQUEST_FINGERPRINTER_CLASS @@ -34,6 +35,8 @@ logger = getLogger(__name__) def _serialize_dep(cls): + if isinstance(cls, CustomBuilder): + cls = cls.result_class_or_fn try: from typing import Annotated except ImportError: diff --git a/scrapy_poet/utils/testing.py b/scrapy_poet/utils/testing.py index b334c8cb..6d7ad28c 100644 --- a/scrapy_poet/utils/testing.py +++ b/scrapy_poet/utils/testing.py @@ -15,6 +15,7 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET +from scrapy_poet import ScrapyPoetRequestFingerprinter from scrapy_poet.utils.mockserver import MockServer @@ -231,6 +232,7 @@ def create_scrapy_settings(request): InjectedDependenciesCollectorMiddleware: 542, "scrapy_poet.InjectionMiddleware": 543, }, + REQUEST_FINGERPRINTER_CLASS=ScrapyPoetRequestFingerprinter, ) return Settings(s) diff --git a/tests/test_request_fingerprinter.py b/tests/test_request_fingerprinter.py index b329590d..a860225b 100644 --- a/tests/test_request_fingerprinter.py +++ b/tests/test_request_fingerprinter.py @@ -512,3 +512,35 @@ async def parse_page(self, response, page: WebPage): fingerprinter.fingerprint(request) fingerprinter.fingerprint(request) mock.assert_called_once_with(request) + + +def test_item(settings): + """Test that fingerprinting works even for items.""" + from scrapy import Request, Spider + from web_poet import ItemPage, RulesRegistry + + registry = RulesRegistry() + + class MyItem: + pass + + @registry.handle_urls("example.com") + class MyPage(ItemPage[MyItem]): + pass + + class TestSpider(Spider): + name = "test" + + def __init__(self, *args, **kwargs): + self.request = Request("https://example.com", callback=self.parse_page) + + async def parse_page(self, response, a: MyItem): + pass + + settings["SCRAPY_POET_RULES"] = registry.get_rules() + crawler = get_crawler(spider_cls=TestSpider, settings=settings) + fingerprinter = crawler.request_fingerprinter + + fingerprint = fingerprinter.fingerprint(crawler.spider.request) + assert fingerprint + assert isinstance(fingerprint, bytes)