From 29eb601a54600802417deaf30196046b02112ed7 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Tue, 16 Jan 2024 18:42:19 +0800 Subject: [PATCH] use HttpOrBrowserResponse --- setup.py | 3 ++- .../pages/product_navigation_heuristics.py | 11 +++-------- zyte_spider_templates/spiders/ecommerce.py | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 0c99735..2c93d8e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,8 @@ "scrapy>=2.11.0", "scrapy-poet>=0.16.0", "scrapy-spider-metadata>=0.1.2", - "scrapy-zyte-api[provider]>=0.12.2", + # "scrapy-zyte-api[provider]>=0.12.2", + "scrapy-zyte-api @ git+https://git@github.com/scrapy-plugins/scrapy-zyte-api@http-or-browser-response#egg=scrapy-zyte-api", "zyte-common-items>=0.13.0", ], classifiers=[ diff --git a/zyte_spider_templates/pages/product_navigation_heuristics.py b/zyte_spider_templates/pages/product_navigation_heuristics.py index 5d1d38a..475f648 100644 --- a/zyte_spider_templates/pages/product_navigation_heuristics.py +++ b/zyte_spider_templates/pages/product_navigation_heuristics.py @@ -3,7 +3,7 @@ import attrs from scrapy.http import TextResponse from scrapy.linkextractors import LinkExtractor -from web_poet import HttpResponse, PageParams, field, handle_urls +from web_poet import HttpResponse, PageParams, field, handle_urls, HttpOrBrowserResponse, BrowserResponse from zyte_common_items import AutoProductNavigationPage, ProbabilityRequest from zyte_spider_templates.heuristics import might_be_category @@ -12,12 +12,7 @@ @handle_urls("") @attrs.define class HeuristicsProductNavigationPage(AutoProductNavigationPage): - # TODO: swap with BrowserResponse after evaluating it. - # Also after when the following issue has been fixed: - # https://github.com/scrapy-plugins/scrapy-zyte-api/issues/91#issuecomment-1744305554 - # NOTE: Even with BrowserResponse, it would still send separate - # requests for it and productNavigation. 
- response: HttpResponse + response: HttpOrBrowserResponse page_params: PageParams @field @@ -55,7 +50,7 @@ def _probably_category_links(self) -> List[ProbabilityRequest]: ignore_urls = set(self._urls_for_category()) links = [] - response = TextResponse(url=str(self.response.url), body=self.response.body) + response = TextResponse(url=self.response.url, body=self.response.text.encode()) for link in link_extractor.extract_links(response): if link.url in ignore_urls: continue diff --git a/zyte_spider_templates/spiders/ecommerce.py b/zyte_spider_templates/spiders/ecommerce.py index 694f317..6be623d 100644 --- a/zyte_spider_templates/spiders/ecommerce.py +++ b/zyte_spider_templates/spiders/ecommerce.py @@ -69,7 +69,7 @@ class EcommerceSpiderParams(BaseSpiderParams): "Whether to perform extraction using a browser request " "(browserHtml) or an HTTP request (httpResponseBody)." ), - default=None, + default=ExtractFrom.browserHtml, json_schema_extra={ "enumMeta": { ExtractFrom.browserHtml: {