Skip to content

Commit

Permalink
Write error response to WACZ (#19, PR#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wesley van Lee authored and leewesleyv committed Nov 13, 2024
1 parent f63e676 commit 012b3f7
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions scrapy_webarchive/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def from_crawler(cls, crawler: Crawler) -> Self:
except AttributeError:
exporter = cls(crawler.settings, crawler)

- crawler.signals.connect(exporter.response_received, signal=signals.response_received)
+ crawler.signals.connect(exporter.response_downloaded, signal=signals.response_downloaded)
crawler.signals.connect(exporter.spider_closed, signal=signals.spider_closed)
crawler.signals.connect(exporter.spider_opened, signal=signals.spider_opened)
return exporter
Expand Down Expand Up @@ -143,7 +143,7 @@ def from_settings(cls, settings: Settings, crawler: Crawler):
def spider_opened(self) -> None:
self.writer.write_warcinfo(robotstxt_obey=self.settings["ROBOTSTXT_OBEY"])

- def response_received(self, response: Response, request: Request, spider: Spider) -> None:
+ def response_downloaded(self, response: Response, request: Request, spider: Spider) -> None:
request.meta["WARC-Date"] = get_formatted_dt_string(format=WARC_DT_FORMAT)

# Write response WARC record
Expand Down
4 changes: 2 additions & 2 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_get_store(self, *args):
extension = WaczExporter.from_crawler(crawler)
assert isinstance(extension.store, FTPFilesStore)

- def test_response_received(self):
+ def test_response_downloaded(self):
crawler = get_crawler(settings_dict={"SW_EXPORT_URI": "/tmp/scrapy-webarchive/wacz/"})
crawler.spider = crawler._create_spider("quotes")
extension = WaczExporter.from_crawler(crawler)
Expand All @@ -49,7 +49,7 @@ def test_response_received(self):
# Call the method under test
request = Request("http://example.com")
response = Response(request.url)
- extension.response_received(response, request, crawler.spider)
+ extension.response_downloaded(response, request, crawler.spider)

# Verify that the WARC date was set in request meta
assert "WARC-Date" in request.meta
Expand Down

0 comments on commit 012b3f7

Please sign in to comment.