Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove warning-triggering NotConfigured messages #112

Merged
merged 1 commit into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions tests/incremental/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ def test_middleware_init_not_configured():
     crawler = crawler_for_incremental()
     crawler.spider.settings = Settings({"INCREMENTAL_CRAWL_ENABLED": False})

-    with pytest.raises(NotConfigured) as exc_info:
+    with pytest.raises(NotConfigured):
         IncrementalCrawlMiddleware(crawler)
-    assert str(exc_info.value) == (
-        "IncrementalCrawlMiddleware is not enabled. Set the "
-        "INCREMENTAL_CRAWL_ENABLED setting to True to enable it."
-    )


 @patch("scrapinghub.ScrapinghubClient")
Expand All @@ -59,12 +55,8 @@ def test_prepare_manager_with_collection_fp_failure(caplog):
     crawler.spider.settings = Settings({"INCREMENTAL_CRAWL_ENABLED": True})

     caplog.clear()
-    with pytest.raises(CloseSpider) as exc_info:
+    with pytest.raises(CloseSpider):
         IncrementalCrawlMiddleware.prepare_incremental_manager(crawler)
-    assert exc_info.value.reason == "incremental_crawling_middleware_collection_issue"
-    assert caplog.messages[-1].startswith(
-        "IncrementalCrawlMiddleware is enabled, but something went wrong with Collections."
-    )


 @patch("scrapinghub.ScrapinghubClient")
Expand Down
5 changes: 1 addition & 4 deletions zyte_spider_templates/_incremental/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@ class IncrementalCrawlMiddleware:
     def __init__(self, crawler: Crawler):
         assert crawler.spider
         if not crawler.spider.settings.getbool("INCREMENTAL_CRAWL_ENABLED", False):
-            raise NotConfigured(
-                "IncrementalCrawlMiddleware is not enabled. Set the "
-                "INCREMENTAL_CRAWL_ENABLED setting to True to enable it."
-            )
+            raise NotConfigured
         self.inc_manager: IncrementalCrawlingManager = self.prepare_incremental_manager(
             crawler
         )
Expand Down
21 changes: 4 additions & 17 deletions zyte_spider_templates/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,7 @@ def __init__(self, crawler: Crawler):
             0, crawler.spider.settings.getint("MAX_REQUESTS_PER_SEED", 0)
         )
         if not max_requests_per_seed:
-            raise NotConfigured(
-                "MaxRequestsPerSeedDownloaderMiddleware is not configured. "
-                "Set MAX_REQUESTS_PER_SEED to enable it."
-            )
+            raise NotConfigured
         self.crawler = crawler
         self.requests_per_seed: defaultdict = defaultdict(int)
         self.seeds_reached_limit: Set[str] = set()
Expand Down Expand Up @@ -326,10 +323,7 @@ def __init__(self, crawler):
             self.max_navigation_depth = max_navigation_depth
             self.stats = crawler.stats
         else:
-            raise NotConfigured(
-                "TrackNavigationDepthSpiderMiddleware is not configured. "
-                "Set `NAVIGATION_DEPTH_LIMIT` to 1 or more to enable it."
-            )
+            raise NotConfigured

     def update_page_params(self, request, page_params) -> None:
         page_params["skip_subcategories"] = request.meta.get(
Expand Down Expand Up @@ -414,10 +408,7 @@ def __init__(self, crawler: Crawler):
         super().__init__(crawler)
         assert crawler.spider
         if not crawler.spider.settings.getbool("ONLY_FEEDS_ENABLED"): # type: ignore[union-attr]
-            raise NotConfigured(
-                "OnlyFeedsMiddleware is not configured. Set "
-                "ONLY_FEEDS_ENABLED to True to enable it."
-            )
+            raise NotConfigured

     def update_page_params(self, request, page_params) -> None:
         page_params["only_feeds"] = request.meta.get(
Expand Down Expand Up @@ -460,11 +451,7 @@ def __init__(self, crawler: Crawler):
         if not crawler.spider.settings.getbool( # type: ignore[union-attr]
             "OFFSITE_REQUESTS_PER_SEED_ENABLED", True
         ):
-            raise NotConfigured(
-                "OffsiteRequestsPerSeedMiddleware is not enabled. Set the "
-                "OFFSITE_REQUESTS_PER_SEED_ENABLED setting to True to enable "
-                "it."
-            )
+            raise NotConfigured

         self.stats = crawler.stats
         self.allowed_domains_per_seed: Dict[str, Set[str]] = defaultdict(set)
Expand Down
Loading