Skip to content

Commit

Permalink
Fixed not closing page after some actions with activated Recaptcha middleware (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewZMSU authored Jul 22, 2024
1 parent 5b0ee56 commit cf898e0
Showing 1 changed file with 19 additions and 14 deletions.
33 changes: 19 additions & 14 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def from_crawler(cls, crawler):
)
return middleware

def process_request(self, request, spider):
def process_request(self, request, **_):
if isinstance(request, CloseContextRequest):
return self.process_close_context_request(request)

Expand Down Expand Up @@ -343,19 +343,27 @@ def from_crawler(cls, crawler: Crawler):
)
return cls(recaptcha_solving, submit_selectors)

def process_request(self, request, spider):
@staticmethod
def is_recaptcha_producing_action(action) -> bool:
    """Tell whether ``action`` is one after which recaptcha handling applies.

    Returns ``False`` when ``action`` is an instance of ``Screenshot``,
    ``Scroll``, ``CustomJsAction`` or ``RecaptchaSolver``, and ``True``
    for any other object.
    """
    # Action types that are exempt from the recaptcha check.
    exempt_action_types = (Screenshot, Scroll, CustomJsAction, RecaptchaSolver)
    if isinstance(action, exempt_action_types):
        return False
    return True

def process_request(self, request, **_):
if request.meta.get("dont_recaptcha", False):
return None

# Checking if we need to close page after action
if isinstance(request, PuppeteerRequest):
if request.close_page and not request.meta.get(
"_captcha_submission", False
):
request.close_page = False
request.dont_filter = True
self._page_closing.add(request)
return request
return None
if self.is_recaptcha_producing_action(request.action):
if request.close_page and not request.meta.get(
"_captcha_submission", False
):
request.close_page = False
request.dont_filter = True
self._page_closing.add(request)
return request

def process_response(self, request, response, spider):
if not isinstance(
Expand All @@ -376,10 +384,7 @@ def process_response(self, request, response, spider):
# RECaptchaSolver was called by recaptcha middleware
return self._submit_recaptcha(request, response, spider)

if isinstance(
puppeteer_request.action,
(Screenshot, Scroll, CustomJsAction, RecaptchaSolver),
):
if not self.is_recaptcha_producing_action(puppeteer_request.action):
# No recaptcha after these actions
return response

Expand Down

0 comments on commit cf898e0

Please sign in to comment.