Anorov · ghost · Sep 29, 2019 · Sep 29, 2019 · ghost · Sep 29, 2019
diff --git a/README.md b/README.md
@@ -99,6 +99,32 @@ There is no need to override this delay unless cloudflare-scrape generates an er
 scraper = cfscrape.create_scraper(delay=10)
 ```
 
+### ReCAPTCHA
+
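+You can use the `captcha` event hook to implement reCAPTCHA solving. It works like Requests' [response event hook](https://requests.kennethreitz.org/en/stable/user/advanced/#event-hooks), except that it must be registered on the scraper instance (`scraper.hooks['captcha']`). The hook receives the captcha challenge response and must return a new response; if it returns nothing, cfscrape raises `CloudflareCaptchaError`.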
+
+reCAPTCHA solving steps:
+1. Extract the siteKey and any form values from the response
+2. Submit the siteKey and page URL to a captcha solving service
+3. Retrieve the token from said captcha solving service
+4. Send the form with token to Cloudflare
+
+```python
+scraper = cfscrape.create_scraper()
+# This URL points to a site that is dedicated to Cloudflare's reCAPTCHA.
+url = 'https://captcha.website'
+
+def solve_captcha(resp, *args, **kwargs):
+    # After performing all other steps, submit the form
+    params = { 'g-recaptcha-response': 'token', 's': 'secret' }
+    resp = scraper.get('{}/cdn-cgi/l/chk_captcha'.format(url), params=params)
+    # Cloudflare should have responded with the requested content
+    return resp
+
+scraper.hooks['captcha'] = solve_captcha
+print scraper.get(url).content # => "<!DOCTYPE html>..."
+```
+
 ## Integration
 
 It's easy to integrate cloudflare-scrape with other applications and tools. Cloudflare uses two cookies as tokens: one to verify you made it past their challenge page and one to track your session. To bypass the challenge page, simply include both of these cookies (with the appropriate user-agent) in all HTTP requests you make.

diff --git a/cfscrape/__init__.py b/cfscrape/__init__.py
@@ -15,6 +15,7 @@
 from requests.adapters import HTTPAdapter
 from requests.compat import urlparse, urlunparse
 from requests.exceptions import RequestException
+from requests.hooks import dispatch_hook
 
 from urllib3.util.ssl_ import create_urllib3_context, DEFAULT_CIPHERS
 
@@ -119,27 +120,39 @@ def is_cloudflare_captcha_challenge(resp):
     def request(self, method, url, *args, **kwargs):
         resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)
 
-        # Check if Cloudflare captcha challenge is presented
-        if self.is_cloudflare_captcha_challenge(resp):
-            self.handle_captcha_challenge(resp, url)
-
         # Check if Cloudflare anti-bot "I'm Under Attack Mode" is enabled
         if self.is_cloudflare_iuam_challenge(resp):
             resp = self.solve_cf_challenge(resp, **kwargs)
 
         return resp
 
+    def send(self, request, **kwargs):
+        resp = super(CloudflareScraper, self).send(request, **kwargs)
+
+        while self.is_cloudflare_captcha_challenge(resp):
+            hook_resp = dispatch_hook('captcha', self.hooks, resp, **kwargs)
+
+            # The captcha hooks are expected to return a different response
+            if resp is hook_resp or not hook_resp:
+                # No captcha hooks or the response is invalid
+                self.handle_captcha_challenge(resp)
+
+            # Replace the response and check again
+            resp = hook_resp
+
+        return resp
+
     def cloudflare_is_bypassed(self, url, resp=None):
         cookie_domain = ".{}".format(urlparse(url).netloc)
         return (
             self.cookies.get("cf_clearance", None, domain=cookie_domain) or
             (resp and resp.cookies.get("cf_clearance", None, domain=cookie_domain))
         )
 
-    def handle_captcha_challenge(self, resp, url):
+    def handle_captcha_challenge(self, resp):
         error = (
             "Cloudflare captcha challenge presented for %s (cfscrape cannot solve captchas)"
-            % urlparse(url).netloc
+            % urlparse(resp.url).netloc
         )
         if ssl.OPENSSL_VERSION_NUMBER < 0x10101000:
             error += ". Your OpenSSL version is lower than 1.1.1. Please upgrade your OpenSSL library and recompile Python."

diff --git a/tests/__init__.py b/tests/__init__.py
@@ -149,7 +149,9 @@ def on_redirect(request):
                 responses.add(DefaultResponse(url=url, body=requested_page))
 
             responses.add(RedirectResponse(
-                url=submit_uri, callback=on_redirect, location=redirect_to
+                url=submit_uri,
+                callback=on_redirect,
+                location=redirect_to
             ))
 
             return test(self, **cfscrape_kwargs)
@@ -158,12 +160,22 @@ def on_redirect(request):
     return challenge_responses_decorator
 
 
-def recaptcha_responses(filename):
+def recaptcha_responses(filename, redirect_to='/'):
     def recaptcha_responses_decorator(test):
         @responses.activate
         def wrapper(self):
             responses.add(CaptchaResponse(url=url, body=fixtures(filename)))
 
+            def on_redirect(request):
+                # We don't register the last response unless the redirect occurs
+                responses.add(DefaultResponse(url=url, body=requested_page))
+
+            responses.add(RedirectResponse(
+                url='{}/cdn-cgi/l/chk_captcha'.format(url),
+                callback=on_redirect,
+                location=redirect_to
+            ))
+
             return test(self, **cfscrape_kwargs)
         return wrapper
 

diff --git a/tests/test_cfscrape.py b/tests/test_cfscrape.py
@@ -65,6 +65,26 @@ def test_cf_recaptcha_15_04_2019(self, **kwargs):
         finally:
             ssl.OPENSSL_VERSION_NUMBER = v
 
+    @recaptcha_responses(filename='cf_recaptcha_15_04_2019.html')
+    def test_captcha_hooks(self, **kwargs):
+        scraper = cfscrape.CloudflareScraper(**kwargs)
+
+        def captcha_hook(resp, **kwargs):
+            return scraper.get('{}/cdn-cgi/l/chk_captcha'.format(url))
+
+        scraper.hooks['captcha'] = captcha_hook
+
+        expect(scraper.get(url).content).to.equal(requested_page)
+
+    @recaptcha_responses(filename='cf_recaptcha_15_04_2019.html')
+    def test_captcha_hooks_empty_response(self, **kwargs):
+        scraper = cfscrape.CloudflareScraper(**kwargs)
+        scraper.hooks['captcha'] = [lambda resp, **kwargs: None]
+
+        message = re.compile(r'captcha challenge presented')
+        scraper.get.when.called_with(url) \
+            .should.have.raised(cfscrape.CloudflareCaptchaError, message)
+
     @responses.activate
     def test_js_challenge_unable_to_identify(self):
         body = fixtures('js_challenge_10_04_2019.html')
@@ -323,5 +343,6 @@ def test_create_scraper_with_session(self):
         scraper.should_not.have.property('data')
 
         session.data = {'bar': 'foo'}
+
         scraper = cfscrape.create_scraper(sess=session)
         scraper.data.should.equal(session.data)