Skip to content

Commit

Permalink
Splash proxy support
Browse files Browse the repository at this point in the history
Do not set meta['proxy'] for splash requests, pass them to splash
args instead.
  • Loading branch information
lopuhin committed Jun 15, 2016
1 parent d3ecd49 commit acb2962
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
13 changes: 13 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,19 @@ Response is JSON with a ``status`` field with the following possible values:
``cookies`` field, in ``Cookie.__dict__`` format.


Proxy support
-------------

Proxies can be specified via ``HTTP_PROXY`` and ``HTTPS_PROXY`` keys
in ``settings`` argument. Username and password can be specified
as part of the proxy url (the format is ``protocol://username:password@url``).

If you are using a proxy with Splash, it is assumed that
you want Splash to make requests via the given proxy,
and not to make the request **to** Splash via the proxy.
``HTTP_PROXY`` is always used for Splash.


Captcha support
---------------

Expand Down
15 changes: 13 additions & 2 deletions autologin/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ def get_cookiejar(response):
return obj


class ProxyFromSettingsMiddleware(HttpProxyMiddleware):
"""A middleware that sets proxy from settings file"""
class ProxyMiddleware(HttpProxyMiddleware):
""" A middleware that sets proxy from settings for non-splash requests,
and passes proxy splash args for splash requests.
This middleware must be placed **before** splash middleware.
"""

@classmethod
def from_crawler(cls, crawler):
Expand All @@ -48,8 +51,16 @@ def __init__(self, settings):
('http', settings.get('HTTP_PROXY')),
('https', settings.get('HTTPS_PROXY')),
]
self.splash_proxy = settings.get('HTTP_PROXY')
for type_, url in proxies:
if url:
self.proxies[type_] = self._get_proxy(url, type_)
if not self.proxies:
raise NotConfigured

def process_request(self, request, spider):
    """Apply the configured proxy to ``request``.

    For requests carrying a ``'splash'`` key in ``request.meta``, the
    proxy url stored in ``self.splash_proxy`` (if any) is written to
    ``request.meta['splash']['args']['proxy']`` and ``meta['proxy']`` is
    left untouched. All other requests are handed to the parent
    middleware's ``process_request``.
    """
    if 'splash' in request.meta:
        if self.splash_proxy:
            # 'args' may be absent from the splash meta at this point,
            # so create it instead of failing with KeyError.
            splash_args = request.meta['splash'].setdefault('args', {})
            splash_args['proxy'] = self.splash_proxy
    else:
        super(ProxyMiddleware, self).process_request(request, spider)
3 changes: 2 additions & 1 deletion autologin/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
USER_AGENT = USER_AGENT,
DOWNLOADER_MIDDLEWARES = {
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': None,
'autologin.middleware.ProxyFromSettingsMiddleware': 750,
# Placed before splash middleware
'autologin.middleware.ProxyMiddleware': 720,
},
))

Expand Down

0 comments on commit acb2962

Please sign in to comment.