proxypool.py
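"""Crawl free proxy lists, validate each candidate through
httpbin.org, and store working proxies in a MySQL table."""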
import requests
import threading
import time
import pymysql
import json
from proxy_spiders.spider_ipcn import SpiderIpcn
from proxy_spiders.spider_66ip import SpiderIP66
from proxy_spiders.spider_kxdaili import SpiderKxdaili
from proxy_spiders.spider_89ip import SpiderIP89
from proxy_spiders.spider_data5u import SpiderData5u
from proxy_spiders.spider_ip181 import SpiderIP181
from proxy_spiders.spider_xicidaili import SpiderXicidaili
from proxy_spiders.spider_coderbusy import SpiderCoderBusy
from conf import MYSQL_CONF
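

# A checker thread probes one 'ip:port' candidate by requesting
# http://httpbin.org/ip through it; working, anonymous proxies are
# written to MySQL under a global lock.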
class IsEnable(threading.Thread):

    def __init__(self, ip):
        super(IsEnable, self).__init__()
        self.ip = ip
        self.proxies = {
            'http': 'http://%s' % ip
        }
    def run(self):
        try:
            html = requests.get('http://httpbin.org/ip',
                                proxies=self.proxies, timeout=5).text
            # Parse the JSON response instead of eval()-ing untrusted text.
            result = json.loads(html)['origin']
            # Two comma-separated addresses mean the proxy leaked our real
            # IP via X-Forwarded-For, i.e. it is not anonymous; skip it.
            if len(result.split(',')) == 2:
                return
            # Keep the proxy only if the origin seen by httpbin matches it.
            if result in self.ip:
                with lock:
                    self.insert_into_sql()
        except Exception:
            return
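
    # The shared pymysql connection and cursor are not thread-safe, so
    # callers must hold the global lock around this method (see run()).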
    def insert_into_sql(self):
        global cursor
        global conn
        global crawl_ip_count
        try:
            date = time.strftime('%Y-%m-%d %X', time.localtime())
            host, port = self.ip.split(':')
            # A parameterized query avoids quoting bugs and SQL injection.
            cursor.execute(
                'insert into proxypool(ip, port, time) values (%s, %s, %s)',
                (host, port, date))
            conn.commit()
            crawl_ip_count += 1
        except Exception:
            pass


def get_current_time():
    return time.strftime('%Y-%m-%d %X', time.localtime())
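

# Main loop: each cycle runs every spider, validates the scraped
# candidates in batches of 50 checker threads, then sleeps 5 minutes.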
if __name__ == '__main__':
    lock = threading.Lock()
    crawlers = [SpiderCoderBusy, SpiderIP66, SpiderIP89]
    while True:
        crawl_ip_count = 0
        conn = pymysql.connect(host=MYSQL_CONF['host'],
                               user=MYSQL_CONF['user'],
                               passwd=MYSQL_CONF['passwd'],
                               db=MYSQL_CONF['db'],
                               port=MYSQL_CONF['port'],
                               charset='utf8')
        cursor = conn.cursor()
        result = []
        tasks = []
        for crawler in crawlers:
            task = crawler()
            # .daemon = True replaces the deprecated setDaemon().
            task.daemon = True
            tasks.append(task)
        for task in tasks:
            task.start()
        for task in tasks:
            task.join()
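        # Merge each spider's scraped candidates; spiders without a
        # .result attribute (e.g. ones that crashed) are skipped.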
        for task in tasks:
            try:
                result += task.result
            except Exception:
                continue
        while result:
            # Validate up to 50 candidates at a time, then pause briefly
            # so the checker threads can finish before the next batch.
            num = 0
            while num < 50:
                try:
                    ip = result.pop()
                except IndexError:
                    break
                work = IsEnable(ip)
                work.daemon = True
                work.start()
                num += 1
            time.sleep(5)
        try:
            conn.commit()
        except Exception:
            pass
        cursor.close()
        conn.close()
        print('[%s][ProxyPool]Crawl IP Count:' %
              get_current_time(), crawl_ip_count)
        print('[%s][ProxyPool][Sleeping]' % get_current_time())
        time.sleep(300)
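
# Assumed MySQL schema for the table used above (not part of this repo;
# adjust names and types to your own setup):
#   CREATE TABLE proxypool (
#       ip   VARCHAR(64),
#       port VARCHAR(8),
#       time DATETIME
#   );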