-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproxy.py
executable file
·80 lines (64 loc) · 2.63 KB
/
proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
import json, time, random
from twisted.internet import reactor
from twisted.web import proxy, http
from ers import ERSLocal
import ipdb
def header2json(header):
    """Serialize the raw headers held by ``header`` into a JSON string.

    ``header`` must provide ``getAllRawHeaders()``, yielding
    ``(name, [value, ...])`` pairs.  The result is a JSON array of
    ``[name, [value, ...]]`` entries in iteration order.

    The previous implementation built the list and then discarded it, so
    the function always returned ``None``.
    """
    def _text(value):
        # json cannot encode bytes on Python 3; latin-1 maps every byte to
        # exactly one code point, so the decoding can never fail.
        return value.decode('latin-1') if isinstance(value, bytes) else value

    return json.dumps([
        [_text(name), [_text(item) for item in values]]
        for name, values in header.getAllRawHeaders()
    ])
def json2header(json_string):
    """Placeholder for the inverse of ``header2json``; not implemented.

    The argument is ignored and ``None`` is always returned.
    """
    return None
class CommonProxyClient(proxy.ProxyClient):
    """Proxy client that currently relays headers and body data unchanged.

    Both overrides delegate directly to ``proxy.ProxyClient``; they exist
    as hook points for the response caching sketched in the disabled code
    below.
    """

    def handleHeader(self, key, value):
        """Hand one response header to the stock ``ProxyClient`` handler."""
        proxy.ProxyClient.handleHeader(self, key, value)

    def handleResponsePart(self, buffer):
        """Hand one chunk of the response body to the stock handler."""
        # ipdb.set_trace()
        #
        # Disabled response caching, kept for reference:
        #
        # if self.father.is_cacheable():
        #     # print "caching"
        #     cache.add_data(self.father.request_id, 'http:content', buffer, provenance)
        #     headers_string = json.dumps(tuple(self.father.responseHeaders.getAllRawHeaders()))
        #     cache.add_data(self.father.request_id, 'http:responseHeaders',
        #                    headers_string, provenance)
        # #else:
        # #    print "will not cache"
        proxy.ProxyClient.handleResponsePart(self, buffer)
class CommonProxyClientFactory(proxy.ProxyClientFactory):
    """Client factory whose protocol class is ``CommonProxyClient``."""
    # Overrides the protocol class used by the inherited ProxyClientFactory.
    protocol = CommonProxyClient
class CommonProxyRequest(proxy.ProxyRequest):
    """Proxy request that records each request in the module-level ``cache``.

    Only plain ``http`` is mapped to a client factory; any other scheme
    makes the inherited ``process`` raise ``KeyError``, which is handled
    in ``process`` below.
    """

    protocols = {'http': CommonProxyClientFactory}

    # Exclusive upper bound on Content-Length for a cacheable response.
    # The original code compared against 10e5 (1,000,000), which
    # contradicted the documented limit of 100000.
    MAX_CACHEABLE_LENGTH = 100000

    def process(self):
        """Log the request, then forward it through ``ProxyRequest.process``.

        When the base implementation raises ``KeyError`` (no port or client
        factory registered for the URI scheme, e.g. HTTPS), the message is
        printed and the request is answered with 501 so the client does not
        wait forever on a request that will never be forwarded.
        """
        self.log_request()
        try:
            proxy.ProxyRequest.process(self)
        except KeyError:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print("HTTPS is not supported at the moment!")
            self.setResponseCode(501, b"Not Implemented")
            self.finish()

    def log_request(self):
        """Assign ``self.request_id`` and store the request's metadata.

        The id is ``urn:http:request:`` followed by the current timestamp
        and a random four-digit number.  URL, date (UTC, ``asctime``
        format), JSON-encoded headers and method are written to the
        module-level ``cache`` under that id with the module-level
        ``provenance``.
        """
        call_time = time.time()
        self.request_id = 'urn:http:request:{0}{1}'.format(
            call_time, random.randint(1000, 9999))
        headers = self.getAllHeaders()
        cache.add_data(self.request_id, 'http:url', self.uri, provenance)
        cache.add_data(self.request_id, 'dct:date',
                       time.asctime(time.gmtime(call_time)), provenance)
        cache.add_data(self.request_id, 'http:headers',
                       json.dumps(headers), provenance)
        cache.add_data(self.request_id, 'http:mthd', self.method, provenance)

    def is_cacheable(self):
        """Return True when the response qualifies for caching.

        A response is cacheable when its status code is 200, some
        Content-Type value contains ``'text'`` and its Content-Length is
        below ``MAX_CACHEABLE_LENGTH``.  A missing, empty or non-numeric
        header makes the response non-cacheable instead of raising.
        """
        response_headers = self.responseHeaders
        try:
            length = int(response_headers.getRawHeaders('content-length')[0])
            is_text = any(
                'text' in content_type
                for content_type in response_headers.getRawHeaders('content-type')
            )
        except (TypeError, ValueError, IndexError):
            # TypeError: header absent (the lookup did not return a list);
            # ValueError: Content-Length is not an integer;
            # IndexError: header present but with no values.
            return False
        return (self.code == 200 and is_text
                and length < self.MAX_CACHEABLE_LENGTH)
class CommonProxy(proxy.Proxy):
    """Proxy channel whose request class is ``CommonProxyRequest``."""
    # Overrides the request class used by the inherited proxy.Proxy.
    requestFactory = CommonProxyRequest
class CommonProxyFactory(http.HTTPFactory):
    """Server factory that produces a ``CommonProxy`` per connection."""

    def buildProtocol(self, addr):
        """Return a new ``CommonProxy``; ``addr`` is not used."""
        channel = CommonProxy()
        return channel
# Provenance label passed to every cache.add_data() call in this module.
provenance = 'local'
# Shared store that the request logging writes to.
# NOTE(review): presumably opens or creates a local database named 'cache' - confirm against ERSLocal.
cache = ERSLocal(dbname='cache')
# Accept proxy connections on TCP port 8080.
reactor.listenTCP(8080, CommonProxyFactory())
# Start the Twisted reactor.
reactor.run()