-
Notifications
You must be signed in to change notification settings - Fork 7
/
TwitterMedia.py
134 lines (105 loc) · 4.31 KB
/
TwitterMedia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import re, requests
from pathlib import Path
from bs4 import BeautifulSoup
from shutil import copyfileobj
from urllib.parse import quote as qt
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Base HTTP headers for the form POST issued by TwitterMedia.fetch_media
# (which adds its own lowercase 'referer' entry on top of these).
# The user-agent string identifies the client as Safari on an iPhone.
HEADERS = {
    'authority': 'twittervideodownloader.com',
    'Referer': 'http://twittervideodownloader.com/',
    'content-type': 'application/x-www-form-urlencoded',
    'user-agent': (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4_1 like Mac OS X) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) '
        'Version/16.4 Mobile/15E148 Safari/604.1'
    ),
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,*/*;q=0.8'
    ),
}
class TwitterMediaContent:
    """Describe one downloadable media item.

    Holds three values: the direct media ``url``, its ``type`` (the
    strings 'gif' or 'video' when built by ``TwitterMedia.fetch_media``)
    and a base ``filename``. Each value is readable as an attribute or by
    subscripting with the field name, e.g. ``item['url']``.
    """

    # Class-level defaults; every instance overrides them in __init__.
    url = None
    type = None
    filename = None

    def __init__(self, url, type, filename):
        self.url, self.type, self.filename = url, type, filename

    def __getitem__(self, name):
        """Return the field called *name*, or None for any other key."""
        for field in ('url', 'type', 'filename'):
            if name == field:
                return getattr(self, field)
        return None
class TwitterMedia:
    """Client for a Twitter video downloader web service.

    ``fetch_media`` submits a tweet URL to the service's download form and
    returns a ``TwitterMediaContent`` describing the best media link found;
    ``download`` saves such a link to disk. The CSRF token required by the
    form is fetched once and cached in a ``csrftoken`` file in the current
    working directory.

    Args:
        use_print: When true, progress/debug messages are printed to stdout.
    """

    # Cache file for the CSRF token, relative to the current working directory.
    _TOKEN_CACHE = Path('csrftoken')
    # Patterns used to build the output filename from a tweet URL.
    _STATUS_RE = re.compile(r'status/([\d.]*\d+)')
    _USERNAME_RE = re.compile(r'.com/(.*)/status')
    # '/<width>x<height>/' path segment of a video variant URL.
    _RESOLUTION_RE = re.compile(r'/([0-9]+)x([0-9]+)/')
    # CSS selector of the download buttons on the result page.
    _DOWNLOAD_LINKS = 'a.expanded.button.small.float-right'

    def __init__(self, use_print=False):
        self._token = ''
        self._is_token_generated = False
        self._use_print = use_print
        # The site is addressed by IP to bypass Cloudflare. Requests are
        # therefore made with certificate verification disabled (see
        # ``browser``), and the warning that would cause is silenced here.
        self.main_page = 'https://165.227.112.236/'
        self.download_page = 'https://165.227.112.236/download'
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    def browser(self, post, url, **kwargs):
        """Perform one HTTP request and return the ``requests`` response.

        Args:
            post: Truthy for a form POST, falsy for a streaming GET.
            url: Target URL.
            **kwargs: For POST only: ``headers``, ``cookies`` and ``rawdata``
                (the request body). Missing entries are sent as ``None``.

        Returns:
            The response object. GET responses are opened with
            ``stream=True`` so the caller can read ``response.raw``.
        """
        # SECURITY NOTE: verify=False disables TLS certificate checks. It is
        # deliberate here because the host is reached by bare IP address.
        with requests.Session() as session:
            if post:
                return session.post(
                    url,
                    verify=False,
                    headers=kwargs.get('headers'),
                    cookies=kwargs.get('cookies'),
                    data=kwargs.get('rawdata'),
                )
            return session.get(url, verify=False, stream=True)

    def download(self, url, custom=None):
        """Save the media at *url* into the current directory.

        Args:
            url: Direct media URL.
            custom: Optional base name; when given the file is written as
                ``'<custom>.mp4'``, otherwise the last path segment of *url*
                (without its query string) is used.

        Returns:
            The name of the file that was written.
        """
        if custom:
            filename = f'{custom}.mp4'
        else:
            filename = url.split('/')[-1].split('?')[0]
        with self.browser(0, url) as response, open(filename, 'wb') as target:
            copyfileobj(response.raw, target)
        return filename

    def fetch_media(self, url):
        """Look up the downloadable media for the tweet at *url*.

        Returns:
            A ``TwitterMediaContent`` of type 'gif' when the result page
            offers exactly one link, or of type 'video' pointing at the
            variant with the greatest height when it offers several.
            ``None`` when the service did not return a result page (the
            cached CSRF token is then discarded so that the next call
            requests a fresh one) or when the page has no download links.

        Raises:
            IndexError: If *url* contains no ``<user>/status/<id>`` part.
        """
        self._generate_token()
        response = self.browser(
            1,
            self.download_page,
            cookies={'csrftoken': self._token},
            rawdata=f'csrfmiddlewaretoken={self._token}&tweet={qt(url)}&submit=',
            headers={
                **HEADERS,
                'referer': self.main_page,
            },
        )
        content = response.content
        if b'Download Video' not in content:
            self._invalidate_token()
            return None

        filename = self._media_filename(url)
        soup = BeautifulSoup(content, 'html.parser')
        links = [anchor['href'] for anchor in soup.select(self._DOWNLOAD_LINKS)]
        if not links:
            return None
        if len(links) == 1:
            return TwitterMediaContent(links[0], 'gif', filename)
        return TwitterMediaContent(self._best_video_url(links), 'video', filename)

    @classmethod
    def _media_filename(cls, url):
        """Return ``'<username>_<status id>'`` parsed from a tweet URL."""
        user = cls._USERNAME_RE.search(url)
        status = cls._STATUS_RE.search(url)
        if user is None or status is None:
            # IndexError is what the previous findall(...)[0] lookup raised;
            # the type is kept so existing callers are unaffected.
            raise IndexError(f'not a tweet status URL: {url!r}')
        return '{}_{}'.format(user.group(1), status.group(1))

    @classmethod
    def _best_video_url(cls, urls):
        """Return the entry of *urls* with the largest height in its path.

        Links without a ``/<width>x<height>/`` segment rank lowest; on a tie
        the earliest link wins. *urls* must not be empty.
        """
        def height(link):
            found = cls._RESOLUTION_RE.search(link)
            return int(found.group(2)) if found else -1

        return max(urls, key=height)

    def _invalidate_token(self):
        """Forget the CSRF token, both in memory and in the cache file."""
        self._print('Deleting the csrf token!')
        if self._TOKEN_CACHE.is_file():
            self._TOKEN_CACHE.unlink()
        # Without this reset the instance would keep sending the rejected
        # token, because _generate_token returns early once the flag is set.
        self._token = ''
        self._is_token_generated = False

    def _read_cached_token(self):
        """Return the token stored in the cache file, or '' if there is none."""
        if not self._TOKEN_CACHE.is_file():
            return ''
        with self._TOKEN_CACHE.open('r') as cache:
            # Strip so a trailing newline never ends up in the cookie value.
            return cache.readline().strip()

    def _generate_token(self):
        """Ensure ``self._token`` holds a CSRF token.

        The token is loaded from the cache file when one is present and
        non-empty; otherwise the main page is requested, its ``csrftoken``
        cookie is stored in memory and written to the cache file.

        Raises:
            KeyError: If the main page response carries no ``csrftoken``
                cookie.
        """
        # FIXME: Should we instead look for self.token only?
        if self._is_token_generated:
            return
        cached = self._read_cached_token()
        if cached:
            self._token = cached
            self._print('Loaded the csrf token from cache.\n')
        else:
            with self.browser(0, self.main_page) as response:
                self._print(response.content)
                self._print(response.cookies)
                csrftoken = response.cookies['csrftoken']
            self._token = csrftoken
            with self._TOKEN_CACHE.open('w') as cache:
                cache.write(csrftoken)
            self._print('Created a new csrf token!\n')
        self._is_token_generated = True

    def _print(self, *args):
        """Print *args* only when the instance was created with use_print."""
        if self._use_print:
            print(*args)