-
Notifications
You must be signed in to change notification settings - Fork 1
/
drtv-dl.py
215 lines (171 loc) · 6.82 KB
/
drtv-dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/usr/bin/env python
try:
import urllib.request as urllib
except ImportError:
# python2 compatibility
# probably not the best way to ensure compatibility, but it works - for now
import urllib
import sys
import json
import argparse
from bs4 import BeautifulSoup
def parseString(data, findStart, findEnd):
    ''' returns string between findStart and findEnd

    data may be UTF-8 encoded bytes (as read from a HTTP response) or an
    already decoded string. The search for findEnd begins right after the
    first occurrence of findStart.

    Raises ValueError when either marker cannot be found, instead of
    returning a meaningless slice of the input.
    '''
    # decode only when we were handed raw bytes
    if isinstance(data, bytes):
        data = data.decode('utf-8')

    markerPos = data.find(findStart)
    if markerPos == -1:
        # find() returned -1; adding len(findStart) would yield a bogus offset
        raise ValueError('start marker not found: ' + repr(findStart))
    start = markerPos + len(findStart)

    end = data.find(findEnd, start)
    if end == -1:
        # slicing with -1 would silently drop the last character
        raise ValueError('end marker not found: ' + repr(findEnd))

    return data[start:end]
def getUrlContent(url):
    ''' returns the raw (undecoded) body found at url

    The response is closed explicitly once it has been read, so the
    underlying connection is not left open until garbage collection.
    '''
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # try/finally instead of "with": the python2 urllib response
        # object is not a context manager
        page.close()
def gatherInformation(jsonData):
    ''' returns a dict with the details read from the program json

    Currently only 'title' is collected; it is taken from the 'Title'
    field of the first entry in jsonData["Data"].
    '''
    firstEntry = jsonData["Data"][0]
    return {'title': firstEntry['Title']}
def _reportHook(count, block_size, total_size):
    ''' progress bar for urlretrieve

    count is the number of blocks transferred so far, block_size the size
    of one block in bytes and total_size the size of the whole file.
    When total_size is not positive (the size is unknown) the amount
    downloaded so far is printed in MB instead of a percentage.
    '''
    downloaded = count * block_size
    if total_size > 0:
        # the last block is usually only partly filled, so cap at 100
        percent = min(100, downloaded * 100 // total_size)
        sys.stdout.write("\rDownloading...%d%%" % percent)
    else:
        # no usable total: a percentage would divide by zero or be negative
        sys.stdout.write("\rDownloading...%d MB" % (downloaded // (1024 * 1024)))
    sys.stdout.flush()
def downloadDRTV(url, output=None):
    ''' downloads the highest bitrate stream of a DR TV program

    url is the address of the DR TV page. The page is searched for its
    json resource file, which lists the available streams. output is the
    file name to save to; when it is None the program title plus '.mp4'
    is used.

    Exits the process with status 1 when the json does not have the
    expected layout or when no streaming link could be found.
    '''
    print('Finding json data URL from DR TV...')

    # find json data
    siteContent = getUrlContent(url)
    jsonUrl = parseString(siteContent, 'resource: "', '"')
    print('- Resource file found: ' + jsonUrl)

    print('Finding video file...')

    # get json data
    jsonContent = getUrlContent(jsonUrl)
    jsonData = json.loads(jsonContent.decode('utf-8'))

    # pair every streaming bitrate with its stream url
    stream_urls = {}
    try:
        for asset in jsonData['Data'][0]['Assets']:
            for link in asset.get('Links', []):
                # make sure this is for pc streaming
                if 'Bitrate' in link and link.get('Target') == 'Streaming':
                    stream_urls[link['Bitrate']] = link['Uri']
            # stop at the first asset that actually gave us a stream
            if stream_urls:
                break
    except (KeyError, IndexError, TypeError, AttributeError):
        # the json does not have the layout we expect
        print("Error occured while parsing JSON. Quitting")
        sys.exit(1)

    # max() on an empty collection would raise, so check before using it
    streamUrl = stream_urls[max(stream_urls)] if stream_urls else None
    if streamUrl is None:
        print('An error occured while fetching the download URL. Exiting')
        sys.exit(1)

    print('Highest bitrate found: ' + str(max(stream_urls)))
    print('- Video file found: ' + str(streamUrl))
    print('Starting download...')

    # replace some text in the url
    streamUrl = streamUrl.replace('rtmp://vod.dr.dk/cms/mp4:', 'http://vodfiles.dr.dk/')

    # gather some more details about the show
    filmData = gatherInformation(jsonData)

    # download the film
    target = filmData['title'] + '.mp4' if output is None else output
    urllib.urlretrieve(streamUrl, target, reporthook=_reportHook)
    print('')

    print('Download finished! Quitting')
def downloadDRBonanza(url, output=None):
    ''' downloads one or all programs listed on a DR Bonanza page

    url is the address of the DR Bonanza page. Every element with the
    class 'listItem Video' carries its program details as json inside its
    onclick attribute. The user is asked which program to download; an
    empty answer downloads all of them.

    output is the file name used when a single program is selected. When
    it is None, or when all programs are downloaded, each file is named
    after the program title plus '.mp4'.
    '''
    print('Finding video files from DR Bonanza...')

    # fetch html content
    site_content = getUrlContent(url)

    # name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed
    html_soup = BeautifulSoup(site_content, 'html.parser')

    # parse the html into json
    programs = []
    for link in html_soup.find_all(class_='listItem Video'):
        # the json object starts at the first '{' of the onclick event;
        # the last two characters of the attribute are not part of it
        onclick_string = link['onclick']
        json_start = onclick_string.find('{')
        json_obj = json.loads(onclick_string[json_start:-2])

        # fetch program information
        program = {
            'id': link['id'],
            'title': json_obj['Title'],
            'description': json_obj['Description'],
        }

        # find highest quality
        for file_obj in json_obj['Files']:
            if 'VideoHigh' in file_obj['Type']:
                # highest video stream found, generate download link
                program['video'] = 'http://vodfiles.dr.dk/' + file_obj['Location'].split(':')[-1]

        # a program without a video link cannot be downloaded, so it is
        # not offered to the user
        if 'video' in program:
            programs.append(program)

    if not programs:
        # nothing to choose from; asking for a selection would never end
        print('No downloadable programs found.')
        return

    # list the choices, numbered from 1
    print('There are ' + str(len(programs)) + ' programs available for download:')
    for number, program in enumerate(programs, 1):
        print(str(number) + ') ' + program['title'])

    # python2 input support
    try:
        get_input = raw_input
    except NameError:
        get_input = input

    # get user input for download
    while True:
        print('')
        cmd = get_input('Enter a selection (default=all): ').strip()
        if cmd == '':
            # default=all
            break
        try:
            selection = int(cmd)
        except ValueError:
            # not a number at all: treat it like an out-of-range value
            selection = 0
        if 0 < selection <= len(programs):
            break
        print('error: invalid value: ' + str(cmd) + ' is not between 1 and ' + str(len(programs)))

    # parse user input
    if cmd == '':
        print('You chose to download all listed programs!')
        # loop through programs and download each one
        for program in programs:
            print('Starting download of: "' + program['title'] + '"...')
            urllib.urlretrieve(program['video'], program['title'] + '.mp4', reporthook=_reportHook)
            print('')
    else:
        desired_program = programs[selection - 1]
        print('You selected: "' + desired_program['title'] + '"')
        # download the program
        target = desired_program['title'] + '.mp4' if output is None else output
        urllib.urlretrieve(desired_program['video'], target, reporthook=_reportHook)
        print('')

    # assume that download has finished
    print('Download finished! Quitting')
if __name__ == '__main__':
    # init parser
    parser = argparse.ArgumentParser(prog='drtv-dl', description='Small command-line program to download videos from DR TV (www.dr.dk/tv/) and DR Bonanza (www.dr.dk/bonanza/)')
    parser.add_argument('URL', nargs='+', help='The URL of the DR TV or DR Bonanza stream')
    parser.add_argument('-o', help='Output file')
    args = vars(parser.parse_args())

    # handle arguments
    urls = args['URL']
    url = urls[0]
    outputFile = args['o']
    if len(urls) > 1:
        # the parser accepts several URLs but only the first one is used;
        # say so instead of dropping the rest silently
        sys.stderr.write('warning: only the first URL is downloaded; ignoring %d extra argument(s)\n' % (len(urls) - 1))

    # start application: pick the downloader from the text of the URL,
    # checking for bonanza first
    lowered_url = url.lower()
    if 'bonanza' in lowered_url:
        downloadDRBonanza(url, outputFile)
    elif 'tv' in lowered_url:
        downloadDRTV(url, outputFile)
    else:
        print('Error occured. URL is not valid!')
        # report the failure to the calling shell as well
        sys.exit(1)