Skip to content

Commit

Permalink
[Docubay] Add series support.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashish0804 committed Jun 27, 2022
1 parent 881267b commit 8d09342
Showing 1 changed file with 42 additions and 11 deletions.
53 changes: 42 additions & 11 deletions yt_dlp/extractor/docubay.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,67 @@
import re

from ..utils import try_get
from .common import InfoExtractor


class DocubayIE(InfoExtractor):
# Matches http(s) docubay.com URLs (optional "www.") whose path segment ends in
# "-<digits>"; the trailing digits are captured as the named group `id`.
_VALID_URL = r'https?://(?:www\.)?docubay\.com/[^#/?$]+-(?P<id>\d+)'

_TESTS = [{
'url': 'https://www.docubay.com/20000-cables-under-the-sea-2574',
'url': 'https://www.docubay.com/inside-ikea-3242',
'info_dict': {
'id': '2574',
'id': '3242',
'ext': 'mp4',
'description': 'md5:aed927dcff70441282d7864e9b9a8d20',
'thumbnail': 'https://st1.docubay.com/featured-images/1606222830-20k-cables-under-the-sea-1024x576-xoriginal.jpg',
'title': '20,000 Cables under the Sea',
'description': 'md5:89b599ebdd695811d4f76a2fc5ee5718',
'thumbnail': 'https://st1.docubay.com/featured-images/1617969319-inside-ikea-1024x576-banner-xoriginal.jpg',
'title': 'Inside IKEA',
'upload_date': '20180101',
}
}, {
'url': 'https://www.docubay.com/the-female-battalion-3430',
'playlist_mincount': 6,
'info_dict': {
'id': '3430',
'title': 'The Female Battalion',
}

}]

# Endpoint that _real_extract POSTs a form-encoded body
# (cid=<video id>&action=st&type=video) to in order to fetch the player JSON.
_API_URL = "https://www.docubay.com/ajaxplayer"

def handle_series(self, webpage, s_id):
    """Build a playlist result for a series page.

    Reads the series id and the currently selected content id from the
    ``data-id`` / ``data-currcontentid`` attributes in ``webpage``, POSTs
    them to the season API, and turns every Docubay video URL found in the
    response into a playlist entry.

    :param webpage: HTML of the series page that was already downloaded.
    :param s_id: id taken from the page URL; used as the download id and
        as the playlist id.
    :return: the value of ``self.playlist_result(...)`` for the series.
    """
    # Both lookups are non-fatal with a default of None.
    # NOTE(review): when an attribute is missing, the literal text "None" is
    # sent in the form body below - confirm the API tolerates that.
    series_id = self._search_regex(
        r'data-currcontentid\s*=\s*"\d+"\s*data-id\s*=\s*"(\d+)"',
        webpage, 'id', None, False)
    current_id = self._search_regex(
        r'data-currcontentid\s*=\s*"(\d+)"', webpage, 'current_id', None, False)

    series_dump = self._download_webpage(
        "https://www.docubay.com/season-api",
        video_id=s_id,
        headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
        data=f'id={series_id}&current_id={current_id}&curr_play_status='.encode()
    )

    # De-duplicate while keeping first-seen order. A set() would iterate the
    # URL strings in a hash-dependent order, so the playlist order could
    # differ from one run to the next.
    episode_urls = dict.fromkeys(
        re.findall(r'(https?://(?:www\.)?docubay\.com/[^#/?$]+-\d+)', series_dump))

    return self.playlist_result(
        [self.url_result(v_url, ie=DocubayIE.ie_key()) for v_url in episode_urls],
        playlist_id=s_id,
        playlist_title=self._search_regex(
            r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False))

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)

title = self._og_search_title(webpage).replace(' | Online at DocuBay', '').replace('Watch ', '').replace(' - ', '')
if not title:
title = self._search_regex(r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False)

data_json = self._parse_json(self._download_json(
self._API_URL,
video_id,
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
data=f'cid={video_id}&action=st&type=video'.encode()), video_id)

if not try_get(data_json, lambda x: x['url']['video_url'], None):
return self.handle_series(webpage, video_id)

formats, subtitles = self._extract_m3u8_formats_and_subtitles(data_json['url']['video_url'], video_id)
subs = {}
for sub in data_json.get('subtitles') or []:
Expand All @@ -35,11 +72,6 @@ def _real_extract(self, url):
'url': sub_url,
})

webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False)
if not title:
title = self._og_search_title(webpage).replace(' | Online at DocuBay', '').replace('Watch ', '')

upload_date = self._search_regex(r'class\s*=\s*"show-duration"[^>]*>\s*(\d+)', webpage, 'upload date', None, False)
if upload_date:
upload_date += '0101'
Expand All @@ -54,5 +86,4 @@ def _real_extract(self, url):
'upload_date': upload_date,
'formats': formats,
'subtitles': subtitles

}

0 comments on commit 8d09342

Please sign in to comment.