[Docubay] Add series support.

Ashish0804 · Jun 27, 2022 · 8d09342 · 8d09342
1 parent 881267b
commit 8d09342
Showing 1 changed file with 42 additions and 11 deletions.
diff --git a/yt_dlp/extractor/docubay.py b/yt_dlp/extractor/docubay.py
@@ -1,30 +1,67 @@
+import re
+
+from ..utils import try_get
 from .common import InfoExtractor
 
 
 class DocubayIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?docubay\.com/[^#/?$]+-(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'https://www.docubay.com/20000-cables-under-the-sea-2574',
+        'url': 'https://www.docubay.com/inside-ikea-3242',
         'info_dict': {
-            'id': '2574',
+            'id': '3242',
             'ext': 'mp4',
-            'description': 'md5:aed927dcff70441282d7864e9b9a8d20',
-            'thumbnail': 'https://st1.docubay.com/featured-images/1606222830-20k-cables-under-the-sea-1024x576-xoriginal.jpg',
-            'title': '20,000 Cables under the Sea',
+            'description': 'md5:89b599ebdd695811d4f76a2fc5ee5718',
+            'thumbnail': 'https://st1.docubay.com/featured-images/1617969319-inside-ikea-1024x576-banner-xoriginal.jpg',
+            'title': 'Inside IKEA',
+            'upload_date': '20180101',
+        }
+    }, {
+        'url': 'https://www.docubay.com/the-female-battalion-3430',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': '3430',
+            'title': 'The Female Battalion',
         }
+
     }]
 
     _API_URL = "https://www.docubay.com/ajaxplayer"
 
+    def handle_series(self, webpage, s_id):
+        """Return a playlist (id `s_id`) of the episode URLs that the season API lists for the page in `webpage`."""
+        # Both ids are scraped from the data-currcontentid / data-id attributes of the already-downloaded page.
+        series_id = self._search_regex(r'data-currcontentid\s*=\s*"\d+"\s*data-id\s*=\s*"(\d+)"', webpage, 'id', None, False)
+        current_id = self._search_regex(r'data-currcontentid\s*=\s*"(\d+)"', webpage, 'current_id', None, False)
+        series_dump = self._download_webpage(
+            "https://www.docubay.com/season-api", video_id=s_id,
+            headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
+            data=f'id={series_id}&current_id={current_id}&curr_play_status='.encode())
+        # dict.fromkeys de-duplicates while keeping response order; a set would reorder the entries between runs.
+        episode_urls = dict.fromkeys(re.findall(r'(https?://(?:www\.)?docubay\.com/[^#/?$]+-\d+)', series_dump))
+        return self.playlist_result(
+            [self.url_result(v_url, ie=DocubayIE.ie_key()) for v_url in episode_urls],
+            playlist_id=s_id,
+            playlist_title=self._search_regex(r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False))
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage).replace(' | Online at DocuBay', '').replace('Watch ', '').replace(' - ', '')
+        if not title:
+            title = self._search_regex(r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False)
+
         data_json = self._parse_json(self._download_json(
             self._API_URL,
             video_id,
             headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
             data=f'cid={video_id}&action=st&type=video'.encode()), video_id)
 
+        if not try_get(data_json, lambda x: x['url']['video_url'], None):
+            return self.handle_series(webpage, video_id)
+
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(data_json['url']['video_url'], video_id)
         subs = {}
         for sub in data_json.get('subtitles') or []:
@@ -35,11 +72,6 @@ def _real_extract(self, url):
                 'url': sub_url,
             })
 
-        webpage = self._download_webpage(url, video_id)
-        title = self._search_regex(r'class\s*=\s*"shows-title"[^>]*>([^<]+)', webpage, 'title', None, False)
-        if not title:
-            title = self._og_search_title(webpage).replace(' | Online at DocuBay', '').replace('Watch ', '')
-
         upload_date = self._search_regex(r'class\s*=\s*"show-duration"[^>]*>\s*(\d+)', webpage, 'upload date', None, False)
         if upload_date:
             upload_date += '0101'
@@ -54,5 +86,4 @@ def _real_extract(self, url):
             'upload_date': upload_date,
             'formats': formats,
             'subtitles': subtitles
-
         }