diff --git a/lib/yt_dlp/extractor/_extractors.py b/lib/yt_dlp/extractor/_extractors.py index fc3ffeef0..51caefd4d 100644 --- a/lib/yt_dlp/extractor/_extractors.py +++ b/lib/yt_dlp/extractor/_extractors.py @@ -1156,6 +1156,7 @@ from .mixch import ( MixchArchiveIE, MixchIE, + MixchMovieIE, ) from .mixcloud import ( MixcloudIE, @@ -1939,9 +1940,7 @@ ) from .spreaker import ( SpreakerIE, - SpreakerPageIE, SpreakerShowIE, - SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/lib/yt_dlp/extractor/chaturbate.py b/lib/yt_dlp/extractor/chaturbate.py index b49f741ef..864d61f9c 100644 --- a/lib/yt_dlp/extractor/chaturbate.py +++ b/lib/yt_dlp/extractor/chaturbate.py @@ -9,7 +9,7 @@ class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?Pcom|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.chaturbate.com/siswet19/', 'info_dict': { @@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor): }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, + }, { + 'url': 'https://chaturbate.eu/siswet19/', + 'only_matching': True, + }, { + 'url': 'https://chaturbate.eu/fullvideo/?b=caylin', + 'only_matching': True, + }, { + 'url': 'https://chaturbate.global/siswet19/', + 'only_matching': True, }] _ROOM_OFFLINE = 'Room is currently offline' def _real_extract(self, url): - video_id = self._match_id(url) + video_id, tld = self._match_valid_url(url).group('id', 'tld') webpage = self._download_webpage( - f'https://chaturbate.com/{video_id}/', video_id, + f'https://chaturbate.{tld}/{video_id}/', video_id, headers=self.geo_verification_headers()) found_m3u8_urls = [] diff --git a/lib/yt_dlp/extractor/cloudflarestream.py b/lib/yt_dlp/extractor/cloudflarestream.py index 8a409461a..9e9e89a80 100644 --- a/lib/yt_dlp/extractor/cloudflarestream.py +++ b/lib/yt_dlp/extractor/cloudflarestream.py @@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor): _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' - _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P{_ID_RE})' + _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P{_DOMAIN_RE})/|{_EMBED_RE})(?P{_ID_RE})' _EMBED_REGEX = [ rf']+\bsrc=(["\'])(?P(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf']+\bsrc=["\'](?Phttps?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', @@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': '31c9291ab41fac05471db4e73aa11717', 'ext': 'mp4', 'title': '31c9291ab41fac05471db4e73aa11717', - 'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', }, 'params': { 'skip_download': 'm3u8', @@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': '0e8e040aec776862e1d632a699edf59e', 'ext': 'mp4', 'title': '0e8e040aec776862e1d632a699edf59e', - 'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', }, }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', @@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': 'eaef9dea5159cf968be84241b5cedfe7', 'ext': 'mp4', 'title': 'eaef9dea5159cf968be84241b5cedfe7', - 'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', }, 'params': { 'skip_download': 'm3u8', @@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) - domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' + video_id, domain = self._match_valid_url(url).group('id', 'domain') + if domain != 'bytehighway.net': + domain = 'cloudflarestream.com' base_url = f'https://{domain}/{video_id}/' if '.' in video_id: video_id = self._parse_json(base64.urlsafe_b64decode( diff --git a/lib/yt_dlp/extractor/goplay.py b/lib/yt_dlp/extractor/goplay.py index dfe5afe63..32300f75c 100644 --- a/lib/yt_dlp/extractor/goplay.py +++ b/lib/yt_dlp/extractor/goplay.py @@ -5,56 +5,63 @@ import hmac import json import os +import re +import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + js_to_json, + remove_end, traverse_obj, - unescapeHTML, ) class GoPlayIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P[^/#]+)' + _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P[^/#]+)' _NETRC_MACHINE = 'goplay' _TESTS = [{ - 'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', + 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1', 'info_dict': { - 'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', + 'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'ext': 'mp4', - 'title': 'S3 - Aflevering 2', - 'series': 'De Container Cup', - 'season': 'Season 3', - 'season_number': 3, - 'episode': 'Episode 2', - 'episode_number': 2, + 'title': 'S22 - Aflevering 1', + 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', + 'series': 'De Slimste Mens ter Wereld', + 'episode': 'Episode 1', + 'season_number': 22, + 'episode_number': 1, + 'season': 'Season 22', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { - 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', + 'url': 'https://www.goplay.be/video/1917', 'info_dict': { - 'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', + 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907', 'ext': 'mp4', - 'title': 'A Family for the Holidays', + 'title': '1917', + 'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'info_dict': { - 'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', + 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'ext': 'mp4', 'title': 'S11 - Aflevering 1', + 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}', 'episode': 'Episode 1', 'series': 'De Mol', 'season_number': 11, 'episode_number': 1, 'season': 'Season 11', }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }] @@ -69,27 +76,42 @@ def _real_initialize(self): if not self._id_token: raise self.raise_login_required(method='password') + def _find_json(self, s): + return self._search_json( + r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None) + def _real_extract(self, url): - url, display_id = self._match_valid_url(url).group(0, 'display_id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_data_json = self._html_search_regex(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), + (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...)) + meta = traverse_obj(nextjs_data, ( + ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any)) + + video_id = meta['uuid'] + info_dict = traverse_obj(meta, { + 'title': ('title', {str}), + 'description': ('description', {str.strip}), + }) + + if traverse_obj(meta, ('program', 'subtype')) != 'movie': + for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): + episode_data = traverse_obj( + season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) + if not episode_data: + continue + + episode_title = traverse_obj( + episode_data, 'contextualTitle', 'episodeTitle', expected_type=str) + info_dict.update({ + 'title': episode_title or info_dict.get('title'), + 'series': remove_end(info_dict.get('title'), f' - {episode_title}'), + 'season_number': traverse_obj(season_data, ('season', {int_or_none})), + 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), + }) + break api = self._download_json( f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', diff --git a/lib/yt_dlp/extractor/mixch.py b/lib/yt_dlp/extractor/mixch.py index 7832784b2..4bccc81bd 100644 --- a/lib/yt_dlp/extractor/mixch.py +++ b/lib/yt_dlp/extractor/mixch.py @@ -12,7 +12,7 @@ class MixchIE(InfoExtractor): IE_NAME = 'mixch' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/u/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/u/16943797/live', @@ -74,7 +74,7 @@ def _get_comments(self, video_id): class MixchArchiveIE(InfoExtractor): IE_NAME = 'mixch:archive' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/archive/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/archive/421', @@ -116,3 +116,56 @@ def _real_extract(self, url): 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), } + + +class MixchMovieIE(InfoExtractor): + IE_NAME = 'mixch:movie' + _VALID_URL = r'https?://mixch\.tv/m/(?P\w+)' + + _TESTS = [{ + 'url': 'https://mixch.tv/m/Ve8KNkJ5', + 'info_dict': { + 'id': 'Ve8KNkJ5', + 'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ', + 'ext': 'mp4', + 'uploader': 'ミス東大No.5 松藤百香🍑💫', + 'uploader_id': '12299174', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'timestamp': 1724070828, + 'uploader_url': 'https://mixch.tv/u/12299174', + 'live_status': 'not_live', + 'upload_date': '20240819', + }, + }, { + 'url': 'https://mixch.tv/m/61DzpIKE', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + f'https://mixch.tv/api-web/movies/{video_id}', video_id) + return { + 'id': video_id, + 'formats': [{ + 'format_id': 'mp4', + 'url': data['movie']['file'], + 'ext': 'mp4', + }], + **traverse_obj(data, { + 'title': ('movie', 'title', {str}), + 'thumbnail': ('movie', 'thumbnailURL', {url_or_none}), + 'uploader': ('ownerInfo', 'name', {str}), + 'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}), + 'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}), + 'view_count': ('ownerInfo', 'view', {int_or_none}), + 'like_count': ('movie', 'favCount', {int_or_none}), + 'comment_count': ('movie', 'commentCount', {int_or_none}), + 'timestamp': ('movie', 'published', {int_or_none}), + 'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter), + }), + 'live_status': 'not_live', + } diff --git a/lib/yt_dlp/extractor/rutube.py b/lib/yt_dlp/extractor/rutube.py index 2c416811a..abf9aec72 100644 --- a/lib/yt_dlp/extractor/rutube.py +++ b/lib/yt_dlp/extractor/rutube.py @@ -2,15 +2,18 @@ from .common import InfoExtractor from ..utils import ( + UnsupportedError, bool_or_none, determine_ext, int_or_none, + js_to_json, parse_qs, - traverse_obj, + str_or_none, try_get, unified_timestamp, url_or_none, ) +from ..utils.traversal import traverse_obj class RutubeBaseIE(InfoExtractor): @@ -19,7 +22,7 @@ def _download_api_info(self, video_id, query=None): query = {} query['format'] = 'json' return self._download_json( - f'http://rutube.ru/api/video/{video_id}/', + f'https://rutube.ru/api/video/{video_id}/', video_id, 'Downloading video JSON', 'Unable to download video JSON', query=query) @@ -61,18 +64,21 @@ def _download_api_options(self, video_id, query=None): query = {} query['format'] = 'json' return self._download_json( - f'http://rutube.ru/api/play/options/{video_id}/', + f'https://rutube.ru/api/play/options/{video_id}/', video_id, 'Downloading options JSON', 'Unable to download options JSON', headers=self.geo_verification_headers(), query=query) - def _extract_formats(self, options, video_id): + def _extract_formats_and_subtitles(self, options, video_id): formats = [] + subtitles = {} for format_id, format_url in options['video_balancer'].items(): ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( format_url, video_id, f4m_id=format_id, fatal=False)) @@ -82,11 +88,19 @@ def _extract_formats(self, options, video_id): 'format_id': format_id, }) for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False)) - return formats - - def _download_and_extract_formats(self, video_id, query=None): - return self._extract_formats( + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls') + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))): + subtitles.setdefault(caption.get('code') or 'ru', []).append({ + 'url': caption['file'], + 'name': caption.get('langTitle'), + }) + return formats, subtitles + + def _download_and_extract_formats_and_subtitles(self, video_id, query=None): + return self._extract_formats_and_subtitles( self._download_api_options(video_id, query=query), video_id) @@ -97,8 +111,8 @@ class RutubeIE(RutubeBaseIE): _EMBED_REGEX = [r']+?src=(["\'])(?P(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _TESTS = [{ - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': 'e33ac625efca66aba86cbec9851f2692', + 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', 'ext': 'mp4', @@ -111,26 +125,25 @@ class RutubeIE(RutubeBaseIE): 'upload_date': '20131016', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'categories': ['Новости и СМИ'], 'chapters': [], }, - 'expected_warnings': ['Unable to download f4m'], }, { - 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', + 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { - 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', - 'md5': 'd106225f15d625538fe22971158e896f', + 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7', 'info_dict': { 'id': '884fb55f07a97ab673c7d654553e0f48', 'ext': 'mp4', @@ -143,11 +156,10 @@ class RutubeIE(RutubeBaseIE): 'upload_date': '20221210', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', 'categories': ['Видеоигры'], 'chapters': [], }, - 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'info_dict': { @@ -156,17 +168,16 @@ class RutubeIE(RutubeBaseIE): 'chapters': 'count:4', 'categories': ['Бизнес и предпринимательство'], 'description': 'md5:252feac1305257d8c1bab215cedde75d', - 'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', + 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'duration': 782, 'age_limit': 0, 'uploader_id': '23491359', 'timestamp': 1677153329, 'view_count': int, 'upload_date': '20230223', - 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании', + 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1', 'uploader': 'Стас Быков', }, - 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'info_dict': { @@ -174,7 +185,7 @@ class RutubeIE(RutubeBaseIE): 'ext': 'mp4', 'categories': ['Телепередачи'], 'description': '', - 'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'live_status': 'is_live', 'age_limit': 0, 'uploader_id': '23460655', @@ -184,6 +195,24 @@ class RutubeIE(RutubeBaseIE): 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'uploader': 'Первый канал', }, + }, { + 'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/', + 'info_dict': { + 'id': '03a9cb54bac3376af4c5cb0f18444e01', + 'ext': 'mp4', + 'age_limit': 0, + 'description': '', + 'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»', + 'chapters': [], + 'upload_date': '20240829', + 'duration': 293, + 'uploader': 'MOEX - Московская биржа', + 'timestamp': 1724946628, + 'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg', + 'view_count': int, + 'uploader_id': '38420507', + 'categories': ['Интервью'], + }, }, { 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'only_matching': True, @@ -192,40 +221,46 @@ class RutubeIE(RutubeBaseIE): 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return False if RutubePlaylistIE.suitable(url) else super().suitable(url) - def _real_extract(self, url): video_id = self._match_id(url) query = parse_qs(url) info = self._download_and_extract_info(video_id, query) - info['formats'] = self._download_and_extract_formats(video_id, query) - return info + formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query) + return { + **info, + 'formats': formats, + 'subtitles': subtitles, + } class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)' + _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)(?:[?#/]|$)' _TESTS = [{ - 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', - 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix

восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', 'uploader': 'subziro89 ILya', 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + 'age_limit': 0, + 'duration': 1395, + 'chapters': [], + 'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b', + 'view_count': int, + 'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg', + 'categories': ['Сериалы'], }, 'params': { 'skip_download': True, }, }, { - 'url': 'http://rutube.ru/play/embed/8083783', + 'url': 'https://rutube.ru/play/embed/8083783', 'only_matching': True, }, { # private video @@ -240,11 +275,12 @@ def _real_extract(self, url): query = parse_qs(url) options = self._download_api_options(embed_id, query) video_id = options['effective_video'] - formats = self._extract_formats(options, video_id) + formats, subtitles = self._extract_formats_and_subtitles(options, video_id) info = self._download_and_extract_info(video_id, query) info.update({ 'extractor_key': 'Rutube', 'formats': formats, + 'subtitles': subtitles, }) return info @@ -295,14 +331,14 @@ class RutubeTagsIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube tags' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P\d+)' _TESTS = [{ - 'url': 'http://rutube.ru/tags/video/1800/', + 'url': 'https://rutube.ru/tags/video/1800/', 'info_dict': { 'id': '1800', }, 'playlist_mincount': 68, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json' class RutubeMovieIE(RutubePlaylistBaseIE): @@ -310,8 +346,8 @@ class RutubeMovieIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube movies' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P\d+)' - _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' - _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + _MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): movie_id = self._match_id(url) @@ -327,62 +363,82 @@ class RutubePersonIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube person videos' _VALID_URL = r'https?://rutube\.ru/video/person/(?P\d+)' _TESTS = [{ - 'url': 'http://rutube.ru/video/person/313878/', + 'url': 'https://rutube.ru/video/person/313878/', 'info_dict': { 'id': '313878', }, - 'playlist_mincount': 37, + 'playlist_mincount': 36, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json' class RutubePlaylistIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:playlist' IE_DESC = 'Rutube playlists' - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P\d+)' + _VALID_URL = r'https?://rutube\.ru/plst/(?P\d+)' _TESTS = [{ - 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', + 'url': 'https://rutube.ru/plst/308547/', 'info_dict': { - 'id': '3097', + 'id': '308547', }, - 'playlist_count': 27, - }, { - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', - 'only_matching': True, + 'playlist_mincount': 22, }] - - _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' - - @classmethod - def suitable(cls, url): - from ..utils import int_or_none, parse_qs - - if not super().suitable(url): - return False - params = parse_qs(url) - return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) - - def _next_page_url(self, page_num, playlist_id, item_kind): - return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) - - def _real_extract(self, url): - qs = parse_qs(url) - playlist_kind = qs['pl_type'][0] - playlist_id = qs['pl_id'][0] - return self._extract_playlist(playlist_id, item_kind=playlist_kind) + _PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json' class RutubeChannelIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channel' - _VALID_URL = r'https?://rutube\.ru/channel/(?P\d+)/videos' + _VALID_URL = r'https?://rutube\.ru/(?:channel/(?P\d+)|u/(?P\w+))(?:/(?P
videos|shorts|playlists))?' _TESTS = [{ 'url': 'https://rutube.ru/channel/639184/videos/', 'info_dict': { - 'id': '639184', + 'id': '639184_videos', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://rutube.ru/channel/25902603/shorts/', + 'info_dict': { + 'id': '25902603_shorts', }, - 'playlist_mincount': 133, + 'playlist_mincount': 277, + }, { + 'url': 'https://rutube.ru/channel/25902603/', + 'info_dict': { + 'id': '25902603', + }, + 'playlist_mincount': 406, + }, { + 'url': 'https://rutube.ru/u/rutube/videos/', + 'info_dict': { + 'id': '23704195_videos', + }, + 'playlist_mincount': 113, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s' + + def _next_page_url(self, page_num, playlist_id, section): + origin_type = { + 'videos': 'rtb,rst,ifrm,rspa', + 'shorts': 'rshorts', + None: '', + }.get(section) + return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type) + + def _real_extract(self, url): + playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section') + if section == 'playlists': + raise UnsupportedError(url) + if slug: + webpage = self._download_webpage(url, slug) + redux_state = self._search_json( + r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json) + playlist_id = traverse_obj(redux_state, ( + 'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'), + 'data', 'channel_id', {int}, {str_or_none}, any)) + playlist = self._extract_playlist(playlist_id, section=section) + if section: + playlist['id'] = f'{playlist_id}_{section}' + return playlist diff --git a/lib/yt_dlp/extractor/spreaker.py b/lib/yt_dlp/extractor/spreaker.py index d1df45969..c64c2fcd2 100644 --- a/lib/yt_dlp/extractor/spreaker.py +++ b/lib/yt_dlp/extractor/spreaker.py @@ -2,13 +2,16 @@ from .common import InfoExtractor from ..utils import ( + filter_dict, float_or_none, int_or_none, + parse_qs, str_or_none, try_get, unified_timestamp, url_or_none, ) +from ..utils.traversal import traverse_obj def _extract_episode(data, episode_id=None): @@ -58,15 +61,10 @@ def duration(key): class SpreakerIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - api\.spreaker\.com/ - (?: - (?:download/)?episode| - v2/episodes - )/ - (?P\d+) - ''' + _VALID_URL = [ + r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P\d+)', + r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P\d+)/?(?:[?#]|$)', + ] _TESTS = [{ 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { @@ -83,7 +81,9 @@ class SpreakerIE(InfoExtractor): 'view_count': int, 'like_count': int, 'comment_count': int, - 'series': 'Success With Music (SWM)', + 'series': 'Success With Music | SWM', + 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg', + 'creators': ['SWM'], }, }, { 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', @@ -91,52 +91,75 @@ class SpreakerIE(InfoExtractor): }, { 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', 'only_matching': True, + }, { + 'note': 'episode', + 'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615', + 'info_dict': { + 'id': '60269615', + 'display_id': 'grunge-music-origins-the-raw-sound-that-', + 'ext': 'mp3', + 'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation', + 'description': str, + 'timestamp': 1717468905, + 'upload_date': '20240604', + 'uploader': 'Katie Brown 2', + 'uploader_id': '17733249', + 'duration': 818.83, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'series': '90s Grunge', + 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg', + 'creators': ['Katie Brown 2'], + }, + }, { + 'url': 'https://www.spreaker.com/episode/60269615', + 'only_matching': True, }] def _real_extract(self, url): episode_id = self._match_id(url) data = self._download_json( - f'https://api.spreaker.com/v2/episodes/{episode_id}', - episode_id)['response']['episode'] + f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id, + query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] return _extract_episode(data, episode_id) -class SpreakerPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - episode_id = self._search_regex( - (r'data-episode_id=["\'](?P\d+)', - r'episode_id\s*:\s*(?P\d+)'), webpage, 'episode id') - return self.url_result( - f'https://api.spreaker.com/episode/{episode_id}', - ie=SpreakerIE.ie_key(), video_id=episode_id) - - class SpreakerShowIE(InfoExtractor): - _VALID_URL = r'https?://api\.spreaker\.com/show/(?P\d+)' + _VALID_URL = [ + r'https?://api\.spreaker\.com/show/(?P\d+)', + r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P[\d]+)', + r'https?://(?:www\.)?spreaker\.com/show/(?P\d+)/episodes/feed', + ] _TESTS = [{ 'url': 'https://api.spreaker.com/show/4652058', 'info_dict': { 'id': '4652058', }, 'playlist_mincount': 118, + }, { + 'url': 'https://www.spreaker.com/podcast/health-wealth--5918323', + 'info_dict': { + 'id': '5918323', + }, + 'playlist_mincount': 60, + }, { + 'url': 'https://www.spreaker.com/show/5887186/episodes/feed', + 'info_dict': { + 'id': '5887186', + }, + 'playlist_mincount': 290, }] - def _entries(self, show_id): + def _entries(self, show_id, key=None): for page_num in itertools.count(1): episodes = self._download_json( f'https://api.spreaker.com/show/{show_id}/episodes', - show_id, note=f'Downloading JSON page {page_num}', query={ + show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({ 'page': page_num, 'max_per_page': 100, - }) + 'key': key, + })) pager = try_get(episodes, lambda x: x['response']['pager'], dict) if not pager: break @@ -152,21 +175,5 @@ def _entries(self, show_id): def _real_extract(self, url): show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) - - -class SpreakerShowPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/show/success-with-music', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show_id = self._search_regex( - r'show_id\s*:\s*(?P\d+)', webpage, 'show id') - return self.url_result( - f'https://api.spreaker.com/show/{show_id}', - ie=SpreakerShowIE.ie_key(), video_id=show_id) + key = traverse_obj(parse_qs(url), ('key', 0)) + return self.playlist_result(self._entries(show_id, key), playlist_id=show_id) diff --git a/lib/yt_dlp_version b/lib/yt_dlp_version index 41aaf1a8d..3427a7b81 100644 --- a/lib/yt_dlp_version +++ b/lib/yt_dlp_version @@ -1 +1 @@ -b83ca24eb72e1e558b0185bd73975586c0bc0546 \ No newline at end of file +a9f85670d03ab993dc589f21a9ffffcad61392d5 \ No newline at end of file