diff --git a/lib/yt_dlp/extractor/common.py b/lib/yt_dlp/extractor/common.py index 28a3adf93..ce79e0b62 100644 --- a/lib/yt_dlp/extractor/common.py +++ b/lib/yt_dlp/extractor/common.py @@ -1854,12 +1854,26 @@ def _check_formats(self, formats, video_id): @staticmethod def _remove_duplicate_formats(formats): - format_urls = set() + seen_urls = set() + seen_fragment_urls = set() unique_formats = [] for f in formats: - if f['url'] not in format_urls: - format_urls.add(f['url']) + fragments = f.get('fragments') + if callable(fragments): unique_formats.append(f) + + elif fragments: + fragment_urls = frozenset( + fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path']) + for fragment in fragments) + if fragment_urls not in seen_fragment_urls: + seen_fragment_urls.add(fragment_urls) + unique_formats.append(f) + + elif f['url'] not in seen_urls: + seen_urls.add(f['url']) + unique_formats.append(f) + formats[:] = unique_formats def _is_valid_url(self, url, video_id, item='video', headers={}): diff --git a/lib/yt_dlp/extractor/dacast.py b/lib/yt_dlp/extractor/dacast.py index 4e81aa4a7..537352e5f 100644 --- a/lib/yt_dlp/extractor/dacast.py +++ b/lib/yt_dlp/extractor/dacast.py @@ -1,3 +1,4 @@ +import functools import hashlib import re import time @@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE): 'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2', }, 'params': {'skip_download': 'm3u8'}, + }, { # /uspaes/ in hls_url + 'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b', + 'info_dict': { + 'id': '348c5c84-b6af-4859-bb9d-1d01009c795b', + 'ext': 'mp4', + 'title': 'pl1-edyta-rubas-211124.mp4', + 'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b', + 'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png', + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/', @@ -74,6 
+84,15 @@ class DacastVODIE(DacastBaseIE): 'params': {'skip_download': 'm3u8'}, }] + @functools.cached_property + def _usp_signing_secret(self): + player_js = self._download_webpage( + 'https://player.dacast.com/js/player.js', None, 'Downloading player JS') + # Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation + return self._search_regex( + r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js, + 'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP' + def _real_extract(self, url): user_id, video_id = self._match_valid_url(url).group('user_id', 'id') query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'} @@ -94,10 +113,10 @@ def _real_extract(self, url): if 'DRM_EXT' in hls_url: self.report_drm(video_id) elif '/uspaes/' in hls_url: - # From https://player.dacast.com/js/player.js + # Ref: https://player.dacast.com/js/player.js ts = int(time.time()) signature = hashlib.sha1( - f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex() + f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex() hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}' for retry in self.RetryManager(): diff --git a/lib/yt_dlp/extractor/dropbox.py b/lib/yt_dlp/extractor/dropbox.py index c12209623..2bfeebc7c 100644 --- a/lib/yt_dlp/extractor/dropbox.py +++ b/lib/yt_dlp/extractor/dropbox.py @@ -48,32 +48,30 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) fn = urllib.parse.unquote(url_basename(url)) title = os.path.splitext(fn)[0] - password = self.get_param('videopassword') + content_id = None for part in self._yield_decoded_parts(webpage): if '/sm/password' in part: - webpage = self._download_webpage( - update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id) + content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID') break - if
(self._og_search_title(webpage, default=None) == 'Dropbox - Password Required' - or 'Enter the password for this link' in webpage): - if password: - response = self._download_json( - 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', - headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}, - data=urlencode_postdata({ - 'is_xhr': 'true', - 't': self._get_cookies('https://www.dropbox.com')['t'].value, - 'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'), - 'password': password, - 'url': url, - })) - - if response.get('status') != 'authed': - raise ExtractorError('Invalid password', expected=True) - elif not self._get_cookies('https://dropbox.com').get('sm_auth'): + if content_id: + password = self.get_param('videopassword') + if not password: raise ExtractorError('Password protected video, use --video-password <password>', expected=True) + + response = self._download_json( + 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', + data=urlencode_postdata({ + 'is_xhr': 'true', + 't': self._get_cookies('https://www.dropbox.com')['t'].value, + 'content_id': content_id, + 'password': password, + 'url': update_url(url, scheme='', netloc=''), + })) + if response.get('status') != 'authed': + raise ExtractorError('Invalid password', expected=True) + webpage = self._download_webpage(url, video_id) formats, subtitles = [], {} diff --git a/lib/yt_dlp/extractor/tiktok.py b/lib/yt_dlp/extractor/tiktok.py index ba15f08b6..9e53b3407 100644 --- a/lib/yt_dlp/extractor/tiktok.py +++ b/lib/yt_dlp/extractor/tiktok.py @@ -413,15 +413,6 @@ def extract_addr(addr, add_meta={}): for f in formats: self._set_cookie(urllib.parse.urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value) - thumbnails = [] - for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak', - 'origin_cover', 'dynamic_cover'): - for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)): -
thumbnails.append({ - 'id': cover_id, - 'url': cover_url, - }) - stats_info = aweme_detail.get('statistics') or {} music_info = aweme_detail.get('music') or {} labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str) @@ -467,7 +458,17 @@ def extract_addr(addr, add_meta={}): 'formats': formats, 'subtitles': self.extract_subtitles( aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')), - 'thumbnails': thumbnails, + 'thumbnails': [ + { + 'id': cover_id, + 'url': cover_url, + 'preference': -1 if cover_id in ('cover', 'origin_cover') else -2, + } + for cover_id in ( + 'cover', 'ai_dynamic_cover', 'animated_cover', + 'ai_dynamic_cover_bak', 'origin_cover', 'dynamic_cover') + for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)) + ], 'duration': (traverse_obj(video_info, ( (None, 'download_addr'), 'duration', {int_or_none(scale=1000)}, any)) or traverse_obj(music_info, ('duration', {int_or_none}))), @@ -600,11 +601,15 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_fl 'repost_count': 'shareCount', 'comment_count': 'commentCount', }), expected_type=int_or_none), - 'thumbnails': traverse_obj(aweme_detail, ( - (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), { - 'url': ({url_or_none}, {self._proto_relative_url}), - }, - )), + 'thumbnails': [ + { + 'id': cover_id, + 'url': self._proto_relative_url(cover_url), + 'preference': -2 if cover_id == 'dynamicCover' else -1, + } + for cover_id in ('thumbnail', 'cover', 'dynamicCover', 'originCover') + for cover_url in traverse_obj(aweme_detail, ((None, 'video'), cover_id, {url_or_none})) + ], } diff --git a/lib/yt_dlp_version b/lib/yt_dlp_version index c94ae1ff7..2611dd1b2 100644 --- a/lib/yt_dlp_version +++ b/lib/yt_dlp_version @@ -1 +1 @@ -4b5eec0aaa7c02627f27a386591b735b90e681a8 \ No newline at end of file +00dcde728635633eee969ad4d498b9f233c4a94e \ No newline at end of file