From cf38793f18c2208a0f56b30de0f82a749140f9ae Mon Sep 17 00:00:00 2001 From: insaneracist Date: Tue, 10 Nov 2020 21:38:50 -0800 Subject: [PATCH] [youtube] post entire client context to api endpoint --- youtube_dlc/extractor/youtube.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 3bb673e64..0ab3ce3bb 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2889,30 +2889,29 @@ def _extract_mix(self, playlist_id): def _extract_playlist(self, playlist_id): url = self._TEMPLATE_URL % playlist_id + print(url) page = self._download_webpage(url, playlist_id) yt_initial = self._get_yt_initial_data('', page) if yt_initial: playlist_items = try_get(yt_initial, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'], list) - video_ids = [] entries = [] playlist_page = 1 api_key = self._search_regex( r'"INNERTUBE_API_KEY":"([^"]+)"', page, 'api key', default="AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", fatal=False) - api_client_version = self._search_regex( - r'"INNERTUBE_CONTEXT_CLIENT_VERSION":"([^"]+)"', - page, 'client version', fatal=False) + api_client_context_string = self._search_regex( + r'ytcfg\.set\({"INNERTUBE_CONTEXT":(.*?)}\)', + page, 'client context', fatal=False) + api_client_context = self._parse_json(api_client_context_string, 'client context') while playlist_items: item = playlist_items.pop(0) item_video = try_get(item, lambda x: x['playlistVideoRenderer'], dict) if item_video: video_id = try_get(item_video, lambda x: x['videoId'], compat_str) - if video_id in video_ids: + if not video_id: continue - else: - video_ids.append(video_id) entry = { '_type': 'url', 'duration': int_or_none(try_get(item_video, lambda x: x['lengthSeconds'], compat_str)), @@ -2929,12 +2928,7 @@ def _extract_playlist(self, playlist_id): playlist_page += 1 continuation_token = try_get(item_continue, lambda x: x['continuationEndpoint']['continuationCommand']['token'], compat_str) request_data = { - 'context': { - 'client': { - 'clientName': 'WEB', - 'clientVersion': api_client_version - } - }, + 'context': api_client_context, 'continuation': continuation_token } response = self._download_json( @@ -2946,11 +2940,7 @@ def _extract_playlist(self, playlist_id): video_id=playlist_id) playlist_items_new = try_get(response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list) if playlist_items_new: - # load more pages until we get a page of all videos already in the playlist (some playlists loop) - video_ids_new = [try_get(i, lambda x: x['playlistVideoRenderer']['videoId'], compat_str) for i in playlist_items_new] - video_ids_new = [i for i in video_ids_new if i and i not in video_ids] - if video_ids_new: - playlist_items.extend(playlist_items_new) + playlist_items.extend(playlist_items_new) playlist_title = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['title'], compat_str) playlist_description = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['description'], compat_str)