Skip to content

Commit

Permalink
[CI] auto update yt_dlp to upstream commit 90c3721a322756bb7f4ca10ceb73744500bee37e
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Jun 17, 2024
1 parent 9a38482 commit e553dc6
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 26 deletions.
4 changes: 4 additions & 0 deletions lib/yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1928,6 +1928,10 @@
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
)
from .srgssr import (
SRGSSRIE,
SRGSSRPlayIE,
Expand Down
12 changes: 6 additions & 6 deletions lib/yt_dlp/extractor/brightcove.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def _build_brightcove_url_from_js(cls, object_js):
@classmethod
def _make_brightcove_url(cls, params):
return update_url_query(
'http://c.brightcove.com/services/viewer/htmlFederated', params)
'https://c.brightcove.com/services/viewer/htmlFederated', params)

@classmethod
def _extract_brightcove_url(cls, webpage):
Expand Down Expand Up @@ -470,7 +470,7 @@ def _real_extract(self, url):
if referer:
headers['Referer'] = referer
player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
if player_page:
player_key = self._search_regex(
Expand All @@ -480,7 +480,7 @@ def _real_extract(self, url):
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id:
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
if referer:
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
Expand Down Expand Up @@ -801,7 +801,7 @@ def _extract_brightcove_urls(ie, webpage):
# Look for iframe embeds [1]
for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url if url.startswith('http') else 'http:' + url)
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)

# Look for <video> tags [2] and embed_in_page embeds [3]
# [2] looks like:
Expand Down Expand Up @@ -830,7 +830,7 @@ def _extract_brightcove_urls(ie, webpage):
player_id = player_id or attrs.get('data-player') or 'default'
embed = embed or attrs.get('data-embed') or 'default'

bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'

# Some brightcove videos may be embedded with video tag only and
# without script tag or any mentioning of brightcove at all. Such
Expand Down Expand Up @@ -867,7 +867,7 @@ def _real_extract(self, url):
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)

def extract_policy_key():
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
config = self._download_json(
base_url + 'config.json', video_id, fatal=False) or {}
policy_key = try_get(
Expand Down
17 changes: 12 additions & 5 deletions lib/yt_dlp/extractor/patreon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import urllib.parse

from .common import InfoExtractor
from .sproutvideo import VidsIoIE
from .vimeo import VimeoIE
from ..networking.exceptions import HTTPError
from ..utils import (
Expand All @@ -12,6 +13,7 @@
int_or_none,
mimetype2ext,
parse_iso8601,
smuggle_url,
str_or_none,
traverse_obj,
url_or_none,
Expand Down Expand Up @@ -305,22 +307,27 @@ def _real_extract(self, url):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))

# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
headers = {'referer': 'https://patreon.com/'}

# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage(
v_url, video_id, 'Checking Vimeo embed URL',
headers={'Referer': 'https://patreon.com/'},
fatal=False, errnote=False):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False):
entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True))

embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
entries.append(self.url_result(embed_url))
if embed_url and (urlh := self._request_webpage(
embed_url, video_id, 'Checking embed URL', headers=headers,
fatal=False, errnote=False, expected_status=403)):
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
entries.append(self.url_result(smuggle_url(embed_url, headers)))

post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file:
Expand Down
41 changes: 27 additions & 14 deletions lib/yt_dlp/extractor/podbayfm.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
from .common import InfoExtractor
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
from ..utils import (
OnDemandPagedList,
clean_html,
int_or_none,
jwt_decode_hs256,
url_or_none,
)
from ..utils.traversal import traverse_obj


def result_from_props(props, episode_id=None):
def result_from_props(props):
return {
'id': props.get('podcast_id') or episode_id,
'title': props.get('title'),
'url': props['mediaURL'],
**traverse_obj(props, {
'id': ('_id', {str}),
'title': ('title', {str}),
'url': ('mediaURL', {url_or_none}),
'description': ('description', {clean_html}),
'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}),
'duration': ('duration', {int_or_none}),
}),
'ext': 'mp3',
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
'timestamp': props.get('timestamp'),
'duration': int_or_none(props.get('duration')),
'vcodec': 'none',
}


class PodbayFMIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
_VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
_TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
'md5': '98b41285dcf7989d105a4ed0404054cf',
'md5': '895ac8505de349515f5ee8a4a3195c93',
'info_dict': {
'id': '1647338400',
'id': '62306451f4a48e58d0c4d6a8',
'title': 'Part One: Kissinger',
'ext': 'mp3',
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1647338400,
'duration': 5001,
Expand All @@ -34,24 +46,25 @@ def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
data = self._search_nextjs_data(webpage, episode_id)
return result_from_props(data['props']['pageProps']['episode'], episode_id)
return result_from_props(data['props']['pageProps']['episode'])


class PodbayFMChannelIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards',
'info_dict': {
'id': 'behind-the-bastards',
'title': 'Behind the Bastards',
},
'playlist_mincount': 21,
}]
_PAGE_SIZE = 10

def _fetch_page(self, channel_id, pagenum):
return self._download_json(
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
channel_id)['podcast']
f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']

@staticmethod
def _results_from_page(channel_id, page):
Expand Down
198 changes: 198 additions & 0 deletions lib/yt_dlp/extractor/sproutvideo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import base64
import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
qualities,
remove_start,
smuggle_url,
unsmuggle_url,
update_url_query,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class SproutVideoIE(InfoExtractor):
    """Extractor for SproutVideo player embeds (``videos.sproutvideo.com/embed/<id>/<hash>``)."""

    # Scheme-less form of the embed URL, shared by _VALID_URL and _EMBED_REGEX
    _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
    _VALID_URL = rf'https?:{_NO_SCHEME_RE}'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
    _TESTS = [{
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
        'md5': 'cebae5cf558cca83271917cf4ec03f26',
        'info_dict': {
            'id': 'a79fdcb21f1be2c62e',
            'ext': 'mp4',
            'title': 'HS_01_Live Stream 2023-01-14 10:00',
            'duration': 703,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        # http formats 'sd' and 'hd' are available
        'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
        'md5': 'f368c78df07e78a749508b221528672c',
        'info_dict': {
            'id': '119cd6bc1a18e6cd98',
            'ext': 'mp4',
            'title': '3. Updating your Partner details',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 60,
        },
        'params': {'format': 'hd'},
    }, {
        # subtitles
        'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
        'md5': '7f6798f037d7a3e3e07e67959de68fc6',
        'info_dict': {
            'id': '119dd8ba121ee0cc98',
            'ext': 'mp4',
            'title': 'Recipients Setup - Domestic Wire Only',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 77,
            'subtitles': {'en': 'count:1'},
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]
    _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
    _QUALITIES = ('hd', 'uhd', 'source')  # Exclude 'sd' to prioritize hls formats above it

    @staticmethod
    def _policy_to_qs(policy, signature_key, as_string=False):
        """Build the query parameters for one signature set of the player data.

        Every entry of ``policy['signatures'][signature_key]`` is copied with a
        leading ``CloudFront-`` removed from its key, and ``policy['sessionID']``
        is added as ``sessionID``.

        Returns the query as a dict, or as a urlencoded string if ``as_string`` is true.
        Raises KeyError if ``signatures``, the requested set or ``sessionID`` is missing.
        """
        query = {}
        for key, value in policy['signatures'][signature_key].items():
            query[remove_start(key, 'CloudFront-')] = value
        query['sessionID'] = policy['sessionID']
        return urllib.parse.urlencode(query, doseq=True) if as_string else query

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs found in ``webpage``, each smuggled with the page URL as 'referer'."""
        for embed_url in super()._extract_embed_urls(url, webpage):
            # _EMBED_REGEX also accepts scheme-relative iframe sources
            if embed_url.startswith('//'):
                embed_url = f'https:{embed_url}'
            # _real_extract sends the smuggled 'referer' as the Referer header
            yield smuggle_url(embed_url, {'referer': url})

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata from a SproutVideo embed page."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
        # The player data is a base64-encoded JSON string assigned to `var dat`
        data = self._search_json(
            r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
            end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())

        formats, subtitles = [], {}
        headers = {
            'Accept': '*/*',
            'Origin': 'https://videos.sproutvideo.com',
            'Referer': url,
        }

        # HLS extraction is fatal; only attempt it if the JSON data says it's available
        if traverse_obj(data, 'hls'):
            manifest_query = self._policy_to_qs(data, 'm')
            fragment_query = self._policy_to_qs(data, 't', as_string=True)
            key_query = self._policy_to_qs(data, 'k', as_string=True)

            hls_formats = self._extract_m3u8_formats(
                self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
                m3u8_id='hls', headers=headers, query=manifest_query)
            # Only the HLS formats carry the signed queries: the 'm' set goes on the
            # playlist URL, the 't' and 'k' sets under the extra_param_* keys
            for fmt in hls_formats:
                fmt.update({
                    'url': update_url_query(fmt['url'], manifest_query),
                    'extra_param_to_segment_url': fragment_query,
                    'extra_param_to_key_url': key_query,
                })
            formats.extend(hls_formats)

        if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))):
            quality = qualities(self._QUALITIES)
            # Only an explicit `has_audio: false` marks the formats as having no audio
            acodec = 'none' if data.get('has_audio') is False else None
            formats.extend([{
                'format_id': str(format_id),
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_id),
                'acodec': acodec,
            } for format_id, format_url in downloads])

        for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
            # Fall back to 'en' when srclang is missing, null or empty,
            # so that the subtitle language key is never None or ''
            subtitles.setdefault(sub_data.get('srclang') or 'en', []).append({
                'url': sub_data['src'],
            })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('posterframe_url', {url_or_none}),
            }),
        }


class VidsIoIE(InfoExtractor):
    """Extractor for ``*.vids.io`` video pages; delegates to SproutVideoIE via the page's embed."""

    IE_NAME = 'vids.io'
    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'ext': 'mp4',
            'title': 'How to Video: Live Streaming',
            'duration': 2787,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve a vids.io page to its SproutVideo embed, submitting the video password on a 403.

        Raises ExtractorError if a password is required but not supplied, if the
        supplied password is rejected, or if no SproutVideo embed is found.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # A 403 response is tolerated here and treated as password protection below
        webpage, handle = self._download_webpage_handle(url, display_id, expected_status=403)

        if handle.status == 403:
            video_password = self.get_param('videopassword')
            if not video_password:
                raise ExtractorError(
                    'This video is password-protected; use the --video-password option', expected=True)
            try:
                # Post the password together with the hidden inputs of the 403 page
                webpage = self._download_webpage(
                    url, display_id, 'Submitting video password',
                    data=urlencode_postdata({
                        'password': video_password,
                        **self._hidden_inputs(webpage),
                    }))
                # Requests with user's session cookie `_sproutvideo_session` are now authorized
            except ExtractorError as err:
                rejected = isinstance(err.cause, HTTPError) and err.cause.status == 403
                if not rejected:
                    raise
                # A second 403 after submitting means the password was not accepted
                raise ExtractorError('Incorrect password', expected=True)

        embed_url = next(SproutVideoIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to extract any SproutVideo embed url')
        return self.url_result(embed_url, SproutVideoIE, video_id)
2 changes: 1 addition & 1 deletion lib/yt_dlp_version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe
90c3721a322756bb7f4ca10ceb73744500bee37e

0 comments on commit e553dc6

Please sign in to comment.