Skip to content

Commit

Permalink
[YouTube] Refactor subtitle processing
Browse files Browse the repository at this point in the history
* move to internal function
* use `traverse_obj()`
  • Loading branch information
dirkf committed Jan 6, 2025
1 parent ab7c61c commit 00ad2b8
Showing 1 changed file with 24 additions and 22 deletions.
46 changes: 24 additions & 22 deletions youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2415,9 +2415,9 @@ def process_manifest_format(f, proto, client_name, itag, all_formats=False):
'is_live': is_live,
}

pctr = try_get(
pctr = traverse_obj(
player_response,
lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
('captions', 'playerCaptionsTracklistRenderer', T(dict)))
if pctr:
def process_language(container, base_url, lang_code, query):
lang_subs = []
Expand All @@ -2431,28 +2431,30 @@ def process_language(container, base_url, lang_code, query):
})
container[lang_code] = lang_subs

subtitles = {}
for caption_track in (pctr.get('captionTracks') or []):
base_url = caption_track.get('baseUrl')
if not base_url:
continue
if caption_track.get('kind') != 'asr':
lang_code = caption_track.get('languageCode')
if not lang_code:
def process_subtitles():
subtitles = {}
for caption_track in traverse_obj(pctr, (
'captionTracks', lambda _, v: v.get('baseUrl'))):
if not base_url:
continue
process_language(
subtitles, base_url, lang_code, {})
continue
automatic_captions = {}
for translation_language in (pctr.get('translationLanguages') or []):
translation_language_code = translation_language.get('languageCode')
if not translation_language_code:
if caption_track.get('kind') != 'asr':
lang_code = caption_track.get('languageCode')
if not lang_code:
continue
process_language(
subtitles, base_url, lang_code, {})
continue
process_language(
automatic_captions, base_url, translation_language_code,
{'tlang': translation_language_code})
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
automatic_captions = {}
for translation_language in traverse_obj(pctr, (
'translationLanguages', lambda _, v: v.get('languageCode'))):
translation_language_code = translation_language['languageCode']
process_language(
automatic_captions, base_url, translation_language_code,
{'tlang': translation_language_code})
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles

process_subtitles()

parsed_url = compat_urllib_parse_urlparse(url)
for component in (parsed_url.fragment, parsed_url.query):
Expand Down

0 comments on commit 00ad2b8

Please sign in to comment.