Support youtube direct URLs - See #190

blackjack4494 · Nov 21, 2020 · 49ee474 · 49ee474
1 parent 9a75f08
commit 49ee474
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 5 deletions.
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
@@ -35,6 +35,9 @@ def test_youtube_playlist_matching(self):
         assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585
         assertPlaylist('PL63F0C78739B09958')
+        assertTab('https://www.youtube.com/AsapSCIENCE')
+        assertTab('https://www.youtube.com/embedded')
+        assertTab('https://www.youtube.com/feed')  # Own channel's home page
         assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
@@ -47,7 +50,7 @@ def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
         self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012'))  # 668
         self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
-        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])  # /v/ is no longer valid
         self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
         self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
 

diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
@@ -64,6 +64,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
 
+    _RESERVED_NAMES = (
+        r'course|embed|watch|w|results|storefront|'
+        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
+        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
+
     _NETRC_MACHINE = 'youtube'
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
@@ -2495,7 +2500,13 @@ def decrypt_sig(mobj):
 
 class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com tab'
-    _VALID_URL = r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:(?:channel|c|user)/|(?:playlist|watch)\?.*?\blist=)(?P<id>[^/?#&]+)'
+    # (?x)^ will cause a warning in LiveIE, so this can't be split into multiple lines using '''
+    _VALID_URL = (
+        r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/'
+        r'(?:(?!(%s)([/#?]|$))|'
+        r'(?:channel|c|user)/|'
+        r'(?:playlist|watch)\?.*?\blist=)'
+        r'(?P<id>[^/?#&]+)') % YoutubeBaseInfoExtractor._RESERVED_NAMES
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
@@ -2692,8 +2703,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
     @classmethod
     def suitable(cls, url):
-        return False if YoutubeLiveIE.suitable(url) else super(
-            YoutubeTabIE, cls).suitable(url)
+        IGNORE = (YoutubeLiveIE,)
+        return (
+            False if any(ie.suitable(url) for ie in IGNORE)
+            else super(YoutubeTabIE, cls).suitable(url))
 
     def _extract_channel_id(self, webpage):
         channel_id = self._html_search_meta(
@@ -3036,6 +3049,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
         selected_tab = self._extract_selected_tab(tabs)
         renderer = try_get(
             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
+        playlist_id = None
         if renderer:
             channel_title = renderer.get('title') or item_id
             tab_title = selected_tab.get('title')
@@ -3050,6 +3064,8 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
             title = renderer.get('title')
             description = None
             playlist_id = item_id
+        if playlist_id is None:
+            return None
         playlist = self.playlist_result(
             self._entries(selected_tab['content'], identity_token),
             playlist_id=playlist_id, playlist_title=title,
@@ -3214,7 +3230,7 @@ def _real_extract(self, url):
 
 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com live streams'
-    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
+    _VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
     IE_NAME = 'youtube:live'
 
     _TESTS = [{