From 2652861ae23c33d63cb1c99f69a0f1b211fe1372 Mon Sep 17 00:00:00 2001 From: Zsombor Gegesy Date: Sun, 30 Apr 2023 09:27:07 +0200 Subject: [PATCH] Parse all the playlists of a channel --- .../extractor/channel/ChannelExtractor.java | 5 + .../youtube/YoutubeParsingHelper.java | 57 +++++++++ .../GridPlaylistRendererExtractor.java | 64 ++++++++++ .../extractors/YoutubeChannelExtractor.java | 39 +++--- .../YoutubeChannelPlaylistExtractor.java | 116 ++++++++++++++++++ .../extractors/YoutubePlaylistExtractor.java | 23 +--- 6 files changed, 268 insertions(+), 36 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/GridPlaylistRendererExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelPlaylistExtractor.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelExtractor.java index a045411247..3553e4c0fd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelExtractor.java @@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItem; /* @@ -44,4 +45,8 @@ public ChannelExtractor(final StreamingService service, final ListLinkHandler li public abstract String getParentChannelAvatarUrl() throws ParsingException; public abstract boolean isVerified() throws ParsingException; + public ListExtractor getPlaylists() throws ParsingException { + return null; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index cb7c21bdc1..86d2c9a248 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -34,9 +34,11 @@ import com.grack.nanojson.JsonWriter; import org.jsoup.nodes.Entities; import org.schabi.newpipe.extractor.MetaInfo; +import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.AccountTerminatedException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; +import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; @@ -1177,6 +1179,61 @@ public static String getValidJsonResponseBody(@Nonnull final Response response) return responseBody; } + public static Optional getTabByName(@Nonnull final JsonObject initialData, + @Nonnull final String tabName) { + final JsonArray tabs = initialData.getObject("contents") + .getObject("twoColumnBrowseResultsRenderer").getArray("tabs"); + + return tabs.stream().filter(Objects::nonNull).filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .filter(tab -> tab.has("tabRenderer") + && tab.getObject("tabRenderer").getString("title", "").equals(tabName)) + .findFirst().map(tab -> tab.getObject("tabRenderer")); + } + + public static JsonObject getPlaylistsTab(@Nonnull final JsonObject initialData) + throws ContentNotSupportedException { + return getTabByName(initialData, "Playlists").orElseThrow( + () -> new ContentNotSupportedException("This channel has no Playlists tab")); + } + + /** + * Return a page, which contains the continuation of the current list - if the + * item has a 'continuationItemRenderer'. + */ + public static Page getNextPageFromItem(final JsonObject item, final Localization localization, + final ContentCountry contentCountry) + throws UnsupportedEncodingException, IOException, ExtractionException { + if (item.has("continuationItemRenderer")) { + return getNextPageFromContinuationItemRenderer( + item.getObject("continuationItemRenderer"), localization, contentCountry); + } else { + return null; + } + } + + /** + * Return a page, which contains the continuation of the current list - if the + * item *is* a 'continuationItemRenderer', so it has 'continuationEndpoint'. + */ + public static Page getNextPageFromContinuationItemRenderer(final JsonObject item, + final Localization localization, final ContentCountry contentCountry) + throws UnsupportedEncodingException, IOException, ExtractionException { + final String token = item.getObject("continuationEndpoint").getObject("continuationCommand") + .getString("token"); + if (token == null) { + return null; + } + + final byte[] body = JsonWriter + .string(prepareDesktopJsonBuilder(localization, contentCountry) + .value("continuation", token).done()) + .getBytes(StandardCharsets.UTF_8); + + return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey() + DISABLE_PRETTY_PRINT_PARAMETER, + body); + } + public static JsonObject getJsonPostResponse(final String endpoint, final byte[] body, final Localization localization) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/GridPlaylistRendererExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/GridPlaylistRendererExtractor.java new file mode 100644 index 0000000000..13006ba3be --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/GridPlaylistRendererExtractor.java @@ -0,0 +1,64 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor; +import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; + +import com.grack.nanojson.JsonObject; + +public class GridPlaylistRendererExtractor implements PlaylistInfoItemExtractor { + + private final JsonObject playlistInfoItem; + + GridPlaylistRendererExtractor(final JsonObject playlistInfoItem) { + this.playlistInfoItem = playlistInfoItem; + } + + @Override + public String getName() throws ParsingException { + return playlistInfoItem.getObject("title").getArray("runs").getObject(0).getString("text"); + } + + @Override + public String getUrl() throws ParsingException { + try { + final String id = playlistInfoItem.getString("playlistId"); + return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id); + } catch (final Exception e) { + throw new ParsingException("Could not get url", e); + } + } + + @Override + public String getThumbnailUrl() throws ParsingException { + return playlistInfoItem.getObject("thumbnailRenderer") + .getObject("playlistVideoThumbnailRenderer").getObject("thumbnail") + .getArray("thumbnails").getObject(0).getString("url"); + } + + @Override + public String getUploaderName() throws ParsingException { + return null; + } + + @Override + public String getUploaderUrl() throws ParsingException { + return null; + } + + public boolean isUploaderVerified() throws ParsingException { + try { + return YoutubeParsingHelper.isVerified(playlistInfoItem.getArray("ownerBadges")); + } catch (final Exception e) { + throw new ParsingException("Could not get uploader verification info", e); + } + } + + @Override + public long getStreamCount() throws ParsingException { + return Long.parseLong( + playlistInfoItem.getObject("videoCountShortText").getString("simpleText")); + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 0f67349cf9..c553fc44d3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -32,7 +32,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import java.util.Objects; import java.util.Optional; import javax.annotation.Nonnull; @@ -63,6 +62,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private Optional channelHeader; private boolean isCarouselHeader = false; private JsonObject videoTab; + private JsonObject playlistsTab; /** * Some channels have response redirects and the only way to reliably get the id is by saving it @@ -495,20 +495,7 @@ private JsonObject getVideoTab() throws ParsingException { return videoTab; } - final JsonArray tabs = initialData.getObject("contents") - .getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs"); - - final JsonObject foundVideoTab = tabs.stream() - .filter(Objects::nonNull) - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast) - .filter(tab -> tab.has("tabRenderer") - && tab.getObject("tabRenderer") - .getString("title", "") - .equals("Videos")) - .findFirst() - .map(tab -> tab.getObject("tabRenderer")) + final JsonObject foundVideoTab = YoutubeParsingHelper.getTabByName(initialData, "Videos") .orElseThrow( () -> new ContentNotSupportedException("This channel has no Videos tab")); @@ -530,4 +517,26 @@ private JsonObject getVideoTab() throws ParsingException { videoTab = foundVideoTab; return foundVideoTab; } + + @Override + public YoutubeChannelPlaylistExtractor getPlaylists() throws ParsingException { + final JsonObject tab = getPlaylistsTab(); + if (tab != null) { + return new YoutubeChannelPlaylistExtractor(getService(), getLinkHandler(), + tab.getObject("endpoint").getObject("browseEndpoint")); + } + return null; + } + + @Nullable + private JsonObject getPlaylistsTab() throws ParsingException { + if (playlistsTab != null) { + return playlistsTab; + } + + this.playlistsTab = YoutubeParsingHelper.getPlaylistsTab(initialData); + + return playlistsTab; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelPlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelPlaylistExtractor.java new file mode 100644 index 0000000000..e9d4a2eaaa --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelPlaylistExtractor.java @@ -0,0 +1,116 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; +import static org.schabi.newpipe.extractor.utils.Utils.UTF_8; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; + +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.Page; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.localization.Localization; +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItem; +import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemsCollector; +import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonWriter; + +public class YoutubeChannelPlaylistExtractor extends ListExtractor { + + private final String browseId; + private final String params; + private final String canonicalBaseUrl; + private JsonObject browseResponse; + private JsonObject playlistTab; + + YoutubeChannelPlaylistExtractor(final StreamingService service, + final ListLinkHandler linkHandler, final JsonObject browseEndpoint) { + super(service, linkHandler); + this.browseId = browseEndpoint.getString("browseId"); + this.params = browseEndpoint.getString("params"); + this.canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl"); + } + + @Override + public InfoItemsPage getInitialPage() + throws IOException, ExtractionException { + final PlaylistInfoItemsCollector pic = new PlaylistInfoItemsCollector(getServiceId()); + + final JsonArray playlistItems = playlistTab.getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("gridRenderer").getArray("items"); + final var continuation = collectPlaylistsFrom(playlistItems, pic); + return new InfoItemsPage<>(pic, continuation); + } + + private Page collectPlaylistsFrom(final JsonArray playlistItems, + final PlaylistInfoItemsCollector collector) + throws UnsupportedEncodingException, IOException, ExtractionException { + Page continuation = null; + for (final var item : playlistItems) { + if (item instanceof JsonObject) { + final JsonObject jsonItem = (JsonObject) item; + if (jsonItem.has("gridPlaylistRenderer")) { + collector.commit(new GridPlaylistRendererExtractor( + jsonItem.getObject("gridPlaylistRenderer"))); + } else if (jsonItem.has("continuationItemRenderer")) { + continuation = YoutubeParsingHelper.getNextPageFromItem(jsonItem, + getExtractorLocalization(), getExtractorContentCountry()); + } + } + } + return continuation; + } + + @Override + public InfoItemsPage getPage(final Page page) + throws IOException, ExtractionException { + if (page == null || isNullOrEmpty(page.getUrl())) { + throw new IllegalArgumentException("Page doesn't contain an URL"); + } + final PlaylistInfoItemsCollector collector = new PlaylistInfoItemsCollector(getServiceId()); + + final JsonObject ajaxJson = getJsonPostResponse("browse", page.getBody(), + getExtractorLocalization()); + + final JsonArray continuation = ajaxJson.getArray("onResponseReceivedActions").getObject(0) + .getObject("appendContinuationItemsAction").getArray("continuationItems"); + + final var cont = collectPlaylistsFrom(continuation, collector); + + return new InfoItemsPage<>(collector, cont); + } + + @Override + public void onFetchPage(final Downloader downloader) throws IOException, ExtractionException { + final Localization localization = getExtractorLocalization(); + final byte[] body = JsonWriter + .string(prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) + .value("browseId", browseId) + .value("params", params) + .value("canonicalBaseUrl", canonicalBaseUrl).done()) + .getBytes(UTF_8); + + browseResponse = getJsonPostResponse("browse", body, localization); + playlistTab = YoutubeParsingHelper.getPlaylistsTab(browseResponse); + + YoutubeParsingHelper.defaultAlertsCheck(browseResponse); + } + + @Override + public String getName() throws ParsingException { + return browseResponse.getObject("metadata").getObject("channelMetadataRenderer") + .getString("title"); + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 06f055f14b..2f6b5565b6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -1,11 +1,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.extractPlaylistTypeFromPlaylistUrl; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; @@ -376,24 +373,8 @@ private Page getNextPageFrom(final JsonArray contents) } final JsonObject lastElement = contents.getObject(contents.size() - 1); - if (lastElement.has("continuationItemRenderer")) { - final String continuation = lastElement - .getObject("continuationItemRenderer") - .getObject("continuationEndpoint") - .getObject("continuationCommand") - .getString("token"); - - final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder( - getExtractorLocalization(), getExtractorContentCountry()) - .value("continuation", continuation) - .done()) - .getBytes(StandardCharsets.UTF_8); - - return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey() - + DISABLE_PRETTY_PRINT_PARAMETER, body); - } else { - return null; - } + return YoutubeParsingHelper.getNextPageFromItem(lastElement, getExtractorLocalization(), + getExtractorContentCountry()); } private void collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,