From 2f8d19241dd14ba87dc3f2f7b4d37fb415eb01ee Mon Sep 17 00:00:00 2001 From: Irfan Date: Wed, 29 May 2024 11:07:15 +0500 Subject: [PATCH] fixes #551 --- src/Model/Common/MalUrl.php | 4 ++-- src/Parser/Anime/AnimeParser.php | 40 ++++++++++++++++++++++++++++++-- src/Parser/Manga/MangaParser.php | 40 ++++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/src/Model/Common/MalUrl.php b/src/Model/Common/MalUrl.php index 1977b902..afeb1f8d 100644 --- a/src/Model/Common/MalUrl.php +++ b/src/Model/Common/MalUrl.php @@ -14,12 +14,12 @@ class MalUrl /** * @var string */ - private $name; + private string $name; /** * @var string */ - private $url; + private string $url; /** * Genre constructor. diff --git a/src/Parser/Anime/AnimeParser.php b/src/Parser/Anime/AnimeParser.php index 49e55dd5..b39b4819 100644 --- a/src/Parser/Anime/AnimeParser.php +++ b/src/Parser/Anime/AnimeParser.php @@ -724,11 +724,47 @@ public function getStreamingLinks(): array public function getRelated(): array { $related = []; + + // MAL has divided relations up into tiles and table format + // We first parse whatever there's in the tiles + $this->crawler + ->filterXPath('//div[contains(@class, "related-entries")]/div[contains(@class, "entries-tile")]/div[contains(@class, "entry")]') + ->each( + function (Crawler $c) use (&$related) { + $relation = $c->filterXPath('//div[@class="content"]/div[@class="relation"]')->text(); + + // strip entry type (if any) + $relation = JString::cleanse( + preg_replace("~\s\(.*\)~", '', $relation) + ); + + $links = $c->filterXPath('//div[@class="content"]/div[@class="title"]/a'); + + // Check for empty links #justMALThings + if ($links->count() === 1 // if it's the only link MAL has listed + && empty($links->first()->text()) // and if its a bugged/empty link + ) { + $related[$relation] = []; + return; + } + + // Remove empty/bugged links #justMALThings + foreach ($links as $node) { + if (empty($node->textContent)) { + $node->parentNode->removeChild($node); + } + } + + $related[$relation][] = (new MalUrlParser($links))->getModel(); + } + ); + + // Then we'll parse the table $this->crawler - ->filterXPath('//table[contains(@class, "anime_detail_related_anime")]/tr') + ->filterXPath('//table[contains(@class, "entries-table")]/tr') ->each( function (Crawler $c) use (&$related) { - $links = $c->filterXPath('//td[2]/a'); + $links = $c->filterXPath('//td[2]//a'); $relation = JString::cleanse( str_replace(':', '', $c->filterXPath('//td[1]')->text()) ); diff --git a/src/Parser/Manga/MangaParser.php b/src/Parser/Manga/MangaParser.php index 7a8c3b1f..95717523 100644 --- a/src/Parser/Manga/MangaParser.php +++ b/src/Parser/Manga/MangaParser.php @@ -611,11 +611,47 @@ public function getExternalLinks(): array public function getMangaRelated(): array { $related = []; + + // MAL has divided relations up into tiles and table format + // We first parse whatever there's in the tiles + $this->crawler + ->filterXPath('//div[contains(@class, "related-entries")]/div[contains(@class, "entries-tile")]/div[contains(@class, "entry")]') + ->each( + function (Crawler $c) use (&$related) { + $relation = $c->filterXPath('//div[@class="content"]/div[@class="relation"]')->text(); + + // strip entry type (if any) + $relation = JString::cleanse( + preg_replace("~\s\(.*\)~", '', $relation) + ); + + $links = $c->filterXPath('//div[@class="content"]/div[@class="title"]/a'); + + // Check for empty links #justMALThings + if ($links->count() === 1 // if it's the only link MAL has listed + && empty($links->first()->text()) // and if its a bugged/empty link + ) { + $related[$relation] = []; + return; + } + + // Remove empty/bugged links #justMALThings + foreach ($links as $node) { + if (empty($node->textContent)) { + $node->parentNode->removeChild($node); + } + } + + $related[$relation][] = (new MalUrlParser($links))->getModel(); + } + ); + + // Then we'll parse the table $this->crawler - ->filterXPath('//table[contains(@class, "anime_detail_related_anime")]/tr') + ->filterXPath('//table[contains(@class, "entries-table")]/tr') ->each( function (Crawler $c) use (&$related) { - $links = $c->filterXPath('//td[2]/a'); + $links = $c->filterXPath('//td[2]//a'); $relation = JString::cleanse( str_replace(':', '', $c->filterXPath('//td[1]')->text()) );