Skip to content

Commit

Permalink
fix parser issues
Browse files Browse the repository at this point in the history
  • Loading branch information
irfan-dahir committed Jan 26, 2022
1 parent de723b4 commit 20e14c4
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 31 deletions.
7 changes: 3 additions & 4 deletions src/Model/Common/MangaCard.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class MangaCard
protected $synopsis;

/**
* @var string
* @var string|null
*/
protected $type;

Expand Down Expand Up @@ -128,7 +128,6 @@ protected static function setProperties(Parser\Common\MangaCardParser $parser, $
$instance->explicitGenres = $parser->getExplicitGenres();
$instance->themes = $parser->getThemes();
$instance->demographics = $parser->getDemographics();
$instance->type = $parser->getType();
$instance->authors = $parser->getAuthor();
$instance->score = $parser->getMangaScore();
$instance->serialization = $parser->getSerialization();
Expand Down Expand Up @@ -183,9 +182,9 @@ public function getSynopsis(): string
}

/**
* @return string
* @return string|null
*/
public function getType(): string
public function getType(): ?string
{
return $this->type;
}
Expand Down
89 changes: 63 additions & 26 deletions src/Parser/Common/MangaCardParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public function getMalId(): int
*/
public function getMangaUrl(): string
{
return $this->crawler->filterXPath('//div[contains(@class, "title")]/p/a')->attr('href');
return $this->crawler->filterXPath('//div/div/h2/a')->attr('href');
}

/**
Expand All @@ -83,21 +83,51 @@ function (Crawler $crawler) {
*/
public function getVolumes(): ?int
{
$vols = $this->crawler->filterXPath('//div[contains(@class, "eps")]')->text();
$vols = JString::cleanse($vols);
$vols = str_replace(' eps', '', $vols);
$node = $this->crawler->filterXPath('//div/div[2]/div/span[contains(@class, "item")][3]');

return $vols === '?' ? null : (int)$vols;
if (!$node->count()) {
$node = $this->crawler->filterXPath('//div/div[2]/div/span[contains(@class, "item")][2]');
}

if (!$node->count()) {
return null;
}

$text = JString::cleanse($node->text());

if (!preg_match('~([0-9]{1,})~', $text, $matches)) {
return null;
}

return $matches[1];
}

/**
* @return string
* @return string|null
* @throws \RuntimeException
* @throws \InvalidArgumentException
*/
public function getType(): string
public function getType(): ?string
{
return JString::cleanse($this->crawler->filterXPath('//span[contains(@class, "source")]')->text());
// this information is no longer available on the page, so null is always returned; the parsing code below is unreachable
return null;

$text = $this->crawler->filterXPath('//div[contains(@class, "info")]');

if (!$text->count()) {
return null;
}

$text = JString::cleanse($text->text());
preg_match('/^([a-zA-Z-\.]+)/', $text, $matches);

$type = $matches[1];

if ($type === '-') {
$type = 'Unknown';
}

return $type;
}

/**
Expand Down Expand Up @@ -137,7 +167,7 @@ function (Crawler $crawler) {
*/
public function getDescription(): string
{
return $this->crawler->filterXPath('//div[contains(@class, "synopsis")]/span')->text();
return $this->crawler->filterXPath('//div[contains(@class, "synopsis")]/p')->text();
}

/**
Expand All @@ -146,16 +176,19 @@ public function getDescription(): string
*/
public function getPublishDates(): ?\DateTimeImmutable
{
$date = str_replace(
'(JST)',
'',
JString::cleanse($this->crawler->filterXPath('//span[contains(@class, "remain-time")]')->text())
);
$node = $this->crawler->filterXPath('//div/div[2]/div/span[contains(@class, "item")][1]');

if (
!preg_match('~(.*), ([0-9]{1,})~', $node->text(), $matches)
) {
return null;
}

$date = $matches[2];

try {
return (new \DateTimeImmutable($date, new \DateTimeZone('JST')))
->setTimezone(new \DateTimeZone('UTC'))
->setTime(0, 0);
->setTimezone(new \DateTimeZone('UTC'));
} catch (\Exception $e) {
return null;
}
Expand All @@ -168,10 +201,15 @@ public function getPublishDates(): ?\DateTimeImmutable
*/
public function getMembers(): int
{
$count = $this->crawler->filterXPath('//div[contains(@class, "scormem")]/span')->text();
$count = $this->crawler->filterXPath('//div[contains(@class, "information")]/div/div/div[2]')->text();

$count = JString::cleanse($count);

return (int)str_replace(',', '', $count);
$count = str_replace('K', '000', $count);
$count = str_replace('M', '000000', $count);


return (int)str_replace([',', '.'], '', $count);
}

/**
Expand All @@ -195,7 +233,7 @@ public function getMangaMeta(): Model\Common\MangaMeta
*/
public function getTitle(): string
{
return $this->crawler->filterXPath('//p[contains(@class,"title-text")]/a')->text();
return $this->crawler->filterXPath('//div/div/h2/a')->text();
}

/**
Expand Down Expand Up @@ -233,7 +271,8 @@ public function getMangaImage(): ?string
*/
public function getMangaScore(): ?float
{
$score = JString::cleanse($this->crawler->filterXPath('//span[contains(@class, "score")]')->text());
$score = JString::cleanse($this->crawler->filterXPath('//div[contains(@class, "information")]/div/div/div[1]')->text());

if ($score === 'N/A') {
return null;
}
Expand Down Expand Up @@ -274,15 +313,14 @@ public function getSerialization(): ?array
*/
public function getThemes(): array
{
// if anyone can fix this spaghetti code, most welcome
$node = $this->crawler->filterXPath('//div[contains(@class, "synopsis")]//p[contains(@class, "mb4 mt8")]');
$node = $this->crawler->filterXPath('//div[contains(@class, "synopsis")]/div[contains(@class, "properties")]/div[3]/span');

$malUrl = [];

$node->each(function(Crawler $c) use(&$malUrl) {
$node = $c->filterXPath('//span');

if (str_contains($node->text(), "Theme")) {
if (str_contains($node->text(), "Theme") || str_contains($node->text(), "Themes")) {
$node->nextAll()->filterXPath('//a')
->each(function(Crawler $c) use(&$malUrl) {
$malUrl[] = (new MalUrlParser($c))->getModel();
Expand All @@ -300,15 +338,14 @@ public function getThemes(): array
*/
public function getDemographics(): array
{
// if anyone can fix this spaghetti code, most welcome
$node = $this->crawler->filterXPath('//div[contains(@class, "synopsis")]//p[contains(@class, "mb4 mt8")]');
$node = $this->crawler->filterXPath('//div[contains(@class, "synopsis")]/div[contains(@class, "properties")]/div[4]/span');

$malUrl = [];

$node->each(function(Crawler $c) use(&$malUrl) {
$node = $c->filterXPath('//span');

if (str_contains($node->text(), "Demographic")) {
if (str_contains($node->text(), "Demographic") || str_contains($node->text(), "Demographics")) {
$node->nextAll()->filterXPath('//a')
->each(function(Crawler $c) use(&$malUrl) {
$malUrl[] = (new MalUrlParser($c))->getModel();
Expand Down
2 changes: 1 addition & 1 deletion src/Request/Magazine/MagazinesRequest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*
* @package Jikan\Request
*/
class MagazinesRequest implements RequestInterface
class MagazinesRequest extends \Jikan\Request\Magazine\MagazineRequest implements RequestInterface
{
/**
* MagazinesRequest constructor.
Expand Down

0 comments on commit 20e14c4

Please sign in to comment.