From 970ff8aad4ad46040a04c9d4fb4ed74278a81354 Mon Sep 17 00:00:00 2001 From: pushrbx Date: Mon, 4 Nov 2024 21:09:02 +0000 Subject: [PATCH 1/9] removed redundant imports --- app/Console/Commands/Indexer/AnimeIndexer.php | 1 - app/Features/QuerySpecificAnimeSeasonHandler.php | 2 -- 2 files changed, 3 deletions(-) diff --git a/app/Console/Commands/Indexer/AnimeIndexer.php b/app/Console/Commands/Indexer/AnimeIndexer.php index 3e03f302..b70ae429 100644 --- a/app/Console/Commands/Indexer/AnimeIndexer.php +++ b/app/Console/Commands/Indexer/AnimeIndexer.php @@ -2,7 +2,6 @@ namespace App\Console\Commands\Indexer; -use App\Exceptions\Console\CommandAlreadyRunningException; use App\Exceptions\Console\FileNotFoundException; use Illuminate\Console\Command; use Illuminate\Support\Facades\Storage; diff --git a/app/Features/QuerySpecificAnimeSeasonHandler.php b/app/Features/QuerySpecificAnimeSeasonHandler.php index 1ec68210..e073fa0f 100644 --- a/app/Features/QuerySpecificAnimeSeasonHandler.php +++ b/app/Features/QuerySpecificAnimeSeasonHandler.php @@ -3,8 +3,6 @@ namespace App\Features; use App\Dto\QuerySpecificAnimeSeasonCommand; -use App\Enums\AnimeSeasonEnum; -use App\Enums\AnimeStatusEnum; use App\Enums\AnimeTypeEnum; use Illuminate\Contracts\Database\Query\Builder; use Illuminate\Support\Carbon; From e0cc44495bb6a32c7e7658b49f35d43b5775bb89 Mon Sep 17 00:00:00 2001 From: pushrbx Date: Mon, 4 Nov 2024 21:09:21 +0000 Subject: [PATCH 2/9] added incremental indexer --- .../Commands/Indexer/IncrementalIndexer.php | 183 ++++++++++++++++++ app/Console/Kernel.php | 3 +- composer.json | 1 + container-setup.sh | 5 + 4 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 app/Console/Commands/Indexer/IncrementalIndexer.php diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php new file mode 100644 index 00000000..bc618e0d --- /dev/null +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -0,0 +1,183 @@ 
+ ['The media type to index.', 'Valid values: anime, manga, character, people'] + ]; + } + + public function handle(): int + { + $validator = Validator::make( + [ + 'mediaType' => $this->argument('mediaType'), + 'delay' => $this->option('delay'), + 'resume' => $this->option('resume') ?? false, + 'failed' => $this->option('failed') ?? false + ], + [ + 'mediaType' => 'required|in:anime,manga,character,people', + 'delay' => 'integer|min:1', + 'resume' => 'bool|prohibited_with:failed', + 'failed' => 'bool|prohibited_with:resume' + ] + ); + + if ($validator->fails()) { + $this->error($validator->errors()->toJson()); + return 1; + } + + $this->trap(SIGTERM, fn () => $this->cancelled = true); + + $resume = $this->option('resume') ?? false; + $onlyFailed = $this->option('failed') ?? false; + $existingIdsHash = ""; + $existingIdsRaw = ""; + /** + * @var $mediaTypes array + */ + $mediaTypes = $this->argument("mediaType"); + + foreach ($mediaTypes as $mediaType) + { + $idsToFetch = []; + $failedIds = []; + $success = []; + + if ($onlyFailed && Storage::exists("indexer/incremental/{$mediaType}_failed.json")) + { + $idsToFetch["sfw"] = json_decode(Storage::get("indexer/incremental/{$mediaType}_failed.json")); + } + else + { + if (Storage::exists("indexer/incremental/$mediaType.json")) + { + $existingIdsRaw = Storage::get("indexer/incremental/$mediaType.json"); + $existingIdsHash = sha1($existingIdsRaw); + } + + if ($this->cancelled) + { + return 127; + } + + $newIdsRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/${mediaType}_cache.json"); + $newIdsHash = sha1($newIdsRaw); + + /** @noinspection PhpConditionAlreadyCheckedInspection */ + if ($this->cancelled) + { + return 127; + } + + if ($newIdsHash !== $existingIdsHash) + { + $newIds = json_decode($newIdsRaw, true); + $existingIds = json_decode($existingIdsRaw, true); + + if (is_null($existingIds) || count($existingIds) === 0) + { + $idsToFetch = $newIds; + } + else + { + foreach 
(["sfw", "nsfw"] as $t) + { + $idsToFetch[$t] = array_diff($existingIds[$t], $newIds[$t]); + } + } + + Storage::put("indexer/incremental/$mediaType.json.tmp", $newIdsRaw); + } + } + + $idCount = count($idsToFetch); + if ($idCount > 0) + { + $index = 0; + if ($resume && Storage::exists("indexer/incremental/{$mediaType}_resume.save")) + { + $index = (int)Storage::get("indexer/incremental/{$mediaType}_resume.save"); + $this->info("Resuming from index: $index"); + } + + if ($index > 0 && !isset($this->ids[$index])) { + $index = 0; + $this->warn('Invalid index; set back to 0'); + } + + Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); + + $this->info("$idCount $mediaType entries available"); + $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); + for ($i = $index; $i <= ($idCount - 1); $i++) + { + if ($this->cancelled) + { + return 127; + } + + $id = $ids[$index]; + + $url = env('APP_URL') . "/v4/anime/$id"; + $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); + + try + { + $response = json_decode(file_get_contents($url), true); + if (isset($response['error']) && $response['status'] != 404) + { + $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}"); + } + } + catch (\Exception) + { + $this->warn("[SKIPPED] Failed to fetch $url"); + $failedIds[] = $id; + Storage::put("indexer/incremental/$mediaType.failed", json_encode($failedIds)); + } + + $success[] = $id; + Storage::put("indexer/incremental/{$mediaType}_resume.save", $index); + } + + Storage::delete("indexer/incremental/{$mediaType}_resume.save"); + $this->info("--- Indexing of $mediaType is complete."); + $this->info(count($success) . ' entries indexed or updated.'); + if (count($failedIds) > 0) + { + $this->info(count($failedIds) . ' entries failed to index or update. 
Re-run with --failed to requeue failed entries only.'); + } + // finalize the latest state + Storage::move("indexer/incremental/$mediaType.json.tmp", "indexer/incremental/$mediaType.json"); + } + } + + return 0; + } +} diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index 22c9b19c..eafe5553 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -24,7 +24,8 @@ class Kernel extends ConsoleKernel Indexer\GenreIndexer::class, Indexer\ProducersIndexer::class, Indexer\AnimeSweepIndexer::class, - Indexer\MangaSweepIndexer::class + Indexer\MangaSweepIndexer::class, + Indexer\IncrementalIndexer::class ]; /** diff --git a/composer.json b/composer.json index 8234697c..55e6a704 100644 --- a/composer.json +++ b/composer.json @@ -14,6 +14,7 @@ "php": "^8.1", "ext-json": "*", "ext-mongodb": "*", + "ext-pcntl": "*", "amphp/http-client": "^4.6", "danielmewes/php-rql": "dev-master", "darkaonline/swagger-lume": "^9.0", diff --git a/container-setup.sh b/container-setup.sh index 89c39c61..dd56f6e9 100755 --- a/container-setup.sh +++ b/container-setup.sh @@ -34,6 +34,7 @@ display_help() { echo "stop Stop Jikan API" echo "validate-prereqs Validate pre-reqs installed (docker, docker-compose)" echo "execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days)" + echo "index-incrementally Executes the incremental indexers for each media type. (anime, manga, character, people)" echo "" } @@ -168,6 +169,10 @@ case "$1" in $DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:producers echo "Indexing done!" ;; + "index-incrementally") + echo "Indexing..." + $DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:incremental anime manga character people + echo "Indexing done!" 
*) echo "No command specified, displaying help" display_help From 6568ff264e1b14cb43b88f7712b260b86893fa50 Mon Sep 17 00:00:00 2001 From: pushrbx Date: Mon, 4 Nov 2024 21:22:07 +0000 Subject: [PATCH 3/9] added docs for the new command --- COMMANDS.MD | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/COMMANDS.MD b/COMMANDS.MD index b0656eb4..e07b6fe8 100644 --- a/COMMANDS.MD +++ b/COMMANDS.MD @@ -14,14 +14,14 @@ For an entire list of commands, you can run `php artisan list` - [Indexer](#indexer) - [Anime](#indexer-anime) - [Manga](#indexer-manga) - + ## Commands ### Serve Command: `serve` Example: `php artisan serve` -Serve the application on the PHP development server +Serve the application on the PHP development server ### Queue @@ -98,7 +98,7 @@ This function only needs to be run once. Any entry's cache updating will automat Command: ``` -indexer:anime +indexer:manga {--failed : Run only entries that failed to index last time} {--resume : Resume from the last position} {--reverse : Start from the end of the array} @@ -109,3 +109,16 @@ indexer:anime Example: `indexer:manga` This simply translates to running the indexer without any additional configuration. + +#### Indexer: Incremental +Incrementally indexes media entries from MAL. +This command compares the latest version of MAL ids from the [mal_id_cache](https://github.com/purarue/mal-id-cache) +GitHub repository with the downloaded ids from the previous run. If no ids are found from the previous run, a full indexing session is started.
+ +Command: +``` +indexer:incremental {mediaType*} + {--failed : Run only entries that failed to index last time} + {--resume : Resume from the last position} + {--delay=3 : Set a delay between requests} +``` From 51489501258ac750623f89f56860eedce67484d9 Mon Sep 17 00:00:00 2001 From: pushrbx Date: Mon, 4 Nov 2024 23:11:49 +0000 Subject: [PATCH 4/9] added docs for the new command --- container_usage.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/container_usage.md b/container_usage.md index bce6bd74..a9cbb5bd 100644 --- a/container_usage.md +++ b/container_usage.md @@ -16,6 +16,9 @@ This will: > **Note**: The script supports both `docker` and `podman`. In case of `podman` please bare in mind that sometimes the container name resolution doesn't work on the container network. > In those cases you might have to install `aardvark-dns` package. On `Arch Linux` podman uses `netavark` network by default (in 2023) so you will need to install the before mentioned package. +> **Note 2**: The script will start the jikan API, but if you start it for the first time, it won't have any data in it! +> You will have to run the indexers through artisan to have data. See ["Running the indexer with the script"](#running-the-indexer-with-the-script) section. + The script has the following prerequisites and will notify you if these are not present: - git @@ -36,6 +39,7 @@ start Start Jikan API (mongodb, typesense, redis, jikan-api wor stop Stop Jikan API validate-prereqs Validate pre-reqs installed (docker, docker-compose) execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days) +index-incrementally Executes the incremental indexers for each media type. 
(anime, manga, character, people) ``` ### Running the indexer with the script From 6c2d2d3e9da31f4521e037805a4b687b94bb50af Mon Sep 17 00:00:00 2001 From: pushrbx Date: Tue, 5 Nov 2024 09:25:52 +0000 Subject: [PATCH 5/9] refactored the code for better readability --- .../Commands/Indexer/IncrementalIndexer.php | 241 +++++++++++------- 1 file changed, 142 insertions(+), 99 deletions(-) diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php index bc618e0d..8e09ecf6 100644 --- a/app/Console/Commands/Indexer/IncrementalIndexer.php +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -30,8 +30,139 @@ protected function promptForMissingArgumentsUsing(): array ]; } + private function getExistingIds(string $mediaType): array + { + $existingIdsHash = ""; + $existingIdsRaw = ""; + + if (Storage::exists("indexer/incremental/$mediaType.json")) + { + $existingIdsRaw = Storage::get("indexer/incremental/$mediaType.json"); + $existingIdsHash = sha1($existingIdsRaw); + } + + return [$existingIdsHash, $existingIdsRaw]; + } + + private function getIdsToFetch(string $mediaType): array + { + $idsToFetch = []; + [$existingIdsHash, $existingIdsRaw] = $this->getExistingIds($mediaType); + + if ($this->cancelled) + { + return []; + } + + $newIdsRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/${mediaType}_cache.json"); + $newIdsHash = sha1($newIdsRaw); + + /** @noinspection PhpConditionAlreadyCheckedInspection */ + if ($this->cancelled) + { + return []; + } + + if ($newIdsHash !== $existingIdsHash) + { + $newIds = json_decode($newIdsRaw, true); + $existingIds = json_decode($existingIdsRaw, true); + + if (is_null($existingIds) || count($existingIds) === 0) + { + $idsToFetch = $newIds; + } + else + { + foreach (["sfw", "nsfw"] as $t) + { + $idsToFetch[$t] = array_diff($existingIds[$t], $newIds[$t]); + } + } + + Storage::put("indexer/incremental/$mediaType.json.tmp", $newIdsRaw); 
+ } + + return $idsToFetch; + } + + private function getFailedIdsToFetch(string $mediaType): array + { + return json_decode(Storage::get("indexer/incremental/{$mediaType}_failed.json")); + } + + private function fetchIds(string $mediaType, array $idsToFetch, bool $resume): void + { + $index = 0; + $success = []; + $failedIds = []; + $idCount = count($idsToFetch); + if ($resume && Storage::exists("indexer/incremental/{$mediaType}_resume.save")) + { + $index = (int)Storage::get("indexer/incremental/{$mediaType}_resume.save"); + $this->info("Resuming from index: $index"); + } + + if ($index > 0 && !isset($this->ids[$index])) + { + $index = 0; + $this->warn('Invalid index; set back to 0'); + } + + Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); + + $this->info("$idCount $mediaType entries available"); + $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); + + for ($i = $index; $i <= ($idCount - 1); $i++) + { + if ($this->cancelled) + { + return; + } + + $id = $ids[$index]; + + $url = env('APP_URL') . "/v4/anime/$id"; + $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); + + try + { + $response = json_decode(file_get_contents($url), true); + if (!isset($response['error']) || $response['status'] == 404) + { + continue; + } + + $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}"); + } + catch (\Exception) + { + $this->warn("[SKIPPED] Failed to fetch $url"); + $failedIds[] = $id; + Storage::put("indexer/incremental/$mediaType.failed", json_encode($failedIds)); + } + + $success[] = $id; + Storage::put("indexer/incremental/{$mediaType}_resume.save", $index); + } + + Storage::delete("indexer/incremental/{$mediaType}_resume.save"); + + $this->info("--- Indexing of $mediaType is complete."); + $this->info(count($success) . ' entries indexed or updated.'); + if (count($failedIds) > 0) + { + $this->info(count($failedIds) . ' entries failed to index or update. 
Re-run with --failed to requeue failed entries only.'); + } + + // finalize the latest state + Storage::move("indexer/incremental/$mediaType.json.tmp", "indexer/incremental/$mediaType.json"); + } + public function handle(): int { + // validate inputs $validator = Validator::make( [ 'mediaType' => $this->argument('mediaType'), @@ -52,12 +183,12 @@ public function handle(): int return 1; } + // we want to handle signals from the OS $this->trap(SIGTERM, fn () => $this->cancelled = true); $resume = $this->option('resume') ?? false; $onlyFailed = $this->option('failed') ?? false; - $existingIdsHash = ""; - $existingIdsRaw = ""; + /** * @var $mediaTypes array */ @@ -66,116 +197,28 @@ public function handle(): int foreach ($mediaTypes as $mediaType) { $idsToFetch = []; - $failedIds = []; - $success = []; + // if "--failed" option is specified just run the failed ones if ($onlyFailed && Storage::exists("indexer/incremental/{$mediaType}_failed.json")) { - $idsToFetch["sfw"] = json_decode(Storage::get("indexer/incremental/{$mediaType}_failed.json")); + $idsToFetch["sfw"] = $this->getFailedIdsToFetch($mediaType); } else { - if (Storage::exists("indexer/incremental/$mediaType.json")) - { - $existingIdsRaw = Storage::get("indexer/incremental/$mediaType.json"); - $existingIdsHash = sha1($existingIdsRaw); - } - - if ($this->cancelled) - { - return 127; - } - - $newIdsRaw = file_get_contents("https://raw.githubusercontent.com/purarue/mal-id-cache/master/cache/${mediaType}_cache.json"); - $newIdsHash = sha1($newIdsRaw); - - /** @noinspection PhpConditionAlreadyCheckedInspection */ - if ($this->cancelled) - { - return 127; - } - - if ($newIdsHash !== $existingIdsHash) - { - $newIds = json_decode($newIdsRaw, true); - $existingIds = json_decode($existingIdsRaw, true); - - if (is_null($existingIds) || count($existingIds) === 0) - { - $idsToFetch = $newIds; - } - else - { - foreach (["sfw", "nsfw"] as $t) - { - $idsToFetch[$t] = array_diff($existingIds[$t], $newIds[$t]); - } - } - - 
Storage::put("indexer/incremental/$mediaType.json.tmp", $newIdsRaw); - } + $idsToFetch = $this->getIdsToFetch($mediaType); } $idCount = count($idsToFetch); - if ($idCount > 0) + if ($idCount == 0) { - $index = 0; - if ($resume && Storage::exists("indexer/incremental/{$mediaType}_resume.save")) - { - $index = (int)Storage::get("indexer/incremental/{$mediaType}_resume.save"); - $this->info("Resuming from index: $index"); - } - - if ($index > 0 && !isset($this->ids[$index])) { - $index = 0; - $this->warn('Invalid index; set back to 0'); - } - - Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); - - $this->info("$idCount $mediaType entries available"); - $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); - for ($i = $index; $i <= ($idCount - 1); $i++) - { - if ($this->cancelled) - { - return 127; - } - - $id = $ids[$index]; - - $url = env('APP_URL') . "/v4/anime/$id"; - $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); - - try - { - $response = json_decode(file_get_contents($url), true); - if (isset($response['error']) && $response['status'] != 404) - { - $this->error("[SKIPPED] Failed to fetch $url - {$response['error']}"); - } - } - catch (\Exception) - { - $this->warn("[SKIPPED] Failed to fetch $url"); - $failedIds[] = $id; - Storage::put("indexer/incremental/$mediaType.failed", json_encode($failedIds)); - } - - $success[] = $id; - Storage::put("indexer/incremental/{$mediaType}_resume.save", $index); - } - - Storage::delete("indexer/incremental/{$mediaType}_resume.save"); - $this->info("--- Indexing of $mediaType is complete."); - $this->info(count($success) . ' entries indexed or updated.'); - if (count($failedIds) > 0) + if ($this->cancelled) { - $this->info(count($failedIds) . ' entries failed to index or update. 
Re-run with --failed to requeue failed entries only.'); + return 127; } - // finalize the latest state - Storage::move("indexer/incremental/$mediaType.json.tmp", "indexer/incremental/$mediaType.json"); + continue; } + + $this->fetchIds($mediaType, $idsToFetch, $resume); } return 0; From 15bd63fdc7eae10845add3a552b6d8bf8c09e78a Mon Sep 17 00:00:00 2001 From: pushrbx Date: Tue, 5 Nov 2024 09:30:13 +0000 Subject: [PATCH 6/9] fixed media type choices and url for indexing --- app/Console/Commands/Indexer/IncrementalIndexer.php | 6 +++--- container-setup.sh | 2 +- container_usage.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php index 8e09ecf6..0ec05d4e 100644 --- a/app/Console/Commands/Indexer/IncrementalIndexer.php +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -26,7 +26,7 @@ class IncrementalIndexer extends Command protected function promptForMissingArgumentsUsing(): array { return [ - 'mediaType' => ['The media type to index.', 'Valid values: anime, manga, character, people'] + 'mediaType' => ['The media type to index.', 'Valid values: anime, manga, characters, people'] ]; } @@ -123,7 +123,7 @@ private function fetchIds(string $mediaType, array $idsToFetch, bool $resume): v $id = $ids[$index]; - $url = env('APP_URL') . "/v4/anime/$id"; + $url = env('APP_URL') . "/v4/$mediaType/$id"; $this->info("Indexing/Updating " . ($i + 1) . "/$idCount $url [MAL ID: $id]"); try @@ -171,7 +171,7 @@ public function handle(): int 'failed' => $this->option('failed') ?? 
false ], [ - 'mediaType' => 'required|in:anime,manga,character,people', + 'mediaType' => 'required|in:anime,manga,characters,people', 'delay' => 'integer|min:1', 'resume' => 'bool|prohibited_with:failed', 'failed' => 'bool|prohibited_with:resume' diff --git a/container-setup.sh b/container-setup.sh index dd56f6e9..f47c076f 100755 --- a/container-setup.sh +++ b/container-setup.sh @@ -34,7 +34,7 @@ display_help() { echo "stop Stop Jikan API" echo "validate-prereqs Validate pre-reqs installed (docker, docker-compose)" echo "execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days)" - echo "index-incrementally Executes the incremental indexers for each media type. (anime, manga, character, people)" + echo "index-incrementally Executes the incremental indexers for each media type. (anime, manga, characters, people)" echo "" } diff --git a/container_usage.md b/container_usage.md index a9cbb5bd..8de9b8d2 100644 --- a/container_usage.md +++ b/container_usage.md @@ -39,7 +39,7 @@ start Start Jikan API (mongodb, typesense, redis, jikan-api wor stop Stop Jikan API validate-prereqs Validate pre-reqs installed (docker, docker-compose) execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days) -index-incrementally Executes the incremental indexers for each media type. (anime, manga, character, people) +index-incrementally Executes the incremental indexers for each media type. 
(anime, manga, characters, people) ``` ### Running the indexer with the script From cc5c12ff390ab7c16e70f7a712141331ae58283d Mon Sep 17 00:00:00 2001 From: pushrbx Date: Tue, 5 Nov 2024 16:30:41 +0000 Subject: [PATCH 7/9] removed "characters" and "people" media types from incremental indexer command --- app/Console/Commands/Indexer/IncrementalIndexer.php | 4 ++-- container-setup.sh | 4 ++-- container_usage.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php index 0ec05d4e..fe532a06 100644 --- a/app/Console/Commands/Indexer/IncrementalIndexer.php +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -26,7 +26,7 @@ class IncrementalIndexer extends Command protected function promptForMissingArgumentsUsing(): array { return [ - 'mediaType' => ['The media type to index.', 'Valid values: anime, manga, characters, people'] + 'mediaType' => ['The media type to index.', 'Valid values: anime, manga'] ]; } @@ -171,7 +171,7 @@ public function handle(): int 'failed' => $this->option('failed') ?? false ], [ - 'mediaType' => 'required|in:anime,manga,characters,people', + 'mediaType' => 'required|in:anime,manga', 'delay' => 'integer|min:1', 'resume' => 'bool|prohibited_with:failed', 'failed' => 'bool|prohibited_with:resume' diff --git a/container-setup.sh b/container-setup.sh index f47c076f..3cc1f5c0 100755 --- a/container-setup.sh +++ b/container-setup.sh @@ -34,7 +34,7 @@ display_help() { echo "stop Stop Jikan API" echo "validate-prereqs Validate pre-reqs installed (docker, docker-compose)" echo "execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days)" - echo "index-incrementally Executes the incremental indexers for each media type. (anime, manga, characters, people)" + echo "index-incrementally Executes the incremental indexers for each media type. 
(anime, manga)" echo "" } @@ -171,7 +171,7 @@ case "$1" in ;; "index-incrementally") echo "Indexing..." - $DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:incremental anime manga character people + $DOCKER_COMPOSE_CMD -p "$DOCKER_COMPOSE_PROJECT_NAME" exec jikan_rest php /app/artisan indexer:incremental anime manga echo "Indexing done!" *) echo "No command specified, displaying help" diff --git a/container_usage.md b/container_usage.md index 8de9b8d2..f20e1103 100644 --- a/container_usage.md +++ b/container_usage.md @@ -39,7 +39,7 @@ start Start Jikan API (mongodb, typesense, redis, jikan-api wor stop Stop Jikan API validate-prereqs Validate pre-reqs installed (docker, docker-compose) execute-indexers Execute the indexers, which will scrape and index data from MAL. (Notice: This can take days) -index-incrementally Executes the incremental indexers for each media type. (anime, manga, characters, people) +index-incrementally Executes the incremental indexers for each media type. 
(anime, manga) ``` ### Running the indexer with the script From a87b4617d007e7b25fe85c327b4f9ce3f20087b4 Mon Sep 17 00:00:00 2001 From: pushrbx Date: Fri, 8 Nov 2024 19:30:00 +0000 Subject: [PATCH 8/9] fixed some issues with IncrementalIndexer --- .../Commands/Indexer/IncrementalIndexer.php | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/app/Console/Commands/Indexer/IncrementalIndexer.php b/app/Console/Commands/Indexer/IncrementalIndexer.php index fe532a06..eb076ac0 100644 --- a/app/Console/Commands/Indexer/IncrementalIndexer.php +++ b/app/Console/Commands/Indexer/IncrementalIndexer.php @@ -103,7 +103,9 @@ private function fetchIds(string $mediaType, array $idsToFetch, bool $resume): v $this->info("Resuming from index: $index"); } - if ($index > 0 && !isset($this->ids[$index])) + $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); + + if ($index > 0 && !isset($ids[$index])) { $index = 0; $this->warn('Invalid index; set back to 0'); @@ -112,7 +114,6 @@ private function fetchIds(string $mediaType, array $idsToFetch, bool $resume): v Storage::put("indexer/incremental/{$mediaType}_resume.save", 0); $this->info("$idCount $mediaType entries available"); - $ids = array_merge($idsToFetch['sfw'], $idsToFetch['nsfw']); for ($i = $index; $i <= ($idCount - 1); $i++) { @@ -184,7 +185,7 @@ public function handle(): int } // we want to handle signals from the OS - $this->trap(SIGTERM, fn () => $this->cancelled = true); + $this->trap([SIGTERM, SIGQUIT, SIGINT], fn () => $this->cancelled = true); $resume = $this->option('resume') ?? false; $onlyFailed = $this->option('failed') ?? 
false; @@ -208,13 +209,14 @@ public function handle(): int $idsToFetch = $this->getIdsToFetch($mediaType); } + if ($this->cancelled) + { + return 127; + } + $idCount = count($idsToFetch); - if ($idCount == 0) + if ($idCount === 0) { - if ($this->cancelled) - { - return 127; - } continue; } From 3ec81f2eb1d3dd0508ead78d1190ac78ba80619f Mon Sep 17 00:00:00 2001 From: pushrbx Date: Fri, 8 Nov 2024 19:31:51 +0000 Subject: [PATCH 9/9] updated COMMANDS.md --- COMMANDS.MD | 1 + 1 file changed, 1 insertion(+) diff --git a/COMMANDS.MD b/COMMANDS.MD index e07b6fe8..e1be3837 100644 --- a/COMMANDS.MD +++ b/COMMANDS.MD @@ -14,6 +14,7 @@ For an entire list of commands, you can run `php artisan list` - [Indexer](#indexer) - [Anime](#indexer-anime) - [Manga](#indexer-manga) + - [Incremental](#indexer-incremental) ## Commands