/**
 * Report or request Elasticsearch full-text reindexing for the library
 * addressed by the current request.
 *
 * GET responds with a JSON object whose "reindexingStatus" is one of:
 *   - "deindexed": the library's deindexed_from_es flag is set
 *   - "indexed":   the number of records in ES equals the number of
 *                  indexable attachments in the library
 *   - "indexing":  anything else; "indexedCount" and "expectedCount" are
 *                  included so a client can show progress
 *
 * POST is only accepted while the library is flagged as deindexed. It sends
 * the library ID to the queue at Z_CONFIG::$REINDEX_QUEUE_URL and clears the
 * flag; otherwise it responds with a 400.
 */
public function reindex() {
	$this->allowMethods(['POST', 'GET']);
	
	// General library access
	if (!$this->permissions->canAccess($this->objectLibraryID)) {
		$this->e403();
	}
	
	$isDeindexed = Zotero_Libraries::checkEsIndexStatus($this->objectLibraryID);
	
	// GET - return indexing status of ES: indexing, indexed, deindexed
	if ($this->method == "GET") {
		// The flag takes precedence over the counts: a deindexed library
		// whose counts happen to match (e.g. 0 and 0) must not be reported
		// as "indexed" while POST would still accept a reindex request.
		if ($isDeindexed) {
			$result = ["reindexingStatus" => "deindexed"];
		}
		else {
			// Cast both sides so the strict comparison below cannot fail
			// merely because one backend returned a numeric string.
			// Current count of records in ES
			$esCount = (int) Zotero_FullText::countInLibrary($this->objectLibraryID);
			// Expected count of records in ES
			$expectedCount = (int) Zotero_Libraries::countIndexableAttachments($this->objectLibraryID);
			
			if ($esCount === $expectedCount) {
				$result = ["reindexingStatus" => "indexed"];
			}
			else {
				$result = [
					"reindexingStatus" => "indexing",
					"indexedCount" => $esCount,
					"expectedCount" => $expectedCount
				];
			}
		}
		echo Zotero_Utilities::formatJSON($result);
		$this->end();
		// Defensive: a status request must never continue into the POST path
		return;
	}
	
	// POST - request reindexing if the library was removed from ES
	if (!$isDeindexed) {
		$this->e400("Request was already submitted or the library was not removed from ElasticSearch");
	}
	
	// Send event to reindexing queue
	Z_SQS::send(Z_CONFIG::$REINDEX_QUEUE_URL, json_encode(['libraryID' => $this->objectLibraryID]));
	
	// Clear the flag so that a repeated POST is rejected and GET starts
	// reporting "indexing"
	Zotero_Libraries::setEsIndexStatus($this->objectLibraryID, 0);
	$this->end();
}
/**
 * Count the items in a library that are joined to itemAttachments, have the
 * 'attachment' item type and whose linkMode is not 'LINKED_URL'.
 *
 * The item type ID is looked up with the default connection; the count runs
 * on the shard returned by Zotero_Shards::getByLibraryID().
 *
 * @param int $libraryID
 * @return int Number of matching attachments (0 if none)
 */
public static function countIndexableAttachments($libraryID) {
	$attachmentTypeIDs = Zotero_DB::columnQuery(
		"SELECT itemTypeID FROM itemTypes WHERE itemTypeName IN ('attachment')"
	);
	// Without a type ID the IN () list below would be invalid SQL
	if (!$attachmentTypeIDs) {
		return 0;
	}
	
	// Note the trailing space before WHERE: the original concatenation
	// produced "USING (itemID)WHERE"
	$sql = "SELECT COUNT(*) AS count FROM items INNER JOIN itemAttachments USING (itemID) "
		. "WHERE NOT(linkMode='LINKED_URL') AND libraryID=? AND itemTypeID IN ("
		. implode(",", array_map('intval', $attachmentTypeIDs)) . ")";
	$rows = Zotero_DB::query($sql, $libraryID, Zotero_Shards::getByLibraryID($libraryID));
	
	// Return an int so callers can compare strictly against other counts
	return $rows ? (int) $rows[0]['count'] : 0;
}

/**
 * Tell whether a library is flagged as removed from Elasticsearch.
 *
 * @param int $libraryID
 * @return bool True if libraries.deindexed_from_es is 1 for this library;
 *              false otherwise, including when no value is returned
 */
public static function checkEsIndexStatus($libraryID) {
	$sql = "SELECT deindexed_from_es FROM libraries WHERE libraryID=?";
	// valueQuery avoids indexing into a result that may have no rows
	return Zotero_DB::valueQuery($sql, $libraryID) == 1;
}

/**
 * Set or clear the flag marking a library as removed from Elasticsearch.
 *
 * @param int $libraryID
 * @param bool|int $deindexed Truthy to mark the library as deindexed,
 *                            falsy to clear the flag
 */
public static function setEsIndexStatus($libraryID, $deindexed) {
	$sql = "UPDATE libraries SET deindexed_from_es=? WHERE libraryID=?";
	// Normalise to 0/1 so the column only ever receives a boolean value
	Zotero_DB::query($sql, [$deindexed ? 1 : 0, $libraryID]);
}