Skip to content

Commit

Permalink
enable reindexing of library to ES
Browse files Browse the repository at this point in the history
- added boolean column deindexed_from_es to the library table.
It should be set to true when the library is deleted from ES.
- GET /fulltext/reindex returns reindexingStatus that's one of
indexed, indexing or deindexed. 'deindexed' if deindexed_from_es is true.
'indexed' if # of indexed items = # of indexable attachments, and
'indexing' otherwise.
- POST /fulltext/reindex adds an event to SQS that triggers full-text-indexer
lambda that handles reindexing and sets deindexed_from_es = false.
Only possible if deindexed_from_es is initially true.
  • Loading branch information
abaevbog committed Feb 16, 2024
1 parent 17e67f4 commit 59b37e9
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 1 deletion.
44 changes: 44 additions & 0 deletions controllers/FullTextController.php
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,48 @@ public function itemContent() {

$this->end();
}

/**
 * Report (GET) or request (POST) Elasticsearch reindexing of the current library.
 *
 * GET responds with JSON containing "reindexingStatus":
 *   - "deindexed" when the library's deindexed_from_es flag is set
 *   - "indexed"   when the ES record count equals the indexable attachment count
 *   - "indexing"  otherwise (also includes "indexedCount" and "expectedCount")
 *
 * POST sends a reindex event to the SQS reindex queue and clears the
 * deindexed flag. It is rejected with a 400 unless the flag is currently set.
 */
public function reindex() {
    $this->allowMethods(['POST', 'GET']);

    // General library access
    if (!$this->permissions->canAccess($this->objectLibraryID)) {
        $this->e403();
    }

    $isDeindexed = Zotero_Libraries::checkEsIndexStatus($this->objectLibraryID);

    // GET - return indexing status of ES: indexing, indexed, deindexed
    if ($this->method == "GET") {
        // The deindexed flag takes precedence over the counts: a library that
        // was removed from ES must report "deindexed" even if the two counts
        // happen to match (e.g. both are zero).
        if ($isDeindexed) {
            $result = ["reindexingStatus" => "deindexed"];
        }
        else {
            // Normalize both counts to int so the strict comparison below
            // cannot fail merely because one source returns a numeric string.
            // Current count of records in ES
            $esCount = (int) Zotero_FullText::countInLibrary($this->objectLibraryID);
            // Expected count of records in ES
            $expectedCount = (int) Zotero_Libraries::countIndexableAttachments($this->objectLibraryID);

            if ($esCount === $expectedCount) {
                $result = ["reindexingStatus" => "indexed"];
            }
            else {
                $result = [
                    "reindexingStatus" => "indexing",
                    "indexedCount" => $esCount,
                    "expectedCount" => $expectedCount
                ];
            }
        }
        echo Zotero_Utilities::formatJSON($result);
        $this->end();
    }

    // POST - request reindexing if the library was removed from ES
    if (!$isDeindexed) {
        $this->e400("Request was already submitted or the library was not removed from ElasticSearch");
    }

    // Send event to reindexing queue
    Z_SQS::send(Z_CONFIG::$REINDEX_QUEUE_URL, json_encode(['libraryID' => $this->objectLibraryID]));

    // Clear the flag so that a repeated POST is rejected while reindexing runs
    Zotero_Libraries::setEsIndexStatus($this->objectLibraryID, 0);
    $this->end();
}

}
2 changes: 1 addition & 1 deletion include/SQS.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public static function deleteBatch($queueURL, $batchEntries) {

/**
 * Lazily create the shared SQS client the first time it is needed.
 */
private static function load() {
    if (!self::$sqs) {
        self::$sqs = Z_Core::$AWS->createSQS();
    }
}
}
2 changes: 2 additions & 0 deletions include/config/config.inc.php-sample
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,7 @@ class Z_CONFIG {
public static $CACHE_VERSION_ITEM_DATA = 1;
public static $CACHE_VERSION_RESPONSE_JSON_COLLECTION = 1;
public static $CACHE_VERSION_RESPONSE_JSON_ITEM = 1;

// URL of the SQS queue that receives library reindexing requests
public static $REINDEX_QUEUE_URL = "";
}
?>
2 changes: 2 additions & 0 deletions include/config/routes.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
$router->map('/users/i:objectUserID/fulltext', array('controller' => 'FullText', 'action' => 'fulltext'));
//$router->map('/users/i:objectUserID/publications/fulltext', ['controller' => 'FullText', 'action' => 'fulltext', 'extra' => ['publications' => true]]);
$router->map('/groups/i:objectGroupID/fulltext', array('controller' => 'FullText', 'action' => 'fulltext'));
// Full-text reindexing: GET returns the reindexing status, POST requests a reindex
$router->map('/groups/i:objectGroupID/fulltext/reindex', array('controller' => 'FullText', 'action' => 'reindex'));
$router->map('/users/i:objectUserID/fulltext/reindex', array('controller' => 'FullText', 'action' => 'reindex'));

// All trashed items
$router->map('/users/i:objectUserID/items/trash', array('controller' => 'Items', 'extra' => array('subset' => 'trash')));
Expand Down
6 changes: 6 additions & 0 deletions misc/db-updates/2024-02-15/add_es_status_to_library
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/php -d mysqlnd.net_read_timeout=3600
<?php
// Load the project bootstrap from the shared include directory
set_include_path("../../../include");
require("header.inc.php");

// Add the per-library flag marking that the library was deleted from ES;
// the default of 0 leaves every existing library marked as not deindexed
Zotero_DB::query("ALTER TABLE libraries ADD COLUMN deindexed_from_es tinyint(1) unsigned NOT NULL DEFAULT '0';");
1 change: 1 addition & 0 deletions misc/master.sql
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ CREATE TABLE `libraries` (
`lastUpdated` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`version` int(10) unsigned NOT NULL DEFAULT '0',
`shardID` smallint(5) unsigned NOT NULL,
`deindexed_from_es` tinyint(1) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`libraryID`),
KEY `shardID` (`shardID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
Expand Down
19 changes: 19 additions & 0 deletions model/FullText.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,25 @@ public static function searchInLibrary($libraryID, $searchText) {
}
return $keys;
}

/**
 * Count the full-text records stored in Elasticsearch for a library.
 *
 * @param int $libraryID Library whose records are counted
 * @return mixed The "count" field of the Elasticsearch count response
 */
public static function countInLibrary($libraryID) {
    // Restrict the count to documents whose libraryID term matches
    $libraryFilter = ['term' => ['libraryID' => $libraryID]];
    $indexName = self::$elasticsearchType . "_index";

    $response = Z_Core::$ES->count([
        'index' => $indexName,
        'body' => ['query' => ['bool' => ['filter' => $libraryFilter]]]
    ]);

    return $response['count'];
}

public static function deleteItemContent(Zotero_Item $item) {
$libraryID = $item->libraryID;
Expand Down
22 changes: 22 additions & 0 deletions model/Libraries.inc.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@ public static function exists($libraryID) {
$sql = "SELECT COUNT(*) FROM libraries WHERE libraryID=?";
return !!Zotero_DB::valueQuery($sql, $libraryID);
}

/**
 * Count the attachment items of a library that are expected to have a
 * full-text record in Elasticsearch: items of type 'attachment' with an
 * itemAttachments row whose linkMode is not LINKED_URL.
 *
 * @param int $libraryID
 * @return int Number of matching attachments (0 if no attachment type is defined)
 */
public static function countIndexableAttachments($libraryID) {
    $attachmentTypeIDs = Zotero_DB::columnQuery(
        "SELECT itemTypeID FROM itemTypes "
        . "WHERE itemTypeName IN ('attachment') "
    );
    // With no attachment type there is nothing to count, and an empty
    // list would produce the invalid SQL fragment "IN ()"
    if (!$attachmentTypeIDs) {
        return 0;
    }

    // The IDs come from our own table, but force them to integers anyway
    // since they are interpolated into the statement
    $typeList = implode(",", array_map('intval', $attachmentTypeIDs));

    $sql = "SELECT COUNT(*) AS count FROM items INNER JOIN itemAttachments USING (itemID) "
        . "WHERE NOT(linkMode='LINKED_URL') AND libraryID=? AND itemTypeID IN (" . $typeList . ")";
    $rows = Zotero_DB::query($sql, $libraryID, Zotero_Shards::getByLibraryID($libraryID));

    // Return an integer so callers can compare strictly against other counts
    return (int) $rows[0]['count'];
}

/**
 * Tell whether a library is flagged as removed from Elasticsearch.
 *
 * @param int $libraryID
 * @return bool True if the library's deindexed_from_es column is 1;
 *              false otherwise, including when no row is returned
 */
public static function checkEsIndexStatus($libraryID) {
    $sql = "SELECT deindexed_from_es FROM libraries WHERE libraryID=?";
    $rows = Zotero_DB::query($sql, $libraryID);
    // No row for this library: treat it as not deindexed rather than
    // indexing into an empty result
    if (!$rows) {
        return false;
    }
    return (int) $rows[0]['deindexed_from_es'] === 1;
}

/**
 * Store the deindexed_from_es flag for a library.
 *
 * @param int $libraryID Library row to update
 * @param mixed $deindexed Value written to the deindexed_from_es column
 */
public static function setEsIndexStatus($libraryID, $deindexed) {
    $params = [$deindexed, $libraryID];
    Zotero_DB::query("UPDATE libraries SET deindexed_from_es=? WHERE libraryID=?", $params);
}


public static function getName($libraryID) {
Expand Down

0 comments on commit 59b37e9

Please sign in to comment.