Soft delete collections #35496

Merged · 16 commits · Sep 25, 2024
Changes from 8 commits
125 changes: 105 additions & 20 deletions openedx/core/djangoapps/content/search/api.py
@@ -15,7 +15,7 @@
from django.core.cache import cache
from django.core.paginator import Paginator
from meilisearch import Client as MeilisearchClient
-from meilisearch.errors import MeilisearchError
+from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator
@@ -34,6 +34,7 @@
    searchable_doc_for_course_block,
    searchable_doc_for_collection,
    searchable_doc_for_library_block,
+   searchable_doc_for_usage_key,
    searchable_doc_collections,
    searchable_doc_tags,
    searchable_doc_tags_for_collection,
@@ -400,8 +401,8 @@ def index_collection_batch(batch, num_done, library_key) -> int:
        docs = []
        for collection in batch:
            try:
-               doc = searchable_doc_for_collection(collection)
-               doc.update(searchable_doc_tags_for_collection(library_key, collection))
+               doc = searchable_doc_for_collection(library_key, collection.key, collection=collection)
+               doc.update(searchable_doc_tags_for_collection(library_key, collection.key))
                docs.append(doc)
            except Exception as err:  # pylint: disable=broad-except
                status_cb(f"Error indexing collection {collection}: {err}")
@@ -510,15 +511,28 @@ def delete_index_doc(usage_key: UsageKey) -> None:
    Args:
        usage_key (UsageKey): The usage key of the XBlock to be removed from the index
    """
-   current_rebuild_index_name = _get_running_rebuild_index_name()
+   doc = searchable_doc_for_usage_key(usage_key)
+   _delete_index_doc(doc[Fields.id])
+
+
+def _delete_index_doc(doc_id) -> None:
+   """
+   Helper function that deletes the document with the given ID from the search index
+
+   If there is a rebuild in progress, the document will also be removed from the new index.
+   """
+   if not doc_id:
+       return
+
    client = _get_meilisearch_client()
+   current_rebuild_index_name = _get_running_rebuild_index_name()

    tasks = []
    if current_rebuild_index_name:
-       # If there is a rebuild in progress, the document will also be deleted from the new index.
-       tasks.append(client.index(current_rebuild_index_name).delete_document(meili_id_from_opaque_key(usage_key)))
-   tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(meili_id_from_opaque_key(usage_key)))
+       # If there is a rebuild in progress, the document will also be removed from the new index.
+       tasks.append(client.index(current_rebuild_index_name).delete_document(doc_id))
+
+   tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(doc_id))

    _wait_for_meili_tasks(tasks)

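With this refactor, `delete_index_doc` only resolves the usage key to a Meilisearch document ID and hands off to `_delete_index_doc`, which the collection code below reuses. A minimal usage sketch (assumed, not from the PR: the usage-key string and ID value are illustrative, and `Fields.id` is assumed to serialize as `"id"`):

```python
from opaque_keys.edx.keys import UsageKey

# Illustrative usage key for a v2 library block; the ID value is invented.
usage_key = UsageKey.from_string("lb:OpenedX:demo-lib:problem:quiz1")
doc = searchable_doc_for_usage_key(usage_key)  # -> {"id": "8f0e3c..."}

# Deletes from the main index, and from the rebuild index if one is in progress.
_delete_index_doc(doc[Fields.id])
```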
Expand Down Expand Up @@ -561,20 +575,93 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None:
_update_index_docs(docs)


def _get_document_from_index(document_id: str) -> dict:
"""
Returns the Document identified by the given ID, from the given index.

Returns None if the document or index do not exist.
"""
client = _get_meilisearch_client()
document = None
try:
index = client.get_index(STUDIO_INDEX_NAME)
return index.get_document(id)
except (MeilisearchError, MeilisearchApiError) as err:
# The index or documennt doesn't exist
log.exception(err)

return None
Review comment from navinkarkera (Contributor):

    document is not used. I think you meant to save index.get_document(id) in document and return it at the end.

    Suggested change:
    -   document = None
    -   try:
    -       index = client.get_index(STUDIO_INDEX_NAME)
    -       return index.get_document(id)
    -   except (MeilisearchError, MeilisearchApiError) as err:
    -       # The index or document doesn't exist
    -       log.exception(err)
    -   return None
    +   document = None
    +   try:
    +       index = client.get_index(STUDIO_INDEX_NAME)
    +       document = index.get_document(id)
    +   except (MeilisearchError, MeilisearchApiError) as err:
    +       # The index or document doesn't exist
    +       log.exception(err)
    +   return document

Reply from the PR author (Contributor):

    Oh wow, thank you for spotting those typos @navinkarkera! Fixed with e92f405.



def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None:
    """
-   Creates or updates the document for the given Library Collection in the search index
+   Creates, updates, or deletes the document for the given Library Collection in the search index.
+
+   If the Collection is not found or disabled (i.e. soft-deleted), then delete it from the search index.
    """
-   content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
-   collection = authoring_api.get_collection(
-       learning_package_id=content_library.learning_package_id,
-       collection_key=collection_key,
-   )
-   docs = [
-       searchable_doc_for_collection(collection)
-   ]
-
-   _update_index_docs(docs)
+   doc = searchable_doc_for_collection(library_key, collection_key)
+   update_components = False
+
+   # Soft-deleted/disabled collections are removed from the index
+   # and their components updated.
+   if doc.get('_disabled'):
+       _delete_index_doc(doc[Fields.id])
+       update_components = True
+
+   # Hard-deleted collections are also deleted from the index, but their components
+   # are automatically updated as part of the deletion process, so we don't have to.
+   elif not doc.get(Fields.type):
+       _delete_index_doc(doc[Fields.id])
+
+   # Otherwise, upsert the collection.
+   # Newly-added/restored collections get their components updated too.
+   else:
+       already_indexed = _get_document_from_index(doc[Fields.id])
+       if not already_indexed:
+           update_components = True
+
+       _update_index_docs([doc])
+
+   # Asynchronously update the "collections" field on the collection's components
+   if update_components:
+       from .tasks import update_library_components_collections as update_task
+
+       update_task.delay(str(library_key), collection_key)

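To make the branching concrete, here is a hedged sketch of the three document shapes the function distinguishes (key names follow the code above, assuming `Fields.id`/`Fields.type` serialize as `"id"`/`"type"`; the values are invented):

```python
# Live collection: upserted; components refreshed only if it wasn't indexed before.
live_doc = {"id": "abc123", "type": "collection", "display_name": "My Collection"}

# Soft-deleted (disabled): removed from the index, and components refreshed.
soft_deleted_doc = {"id": "abc123", "type": "collection", "_disabled": True}

# Hard-deleted or never existed: no "type" field; removed from the index only.
hard_deleted_doc = {"id": "abc123"}
```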

+def update_library_components_collections(
+   library_key: LibraryLocatorV2,
+   collection_key: str,
+   batch_size: int = 1000,
+) -> None:
+   """
+   Updates the "collections" field for all components associated with a given Library Collection.
+
+   Because there may be a lot of components, we send these updates to Meilisearch in batches.
+   """
+   library = lib_api.get_library(library_key)
+   components = authoring_api.get_collection_components(library.learning_package.id, collection_key)
+
+   paginator = Paginator(components, batch_size)
+   for page in paginator.page_range:
+       docs = []
+       for component in paginator.page(page).object_list:
+           usage_key = lib_api.library_component_usage_key(
+               library_key,
+               component,
+           )
+           doc = searchable_doc_collections(usage_key)
+           docs.append(doc)
+
+       log.info(
+           f"Updating document.collections for library {library_key} components"
+           f" page {page} / {paginator.num_pages}"
+       )
+       _update_index_docs(docs)

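The `update_task.delay(...)` call above implies a Celery task wrapper in `tasks.py` that this diff doesn't show. A hedged sketch of its likely shape (the decorator details and module import path are assumptions):

```python
from celery import shared_task
from opaque_keys.edx.locator import LibraryLocatorV2

from openedx.core.djangoapps.content.search import api


@shared_task
def update_library_components_collections(library_key_str: str, collection_key: str) -> None:
    """Deserialize the library key and run the synchronous batch update above."""
    library_key = LibraryLocatorV2.from_string(library_key_str)
    api.update_library_components_collections(library_key, collection_key)
```

Passing the key as a string keeps the task arguments JSON-serializable for the broker.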

def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None:

@@ -612,10 +699,8 @@ def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLocator) -> None:
    """
    Updates the tags data in documents for the given library collection
    """
-   collection = lib_api.get_library_collection_from_usage_key(collection_usage_key)
-
-   doc = {Fields.id: collection.id}
-   doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection))
+   doc = searchable_doc_tags_for_collection(collection_usage_key.library_key, collection_usage_key.collection_id)
    _update_index_docs([doc])


109 changes: 63 additions & 46 deletions openedx/core/djangoapps/content/search/documents.py
@@ -16,7 +16,7 @@
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
-from openedx_learning.api.authoring_models import LearningPackage
+from openedx_learning.api.authoring_models import Collection

log = logging.getLogger(__name__)
@@ -112,6 +112,15 @@ def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int:
    return access.id


+def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
+   """
+   Generates a base document identified by its usage key.
+   """
+   return {
+       Fields.id: meili_id_from_opaque_key(usage_key),
+   }

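For illustration, the base document carries nothing but the Meilisearch-safe ID (a sketch: the hash value is invented, and `meili_id_from_opaque_key` is assumed to derive a stable ID from the serialized key):

```python
from opaque_keys.edx.keys import UsageKey

doc = searchable_doc_for_usage_key(UsageKey.from_string("lb:OpenedX:demo-lib:html:intro"))
print(doc)  # {'id': 'c5e9a3f1...'}  -- value illustrative
```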

def _fields_from_block(block) -> dict:
    """
    Given an XBlock instance, call its index_dictionary() method to load any
@@ -297,14 +306,14 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
    library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
    block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

-   doc = {
-       Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
+   doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
+   doc.update({
        Fields.type: DocType.library_block,
        Fields.breadcrumbs: [],
        Fields.created: xblock_metadata.created.timestamp(),
        Fields.modified: xblock_metadata.modified.timestamp(),
        Fields.last_published: xblock_metadata.last_published.timestamp() if xblock_metadata.last_published else None,
-   }
+   })

    doc.update(_fields_from_block(block))
@@ -319,9 +328,7 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
    Generate a dictionary document suitable for ingestion into a search engine
    like Meilisearch or Elasticsearch, with the tags data for the given content object.
    """
-   doc = {
-       Fields.id: meili_id_from_opaque_key(usage_key),
-   }
+   doc = searchable_doc_for_usage_key(usage_key)
    doc.update(_tags_for_content_object(usage_key))

    return doc
@@ -332,31 +339,25 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
    Generate a dictionary document suitable for ingestion into a search engine
    like Meilisearch or Elasticsearch, with the collections data for the given content object.
    """
-   doc = {
-       Fields.id: meili_id_from_opaque_key(usage_key),
-   }
+   doc = searchable_doc_for_usage_key(usage_key)
    doc.update(_collections_for_content_object(usage_key))

    return doc


def searchable_doc_tags_for_collection(
    library_key: LibraryLocatorV2,
-   collection,
+   collection_key: str,
) -> dict:
    """
    Generate a dictionary document suitable for ingestion into a search engine
    like Meilisearch or Elasticsearch, with the tags data for the given library collection.
    """
-   doc = {
-       Fields.id: collection.id,
-   }
-
    collection_usage_key = lib_api.get_library_collection_usage_key(
        library_key,
-       collection.key,
+       collection_key,
    )

+   doc = searchable_doc_for_usage_key(collection_usage_key)
    doc.update(_tags_for_content_object(collection_usage_key))

    return doc
@@ -368,49 +369,65 @@ def searchable_doc_for_course_block(block) -> dict:
    like Meilisearch or Elasticsearch, so that the given course block can be
    found using faceted search.
    """
-   doc = {
-       Fields.id: meili_id_from_opaque_key(block.usage_key),
+   doc = searchable_doc_for_usage_key(block.usage_key)
+   doc.update({
        Fields.type: DocType.course_block,
-   }
+   })

    doc.update(_fields_from_block(block))

    return doc


-def searchable_doc_for_collection(collection) -> dict:
+def searchable_doc_for_collection(
+   library_key: LibraryLocatorV2,
+   collection_key: str,
+   *,
+   # Optionally provide the collection if we've already fetched one
+   collection: Collection | None = None,
+) -> dict:
    """
    Generate a dictionary document suitable for ingestion into a search engine
    like Meilisearch or Elasticsearch, so that the given collection can be
    found using faceted search.
+
+   If no collection is found for the given library_key + collection_key, the returned document
+   will contain only basic information derived from the collection usage key, and no Fields.type
+   value will be included in the returned dict.
    """
-   doc = {
-       Fields.id: collection.id,
-       Fields.block_id: collection.key,
-       Fields.type: DocType.collection,
-       Fields.display_name: collection.title,
-       Fields.description: collection.description,
-       Fields.created: collection.created.timestamp(),
-       Fields.modified: collection.modified.timestamp(),
-       # Add related learning_package.key as context_key by default.
-       # If related contentlibrary is found, it will override this value below.
-       # Mostly contentlibrary.library_key == learning_package.key
-       Fields.context_key: collection.learning_package.key,
-       Fields.num_children: collection.entities.count(),
-   }
-   # Just in case learning_package is not related to a library
+   collection_usage_key = lib_api.get_library_collection_usage_key(
+       library_key,
+       collection_key,
+   )
+
+   doc = searchable_doc_for_usage_key(collection_usage_key)
+
    try:
-       context_key = collection.learning_package.contentlibrary.library_key
-       org = str(context_key.org)
+       collection = collection or lib_api.get_library_collection_from_usage_key(collection_usage_key)
+   except lib_api.ContentLibraryCollectionNotFound:
+       # Collection not found, so we can only return the base doc
+       pass
+
+   if collection:
+       assert collection.key == collection_key
+
        doc.update({
-           Fields.context_key: str(context_key),
-           Fields.org: org,
-           Fields.usage_key: str(lib_api.get_library_collection_usage_key(context_key, collection.key)),
+           Fields.context_key: str(library_key),
+           Fields.org: str(library_key.org),
+           Fields.usage_key: str(collection_usage_key),
+           Fields.block_id: collection.key,
+           Fields.type: DocType.collection,
+           Fields.display_name: collection.title,
+           Fields.description: collection.description,
+           Fields.created: collection.created.timestamp(),
+           Fields.modified: collection.modified.timestamp(),
+           Fields.num_children: collection.entities.count(),
+           Fields.access_id: _meili_access_id_from_context_key(library_key),
+           Fields.breadcrumbs: [{"display_name": collection.learning_package.title}],
        })
-   except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
-       log.warning(f"Related library not found for {collection}")
-   doc[Fields.access_id] = _meili_access_id_from_context_key(doc[Fields.context_key])
-   # Add the breadcrumbs.
-   doc[Fields.breadcrumbs] = [{"display_name": collection.learning_package.title}]
+
+       # Disabled collections should be removed from the search index,
+       # so we mark them as _disabled
+       if not collection.enabled:
+           doc['_disabled'] = True

    return doc
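Putting it together, a live collection indexes roughly like the following (hedged example: the field names come from the code above, `Fields.*` members are assumed to serialize to these keys, and all values are invented):

```python
{
    "id": "d41d8cd9...",  # derived from the collection usage key
    "context_key": "lib:OpenedX:demo-lib",
    "org": "OpenedX",
    "usage_key": "lib-collection:OpenedX:demo-lib:my-first-collection",
    "block_id": "my-first-collection",
    "type": "collection",
    "display_name": "My First Collection",
    "description": "Three problems and a video",
    "created": 1727222400.0,
    "modified": 1727222400.0,
    "num_children": 4,
    "access_id": 42,
    "breadcrumbs": [{"display_name": "Demo Library"}],
}
```

A soft-deleted collection would additionally carry `"_disabled": True`, which the upsert path in api.py uses to remove it from the index.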
2 changes: 2 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
@@ -23,6 +23,7 @@
    LIBRARY_BLOCK_DELETED,
    LIBRARY_BLOCK_UPDATED,
    LIBRARY_COLLECTION_CREATED,
+   LIBRARY_COLLECTION_DELETED,
    LIBRARY_COLLECTION_UPDATED,
    XBLOCK_CREATED,
    XBLOCK_DELETED,

@@ -166,6 +167,7 @@ def content_library_updated_handler(**kwargs) -> None:


@receiver(LIBRARY_COLLECTION_CREATED)
+@receiver(LIBRARY_COLLECTION_DELETED)
@receiver(LIBRARY_COLLECTION_UPDATED)
@only_if_meilisearch_enabled
def library_collection_updated_handler(**kwargs) -> None:
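The handler body is truncated in this view. Based on the decorators and the new upsert logic in api.py, a plausible sketch of what it does (the event payload key and attribute names are assumptions, not confirmed by this diff):

```python
def library_collection_updated_handler(**kwargs) -> None:
    """
    Create, update, or delete the collection's search document when a
    LIBRARY_COLLECTION_* event fires.
    """
    library_collection = kwargs.get("library_collection", None)  # assumed payload key
    if not library_collection:
        log.error("Received null or incorrect data for event")
        return

    # Delegates to the API function shown above, which now also handles
    # soft-deleted (disabled) and hard-deleted collections.
    upsert_library_collection_index_doc(
        library_collection.library_key,
        library_collection.collection_key,  # assumed attribute name; may be `.key`
    )
```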