Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adds sortable fields to studio content search index [FC-0059] #35103

Merged
merged 11 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level3,
Fields.type,
Fields.access_id,
Fields.last_published,
])
# Mark which attributes are used for keyword search, in order of importance:
client.index(temp_index_name).update_searchable_attributes([
Expand All @@ -340,6 +341,24 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([
Fields.display_name,
Fields.created,
Fields.modified,
Fields.last_published,
])

# Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
# cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
client.index(temp_index_name).update_ranking_rules([
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
])

############## Libraries ##############
status_cb("Indexing libraries...")
Expand Down
11 changes: 10 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ class Fields:
type = "type" # DocType.course_block or DocType.library_block (see below)
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
display_name = "display_name"
modified = "modified"
created = "created"
last_published = "last_published"
block_type = "block_type"
context_key = "context_key"
org = "org"
Expand Down Expand Up @@ -221,14 +224,20 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given library block can be
found using faceted search.

Datetime fields (created, modified, last_published) are serialized to POSIX timestamps so that they can be used to
sort the search results.
"""
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)

doc = {
Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
Fields.type: DocType.library_block,
Fields.breadcrumbs: []
Fields.breadcrumbs: [],
Fields.created: xblock_metadata.created.timestamp(),
Fields.modified: xblock_metadata.modified.timestamp(),
Fields.last_published: xblock_metadata.last_published.timestamp() if xblock_metadata.last_published else None,
}

doc.update(_fields_from_block(block))
Expand Down
2 changes: 2 additions & 0 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
LIBRARY_BLOCK_UPDATED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
Expand Down Expand Up @@ -96,6 +97,7 @@ def xblock_deleted_handler(**kwargs) -> None:


@receiver(LIBRARY_BLOCK_CREATED)
@receiver(LIBRARY_BLOCK_UPDATED)
@only_if_meilisearch_enabled
def library_block_updated_handler(**kwargs) -> None:
"""
pomegranited marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
38 changes: 35 additions & 3 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@

import copy

from datetime import datetime, timezone
from unittest.mock import MagicMock, call, patch
from opaque_keys.edx.keys import UsageKey

import ddt
from django.test import override_settings
from freezegun import freeze_time
from organizations.tests.factories import OrganizationFactory

from common.djangoapps.student.tests.factories import UserFactory
Expand Down Expand Up @@ -118,8 +120,17 @@ def setUp(self):
title="Library",
)
lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key)
# Populate it with a problem:
self.problem1 = library_api.create_library_block(self.library.key, "problem", "p1")

# Populate it with 2 problems, freezing the date so we can verify created date serializes correctly.
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
self.problem1 = library_api.create_library_block(self.library.key, "problem", "p1")
self.problem2 = library_api.create_library_block(self.library.key, "problem", "p2")
# Update problem1, freezing the date so we can verify modified date serializes correctly.
modified_date = datetime(2024, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(modified_date):
library_api.set_library_block_olx(self.problem1.usage_key, "<problem />")

self.doc_problem1 = {
"id": "lborg1libproblemp1-a698218e",
"usage_key": "lb:org1:lib:problem:p1",
Expand All @@ -132,8 +143,10 @@ def setUp(self):
"content": {"problem_types": [], "capa_content": " "},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": modified_date.timestamp(),
}
self.problem2 = library_api.create_library_block(self.library.key, "problem", "p2")
self.doc_problem2 = {
"id": "lborg1libproblemp2-b2f65e29",
"usage_key": "lb:org1:lib:problem:p2",
Expand All @@ -146,6 +159,9 @@ def setUp(self):
"content": {"problem_types": [], "capa_content": " "},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
}

# Create a couple of taxonomies with tags
Expand Down Expand Up @@ -223,6 +239,22 @@ def mocked_from_component(lib_key, component):
any_order=True,
)

# Check that the sorting-related settings were updated to support sorting on the expected fields
mock_meilisearch.return_value.index.return_value.update_sortable_attributes.assert_called_with([
"display_name",
"created",
"modified",
"last_published",
])
mock_meilisearch.return_value.index.return_value.update_ranking_rules.assert_called_with([
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
])

@ddt.data(
True,
False
Expand Down
26 changes: 24 additions & 2 deletions openedx/core/djangoapps/content/search/tests/test_handlers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""
Tests for the search index update handlers
"""
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch

from django.test import LiveServerTestCase, override_settings
from freezegun import freeze_time
from organizations.tests.factories import OrganizationFactory

from common.djangoapps.student.tests.factories import UserFactory
Expand Down Expand Up @@ -132,7 +134,10 @@ def test_create_delete_library_block(self, meilisearch_client):
)
lib_access, _ = SearchAccess.objects.get_or_create(context_key=library.key)

problem = library_api.create_library_block(library.key, "problem", "Problem1")
# Populate it with a problem, freezing the date so we can verify created date serializes correctly.
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
problem = library_api.create_library_block(library.key, "problem", "Problem1")
doc_problem = {
"id": "lborgalib_aproblemproblem1-ca3186e9",
"type": "library_block",
Expand All @@ -145,17 +150,34 @@ def test_create_delete_library_block(self, meilisearch_client):
"breadcrumbs": [{"display_name": "Library Org A"}],
"content": {"problem_types": [], "capa_content": " "},
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
}

meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Rename the content library
library_api.update_library(library.key, title="Updated Library Org A")

# The breadcrumbs should be updated
# The breadcrumbs should be updated (but nothing else)
doc_problem["breadcrumbs"][0]["display_name"] = "Updated Library Org A"
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Edit the problem block, freezing the date so we can verify modified date serializes correctly
modified_date = datetime(2024, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(modified_date):
library_api.set_library_block_olx(problem.usage_key, "<problem />")
doc_problem["modified"] = modified_date.timestamp()
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Publish the content library, freezing the date so we can verify last_published date serializes correctly
published_date = datetime(2024, 6, 7, 8, 9, 10, tzinfo=timezone.utc)
with freeze_time(published_date):
library_api.publish_changes(library.key)
doc_problem["last_published"] = published_date.timestamp()
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])

# Delete the Library Block
library_api.delete_library_block(problem.usage_key)

Expand Down
18 changes: 12 additions & 6 deletions openedx/core/djangoapps/content_libraries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ class LibraryXBlockMetadata:
Class that represents the metadata about an XBlock in a content library.
"""
usage_key = attr.ib(type=LibraryUsageLocatorV2)
created = attr.ib(type=datetime)
modified = attr.ib(type=datetime)
display_name = attr.ib("")
last_published = attr.ib(default=None, type=datetime)
has_unpublished_changes = attr.ib(False)
tags_count = attr.ib(0)

Expand All @@ -203,13 +206,18 @@ def from_component(cls, library_key, component):
"""
Construct a LibraryXBlockMetadata from a Component object.
"""
last_publish_log = authoring_api.get_last_publish(component.pk)

return cls(
usage_key=LibraryUsageLocatorV2(
library_key,
component.component_type.name,
component.local_key,
),
display_name=component.versioning.draft.title,
created=component.created,
modified=component.versioning.draft.created,
last_published=None if last_publish_log is None else last_publish_log.published_at,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ormsbee 👋
I added this code to record the published date for library components in the Studio search index. However, it doesn't work, seemingly because components don't get publish records recorded when they are published -- only the parent library does.

However, there isn't a date available on the component.versioning.published ComponentVersion -- the published.created matches the draft.modified date after publishing, and doesn't represent the actual date published.

What would be the best way to address this issue?

CC @bradenmacdonald

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cf #35195

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember if there's a good helper for it at the moment, but I think you can get the datetime when a Component was published by following these relations:

Component.publishable_entity -> PublishableEntity.published -> Published.publish_log_record -> PublishLogRecord.publish_log -> PublishLog. published_at

has_unpublished_changes=component.versioning.has_unpublished_changes
)

Expand Down Expand Up @@ -660,13 +668,11 @@ def get_library_block(usage_key) -> LibraryXBlockMetadata:
if not draft_version:
raise ContentLibraryBlockNotFound(usage_key)

published_version = component.versioning.published

return LibraryXBlockMetadata(
usage_key=usage_key,
display_name=draft_version.title,
has_unpublished_changes=(draft_version != published_version),
xblock_metadata = LibraryXBlockMetadata.from_component(
library_key=usage_key.context_key,
component=component,
)
return xblock_metadata


def set_library_block_olx(usage_key, new_olx_str):
Expand Down
16 changes: 16 additions & 0 deletions openedx/core/djangoapps/content_libraries/library_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

from django.core.exceptions import PermissionDenied

from openedx_events.content_authoring.data import LibraryBlockData
from openedx_events.content_authoring.signals import LIBRARY_BLOCK_UPDATED

from openedx.core.djangoapps.content_libraries import api, permissions
from openedx.core.djangoapps.content_libraries.models import ContentLibrary
from openedx.core.djangoapps.xblock.api import LearningContext
Expand Down Expand Up @@ -93,3 +96,16 @@ def block_exists(self, usage_key):
type_name=usage_key.block_type,
local_key=usage_key.block_id,
)

def send_block_updated_event(self, usage_key):
"""
Send a "block updated" event for the library block with the given usage_key.

usage_key: the UsageKeyV2 subclass used for this learning context
"""
LIBRARY_BLOCK_UPDATED.send_event(
library_block=LibraryBlockData(
library_key=usage_key.lib_key,
usage_key=usage_key,
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def test_build_library_object_tree(self) -> None:
"""
Test if we can export a library
"""
with self.assertNumQueries(8):
with self.assertNumQueries(11):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial changes to LibraryXBlockMetadata.from_component brought this query count up to 14, because it used component.versioning.latest.

Using component.versioning.draft instead brought the query count down to 11, because the authoring API doesn't give us a more efficient way to fetch the published_at date than from the last_publish_log.

Welcome any advice on how to avoid the increased query count here!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like an @ormsbee question if I'm correctly understanding that it's about efficiently fetching modified dates from learning core's authoring API.

tagged_library = build_object_tree_with_objecttags(self.library.key, self.all_library_object_tags)

assert tagged_library == self.expected_library_tagged_xblock
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,10 @@ def definition_for_usage(self, usage_key, **kwargs):
Retuns None if the usage key doesn't exist in this context.
"""
raise NotImplementedError

def send_block_updated_event(self, usage_key):
"""
Send a "block updated" event for the block with the given usage_key in this context.

usage_key: the UsageKeyV2 subclass used for this learning context
"""
5 changes: 5 additions & 0 deletions openedx/core/djangoapps/xblock/rest_api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx.core.djangoapps.xblock.learning_context.manager import get_learning_context_impl
from openedx.core.lib.api.view_utils import view_auth_classes
from ..api import (
get_block_metadata,
Expand Down Expand Up @@ -254,6 +255,10 @@ def post(self, request, usage_key_str):
# Save after the callback so any changes made in the callback will get persisted.
block.save()

# Signal that we've modified this block
context_impl = get_learning_context_impl(usage_key)
context_impl.send_updated_event(usage_key)

return Response({
"id": str(block.location),
"data": data,
Expand Down
Loading