Skip to content

Commit

Permalink
fix: whitespace issues in some capa problem index_dictionary content
Browse files Browse the repository at this point in the history
  • Loading branch information
bradenmacdonald committed Sep 25, 2024
1 parent 3768efa commit b5efca7
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 23 deletions.
4 changes: 2 additions & 2 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def setUp(self):
"context_key": "lib:org1:lib",
"org": "org1",
"breadcrumbs": [{"display_name": "Library"}],
"content": {"problem_types": [], "capa_content": " "},
"content": {"problem_types": [], "capa_content": ""},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
Expand All @@ -157,7 +157,7 @@ def setUp(self):
"context_key": "lib:org1:lib",
"org": "org1",
"breadcrumbs": [{"display_name": "Library"}],
"content": {"problem_types": [], "capa_content": " "},
"content": {"problem_types": [], "capa_content": ""},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_create_delete_library_block(self, meilisearch_client):
"context_key": "lib:orgA:lib_a",
"org": "orgA",
"breadcrumbs": [{"display_name": "Library Org A"}],
"content": {"problem_types": [], "capa_content": " "},
"content": {"problem_types": [], "capa_content": ""},
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
Expand Down
6 changes: 5 additions & 1 deletion xmodule/capa_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,11 +616,15 @@ def index_dictionary(self):
"",
capa_content
)
# Strip out all other tags, leaving their content. But we want spaces between adjacent tags, so that
# <choice correct="true"><div>Option A</div></choice><choice correct="false"><div>Option B</div></choice>
# becomes "Option A Option B" not "Option AOption B" (these will appear in search results)
capa_content = re.sub(r"</(\w+)><([^>]+)>", r"</\1> <\2>", capa_content)
capa_content = re.sub(
r"(\s|&nbsp;|//)+",
" ",
nh3.clean(capa_content, tags=set())
)
).strip()

capa_body = {
"capa_content": capa_content,
Expand Down
57 changes: 38 additions & 19 deletions xmodule/tests/test_capa_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -3290,7 +3290,7 @@ def test_response_types_ignores_non_response_tags(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['multiplechoiceresponse'],
'content': {'display_name': name, 'capa_content': ' Label Some comment Apple Banana Chocolate Donut '}}
'content': {'display_name': name, 'capa_content': 'Label Some comment Apple Banana Chocolate Donut'}}

def test_response_types_multiple_tags(self):
xml = textwrap.dedent("""
Expand Down Expand Up @@ -3328,7 +3328,7 @@ def test_response_types_multiple_tags(self):
'problem_types': {"optionresponse", "multiplechoiceresponse"},
'content': {
'display_name': name,
'capa_content': " Label Some comment Donut Buggy '1','2' "
'capa_content': "Label Some comment Donut Buggy '1','2'"
},
}
)
Expand Down Expand Up @@ -3369,7 +3369,7 @@ def test_solutions_not_indexed(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': [],
'content': {'display_name': name, 'capa_content': ' '}}
'content': {'display_name': name, 'capa_content': ''}}

def test_indexing_checkboxes(self):
name = "Checkboxes"
Expand All @@ -3390,7 +3390,7 @@ def test_indexing_checkboxes(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['choiceresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_dropdown(self):
name = "Dropdown"
Expand All @@ -3405,7 +3405,7 @@ def test_indexing_dropdown(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['optionresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_multiple_choice(self):
name = "Multiple Choice"
Expand All @@ -3424,7 +3424,7 @@ def test_indexing_multiple_choice(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['multiplechoiceresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_numerical_input(self):
name = "Numerical Input"
Expand All @@ -3446,7 +3446,7 @@ def test_indexing_numerical_input(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['numericalresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_text_input(self):
name = "Text Input"
Expand All @@ -3465,7 +3465,7 @@ def test_indexing_text_input(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['stringresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_non_latin_problem(self):
sample_text_input_problem_xml = textwrap.dedent("""
Expand All @@ -3476,7 +3476,7 @@ def test_indexing_non_latin_problem(self):
""")
name = "Non latin Input"
block = self._create_block(sample_text_input_problem_xml, name=name)
capa_content = " Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL "
capa_content = "Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL"

block_dict = block.index_dictionary()
assert block_dict['content']['capa_content'] == smart_str(capa_content)
Expand All @@ -3503,7 +3503,7 @@ def test_indexing_checkboxes_with_hints_and_feedback(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['choiceresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_dropdown_with_hints_and_feedback(self):
name = "Dropdown with Hints and Feedback"
Expand All @@ -3523,7 +3523,7 @@ def test_indexing_dropdown_with_hints_and_feedback(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['optionresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_multiple_choice_with_hints_and_feedback(self):
name = "Multiple Choice with Hints and Feedback"
Expand All @@ -3543,7 +3543,7 @@ def test_indexing_multiple_choice_with_hints_and_feedback(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['multiplechoiceresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_numerical_input_with_hints_and_feedback(self):
name = "Numerical Input with Hints and Feedback"
Expand All @@ -3561,7 +3561,7 @@ def test_indexing_numerical_input_with_hints_and_feedback(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['numericalresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_text_input_with_hints_and_feedback(self):
name = "Text Input with Hints and Feedback"
Expand All @@ -3579,7 +3579,7 @@ def test_indexing_text_input_with_hints_and_feedback(self):
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['stringresponse'],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ').strip()}}

def test_indexing_problem_with_html_tags(self):
sample_problem_xml = textwrap.dedent("""
Expand All @@ -3598,14 +3598,33 @@ def test_indexing_problem_with_html_tags(self):
""")
name = "Mixed business"
block = self._create_block(sample_problem_xml, name=name)
capa_content = textwrap.dedent("""
This has HTML comment in it.
HTML end.
""")
capa_content = "This has HTML comment in it. HTML end."
assert block.index_dictionary() ==\
{'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': [],
'content': {'display_name': name, 'capa_content': capa_content.replace('\n', ' ')}}
'content': {'display_name': name, 'capa_content': capa_content}}

def test_indexing_problem_with_no_whitespace_between_tags(self):
"""
The new (MFE) visual editor for capa problems renders the OLX without spaces between the tags.
We want to make sure the index description is still readable and has whitespace.
"""
sample_problem_xml = (
"<problem display_name=\"No spaces\">"
"<choiceresponse><div>Question text here.</div><checkboxgroup>"
"<choice correct=\"true\"><div>Option A</div></choice>"
"<choice correct=\"false\"><div>Option B</div></choice>"
"</checkboxgroup></choiceresponse>"
"</problem>"
)
name = "No spaces"
block = self._create_block(sample_problem_xml, name=name)
capa_content = "Question text here. Option A Option B"
assert block.index_dictionary() == {
'content_type': ProblemBlock.INDEX_CONTENT_TYPE,
'problem_types': ['choiceresponse'],
'content': {'display_name': name, 'capa_content': capa_content},
}

def test_invalid_xml_handling(self):
"""
Expand Down

0 comments on commit b5efca7

Please sign in to comment.