diff --git a/readthedocs/search/parsers.py b/readthedocs/search/parsers.py index f5a99294b79..7031217d56f 100644 --- a/readthedocs/search/parsers.py +++ b/readthedocs/search/parsers.py @@ -15,6 +15,45 @@ class GenericParser: # Limit that matches the ``index.mapping.nested_objects.limit`` ES setting. max_inner_documents = 10000 + # Block level elements have an implicit line break before and after them. + # List taken from: https://www.w3schools.com/htmL/html_blocks.asp. + block_level_elements = [ + "address", + "article", + "aside", + "blockquote", + "canvas", + "dd", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "header", + "hr", + "li", + "main", + "nav", + "noscript", + "ol", + "p", + "pre", + "section", + "table", + "tfoot", + "ul", + "video", + ] + def __init__(self, version): self.version = version self.project = self.version.project @@ -334,7 +373,12 @@ def _parse_section_content(self, tag, *, depth=0): ) if content: - contents.append(content) + is_block_level_element = next_tag.tag in self.block_level_elements + if is_block_level_element: + # Add a line break before and after a block level element. + contents.append(f"\n{content}\n") + else: + contents.append(content) next_tag = next_tag.next return self._parse_content("".join(contents)), section_found diff --git a/readthedocs/search/tests/data/sphinx/in/page.html b/readthedocs/search/tests/data/sphinx/in/page.html index 224d82846c9..d9caaee9153 100644 --- a/readthedocs/search/tests/data/sphinx/in/page.html +++ b/readthedocs/search/tests/data/sphinx/in/page.html @@ -166,6 +166,16 @@
How to contribute changes to the theme.