From c13bdd5c1426c5bbfd3d340096bc6c1fb2f508bf Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Thu, 3 Oct 2024 21:04:40 +0000 Subject: [PATCH] Fix logic for indentation inside list items This fixes problems with the markdownify logic for indentation inside list items. This PR uses a branch building on that for #120, #150 and #151, so those three PRs should be merged first before merging this one. There is limited logic in markdownify for handling indentation in the case of nested lists. There are two major problems with this logic: * As it's in `convert_list`, causing a list to be indented when inside another list, it does not add indentation for any other elements such as paragraphs that may be found inside list items (or `
<pre>`, `<blockquote>`, etc.), so such elements are wrongly not indented and terminate the list in the output. * It uses fixed indentation of one tab. Following CommonMark, a tab in Markdown is considered equivalent to four spaces, which is not sufficient indentation in ordered list items with a number of three or more digits. Fix both of these issues by making `convert_li` handle indentation for the contents of `
<li>`, based on the length of the list item marker, rather than doing it in `convert_list` at all. --- markdownify/__init__.py | 13 +++++++++---- tests/test_lists.py | 8 +++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index dd2507d..5cbf95f 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -244,8 +244,8 @@ def escape(self, text): text = text.replace('_', r'\_') return text - def indent(self, text, level): - return line_beginning_re.sub('\t' * level, text) if text else '' + def indent(self, text, columns): + return line_beginning_re.sub(' ' * columns, text) if text else '' def underline(self, text, pad_char): text = (text or '').rstrip() @@ -346,7 +346,7 @@ def convert_list(self, el, text, convert_as_inline): el = el.parent if nested: # remove trailing newline if nested - return '\n' + self.indent(text, 1).rstrip() + return '\n' + text.rstrip() return '\n\n' + text + ('\n' if before_paragraph else '') convert_ul = convert_list @@ -368,7 +368,12 @@ def convert_li(self, el, text, convert_as_inline): el = el.parent bullets = self.options['bullets'] bullet = bullets[depth % len(bullets)] - return '%s %s\n' % (bullet, (text or '').strip()) + bullet = bullet + ' ' + text = (text or '').strip() + text = self.indent(text, len(bullet)) + if text: + text = bullet + text[len(bullet):] + return '%s\n' % text def convert_p(self, el, text, convert_as_inline): if convert_as_inline: diff --git a/tests/test_lists.py b/tests/test_lists.py index ecc1a65..a660778 100644 --- a/tests/test_lists.py +++ b/tests/test_lists.py @@ -47,10 +47,11 @@ def test_ol(): assert md('
    1. a
    2. b
    ') == '\n\n1. a\n2. b\n' assert md('
    1. a
    2. b
    ') == '\n\n1. a\n2. b\n' assert md('
    1. a
    2. b
    ') == '\n\n1. a\n2. b\n' + assert md('
    1. first para

      second para

    2. third para

      fourth para

    ') == '\n\n1234. first para\n \n second para\n1235. third para\n \n fourth para\n' def test_nested_ols(): - assert md(nested_ols) == '\n\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n' + assert md(nested_ols) == '\n\n1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n' def test_ul(): @@ -63,6 +64,7 @@ def test_ul():
  • c
  • """) == '\n\n* a\n* b\n* c\n' + assert md('
    • first para

      second para

    • third para

      fourth para

    ') == '\n\n* first para\n \n second para\n* third para\n \n fourth para\n' def test_inline_ul(): @@ -75,11 +77,11 @@ def test_nested_uls(): Nested ULs should alternate bullet characters. """ - assert md(nested_uls) == '\n\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n' + assert md(nested_uls) == '\n\n* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n' def test_bullets(): - assert md(nested_uls, bullets='-') == '\n\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n' + assert md(nested_uls, bullets='-') == '\n\n- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n' def test_li_text():