Skip to content

Commit

Permalink
Fix logic for indentation inside list items
Browse files Browse the repository at this point in the history
This fixes problems with the markdownify logic for indentation inside
list items.

This PR uses a branch that builds on the branches for matthewwithanm#120, matthewwithanm#150 and matthewwithanm#151, so
those three PRs should be merged before this one.

There is limited logic in markdownify for handling indentation in the
case of nested lists.  There are two major problems with this logic:

* It lives in `convert_list`, so it only indents a list that is nested
  inside another list.  It adds no indentation for any other elements
  that may be found inside list items, such as paragraphs (or `<pre>`,
  `<blockquote>`, etc.), so such elements are wrongly left unindented
  and terminate the list in the output.

* It uses a fixed indentation of one tab.  Following CommonMark, a tab
  in Markdown is considered equivalent to four spaces, which is not
  sufficient indentation for the contents of ordered list items whose
  number has three or more digits (the marker plus its following space
  is then at least five columns wide).

Fix both of these issues by making `convert_li` handle indentation for
the contents of `<li>`, based on the length of the list item marker,
rather than doing it in `convert_list` at all.
  • Loading branch information
jsm28 committed Oct 3, 2024
1 parent 340aecb commit c13bdd5
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 7 deletions.
13 changes: 9 additions & 4 deletions markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,8 @@ def escape(self, text):
text = text.replace('_', r'\_')
return text

def indent(self, text, level):
return line_beginning_re.sub('\t' * level, text) if text else ''
def indent(self, text, columns):
return line_beginning_re.sub(' ' * columns, text) if text else ''

def underline(self, text, pad_char):
text = (text or '').rstrip()
Expand Down Expand Up @@ -346,7 +346,7 @@ def convert_list(self, el, text, convert_as_inline):
el = el.parent
if nested:
# remove trailing newline if nested
return '\n' + self.indent(text, 1).rstrip()
return '\n' + text.rstrip()
return '\n\n' + text + ('\n' if before_paragraph else '')

convert_ul = convert_list
Expand All @@ -368,7 +368,12 @@ def convert_li(self, el, text, convert_as_inline):
el = el.parent
bullets = self.options['bullets']
bullet = bullets[depth % len(bullets)]
return '%s %s\n' % (bullet, (text or '').strip())
bullet = bullet + ' '
text = (text or '').strip()
text = self.indent(text, len(bullet))
if text:
text = bullet + text[len(bullet):]
return '%s\n' % text

def convert_p(self, el, text, convert_as_inline):
if convert_as_inline:
Expand Down
8 changes: 5 additions & 3 deletions tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,11 @@ def test_ol():
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n \n second para\n1235. third para\n \n fourth para\n'


def test_nested_ols():
assert md(nested_ols) == '\n\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'
assert md(nested_ols) == '\n\n1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'


def test_ul():
Expand All @@ -63,6 +64,7 @@ def test_ul():
<li> c
</li>
</ul>""") == '\n\n* a\n* b\n* c\n'
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n \n second para\n* third para\n \n fourth para\n'


def test_inline_ul():
Expand All @@ -75,11 +77,11 @@ def test_nested_uls():
Nested ULs should alternate bullet characters.
"""
assert md(nested_uls) == '\n\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
assert md(nested_uls) == '\n\n* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'


def test_bullets():
assert md(nested_uls, bullets='-') == '\n\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
assert md(nested_uls, bullets='-') == '\n\n- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'


def test_li_text():
Expand Down

0 comments on commit c13bdd5

Please sign in to comment.