diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index a37f870..dd2507d 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -67,6 +67,23 @@ def _todict(obj):
return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
+def remove_whitespace_inside(el):
+ """Return whether to remove whitespace just inside a block-level element."""
+ if not el or not el.name:
+ return False
+ if html_heading_re.match(el.name) is not None:
+ return True
+ return el.name in ('p', 'blockquote',
+ 'ol', 'ul', 'li',
+ 'table', 'thead', 'tbody', 'tfoot',
+ 'tr', 'td', 'th')
+
+
+def remove_whitespace_outside(el):
+ """Return whether to remove whitespace just outside a block-level element."""
+ return remove_whitespace_inside(el) or (el and el.name == 'pre')
+
+
class MarkdownConverter(object):
class DefaultOptions:
autolinks = True
@@ -120,27 +137,23 @@ def process_tag(self, node, convert_as_inline, children_only=False):
if not children_only and (isHeading or isCell):
convert_children_as_inline = True
- # Remove whitespace-only textnodes in purely nested nodes
- def is_nested_node(el):
- return el and el.name in ['ol', 'ul', 'li',
- 'table', 'thead', 'tbody', 'tfoot',
- 'tr', 'td', 'th']
-
- if is_nested_node(node):
- for el in node.children:
- # Only extract (remove) whitespace-only text node if any of the
- # conditions is true:
- # - el is the first element in its parent
- # - el is the last element in its parent
- # - el is adjacent to an nested node
- can_extract = (not el.previous_sibling
- or not el.next_sibling
- or is_nested_node(el.previous_sibling)
- or is_nested_node(el.next_sibling))
- if (isinstance(el, NavigableString)
- and six.text_type(el).strip() == ''
- and can_extract):
- el.extract()
+ # Remove whitespace-only textnodes just before, after or
+ # inside block-level elements.
+ remove_inside = remove_whitespace_inside(node)
+ for el in node.children:
+ # Only extract (remove) whitespace-only text node if any of the
+ # conditions is true:
+ # - el is the first element in its parent (block-level)
+ # - el is the last element in its parent (block-level)
+ # - el is adjacent to a block-level node
+ can_extract = (remove_inside and (not el.previous_sibling
+ or not el.next_sibling)
+ or remove_whitespace_outside(el.previous_sibling)
+ or remove_whitespace_outside(el.next_sibling))
+ if (isinstance(el, NavigableString)
+ and six.text_type(el).strip() == ''
+ and can_extract):
+ el.extract()
# Convert the children first
for el in node.children:
@@ -179,12 +192,16 @@ def process_text(self, el):
if not el.find_parent(['pre', 'code', 'kbd', 'samp']):
text = self.escape(text)
- # remove trailing whitespaces if any of the following condition is true:
- # - current text node is the last node in li
- # - current text node is followed by an embedded list
- if (el.parent.name == 'li'
- and (not el.next_sibling
- or el.next_sibling.name in ['ul', 'ol'])):
+ # remove leading whitespace at the start or just after a
+ # block-level element; remove trailing whitespace at the end
+ # or just before a block-level element.
+ if (remove_whitespace_outside(el.previous_sibling)
+ or (remove_whitespace_inside(el.parent)
+ and not el.previous_sibling)):
+ text = text.lstrip()
+ if (remove_whitespace_outside(el.next_sibling)
+ or (remove_whitespace_inside(el.parent)
+ and not el.next_sibling)):
text = text.rstrip()
return text
@@ -257,7 +274,7 @@ def convert_a(self, el, text, convert_as_inline):
def convert_blockquote(self, el, text, convert_as_inline):
if convert_as_inline:
- return text
+ return ' ' + text.strip() + ' '
return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
@@ -355,7 +372,7 @@ def convert_li(self, el, text, convert_as_inline):
def convert_p(self, el, text, convert_as_inline):
if convert_as_inline:
- return text
+ return ' ' + text.strip() + ' '
if self.options['wrap']:
# Preserve newlines (and preceding whitespace) resulting
# from
tags. Newlines in the input have already been
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 9c1edc3..0be1d0c 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -66,7 +66,7 @@ def test_blockquote_with_paragraph():
def test_blockquote_nested():
text = md('
And she was like') - assert text == '\n> And she was like \n> > Hello\n\n' + assert text == '\n> And she was like\n> > Hello\n\n' def test_br(): @@ -136,7 +136,7 @@ def test_hn(): def test_hn_chained(): - assert md('Hello
Hello
\nWorld
') == '\n\nHello\n\n\n---\n\n\nWorld\n\n' + assert md('Hello
\nWorld
') == '\n\nHello\n\n---\n\nWorld\n\n' def test_i(): @@ -303,3 +303,13 @@ def callback(el): assert md('test\n foo\nbar', code_language_callback=callback) == '\n```python\ntest\n foo\nbar\n```\n' assert md('
test\n foo\nbar
', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
assert md('test\n foo\nbar
', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
+
+
+def test_spaces():
+ assert md('a b
c d
') == '\n\na b\n\nc d\n\n' + assert md('a
') == '\n\n*a*\n\n' + assert md('testagain
') == 'test\n\nagain\n\n' + assert md('testtextafter') == 'test\n> text\n\nafter' + assert md('
foobar') == 'test\n```\n foo \n```\nbar'