Skip to content

Commit

Permalink
fix: update content conversion patch
Browse files Browse the repository at this point in the history
- Ensure tags like <iframe>, <video>, etc. are retained when converting Wiki content to markdown.
  • Loading branch information
AyshaHakeem committed Sep 13, 2024
1 parent 3502304 commit 639a0af
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 22 deletions.
2 changes: 1 addition & 1 deletion wiki/patches.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ wiki.wiki.doctype.wiki_feedback.patches.delete_wiki_feedback_item
[post_model_sync]
wiki.wiki.doctype.wiki_space.patches.wiki_sidebar_migration
wiki.wiki.doctype.wiki_settings.patches.wiki_navbar_item_migration
wiki.wiki.doctype.wiki_page.patches.convert_to_markdown
wiki.wiki.doctype.wiki_page.patches.convert_wiki_content_to_markdown
8 changes: 0 additions & 8 deletions wiki/wiki/doctype/wiki_page/patches/convert_to_markdown.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import re

import frappe
import six
from bs4 import Comment, Doctype, NavigableString
from markdownify import MarkdownConverter

# Matched against a tag name (via ``.match``) to detect heading tags h1-h6.
html_heading_re = re.compile(r"h[1-6]")


class CustomMarkdownConverter(MarkdownConverter):
    """Markdown converter that retains selected HTML tags in its output.

    Overrides markdownify's ``process_tag`` so that the tags listed in
    ``_RAW_HTML_TAGS`` (``<iframe>``, ``<video>``, ...) are emitted as HTML
    instead of being converted like ordinary elements.
    """

    # Tags inside which whitespace-only text nodes may be pruned.
    _NESTED_TAGS = frozenset(
        ("ol", "ul", "li", "table", "thead", "tbody", "tfoot", "tr", "td", "th")
    )

    # Tags whose serialized HTML is kept in the output.
    _RAW_HTML_TAGS = frozenset(
        ("video", "iframe", "audio", "embed", "object", "source", "picture", "math")
    )

    @classmethod
    def _is_nested_node(cls, el):
        """Return True when *el* is a truthy node whose name is a nested tag."""
        return bool(el) and el.name in cls._NESTED_TAGS

    def process_tag(self, node, convert_as_inline, children_only=False):
        """Convert *node* and its children to markdown text.

        Args:
            node: The parsed element to convert.
            convert_as_inline: Passed to the ``convert_<tag>`` handler of
                *node*; also the default mode used for its children.
            children_only: When true, only the children are converted and
                no ``convert_<tag>`` handler is applied to *node* itself.

        Returns:
            The converted text for *node*.
        """
        # Markdown headings or cells can't include block elements
        # (elements with newlines), so their children are converted inline.
        is_heading = html_heading_re.match(node.name) is not None
        is_cell = node.name in ("td", "th")
        convert_children_as_inline = convert_as_inline
        if not children_only and (is_heading or is_cell):
            convert_children_as_inline = True

        # Remove whitespace-only text nodes in purely nested nodes.
        if self._is_nested_node(node):
            # Iterate over a snapshot: extract() mutates the child list, and
            # removing items from a list being iterated skips siblings.
            for el in list(node.children):
                if not isinstance(el, NavigableString) or six.text_type(el).strip() != "":
                    continue
                # Only extract (remove) the whitespace-only text node if any
                # of these conditions is true:
                # - el is the first element in its parent
                # - el is the last element in its parent
                # - el is adjacent to a nested node
                can_extract = (
                    not el.previous_sibling
                    or not el.next_sibling
                    or self._is_nested_node(el.previous_sibling)
                    or self._is_nested_node(el.next_sibling)
                )
                if can_extract:
                    el.extract()

        # Convert the children first.
        parts = []
        for el in node.children:
            if isinstance(el, (Comment, Doctype)):
                continue
            if isinstance(el, NavigableString):
                parts.append(self.process_text(el))
            elif el.name in self._RAW_HTML_TAGS:
                # Keep the tag as HTML. Its children are already part of the
                # serialized markup, so do not descend into it again;
                # otherwise they would be emitted a second time.
                # NOTE(review): process_text is the inherited text-node
                # handler; confirm it does not escape markdown characters
                # (e.g. "_" in URLs) inside the retained HTML.
                parts.append(self.process_text(el))
            else:
                parts.append(self.process_tag(el, convert_children_as_inline))
        text = "".join(parts)

        if not children_only:
            convert_fn = getattr(self, f"convert_{node.name}", None)
            if convert_fn and self.should_convert_tag(node.name):
                text = convert_fn(node, text, convert_as_inline)

        return text


def custom_markdownify(html, **options):
    """Convert *html* to markdown using ``CustomMarkdownConverter``.

    Any keyword arguments are forwarded to the converter's constructor.
    """
    converter = CustomMarkdownConverter(**options)
    return converter.convert(html)


def execute():
    """Convert the content of every Wiki Page to markdown.

    Reads ``name`` and ``content`` of all Wiki Page records, runs the content
    through ``custom_markdownify`` and writes the result back to the
    ``content`` field.
    """
    wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"])
    for page in wiki_pages:
        content = page["content"]
        if not content:
            # Nothing to convert; avoid handing None/empty markup to the
            # converter.
            continue
        markdown_content = custom_markdownify(content)
        frappe.db.set_value("Wiki Page", page["name"], "content", markdown_content)
6 changes: 0 additions & 6 deletions wiki/wiki/doctype/wiki_page/wiki_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,12 +452,6 @@ def convert_markdown(markdown):
return html


@frappe.whitelist()
def convert_html(html):
    """Return *html* converted by ``frappe.utils.to_markdown``."""
    return frappe.utils.to_markdown(html)


@frappe.whitelist()
def update(
name,
Expand Down
14 changes: 7 additions & 7 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
dependencies:
type-fest "^2.0.0"

"@tiptap/core@^2.0.0", "@tiptap/core@^2.0.2":
"@tiptap/core@^2.0.2":
version "2.0.2"
resolved "https://registry.npmjs.org/@tiptap/core/-/core-2.0.2.tgz"
integrity sha512-DBry6tpX7mYaTJkEDjVA4WmF8Kgthr275L0uIIOVdwW5nG5PAnOvREKyVOoMQnN3vR7CjtaCK+c3y+MCQhMA/g==
Expand All @@ -74,7 +74,7 @@
resolved "https://registry.npmjs.org/@tiptap/extension-code-block-lowlight/-/extension-code-block-lowlight-2.0.2.tgz"
integrity sha512-7BbRCKJE2oxsZ5n7HIjS0r/y1S/bSxEJgAFF1Tj3KN2IG3x48w+sqYxRMYmCZdoTexmmBpNF64uYXngKXB9/Ig==

"@tiptap/extension-code-block@^2.0.0", "@tiptap/extension-code-block@^2.0.2":
"@tiptap/extension-code-block@^2.0.2":
version "2.0.2"
resolved "https://registry.npmjs.org/@tiptap/extension-code-block/-/extension-code-block-2.0.2.tgz"
integrity sha512-GL8ogok1tl1FkXwk0P0ZWYh6oAmSA+R3oubtDZJG1fLlezKLcLYCN/Q2jgYDHDwEOnxMc4JIiT7EYwJ0pqmNaQ==
Expand Down Expand Up @@ -201,7 +201,7 @@
resolved "https://registry.npmjs.org/@tiptap/extension-text/-/extension-text-2.0.2.tgz"
integrity sha512-kAO+WurWOyHIV/x8qHMF3bSlWrdlPtjEYmf+w8wHKy3FzE55eF6SsGt4FymClNkJmyXdgflXBB3Wv/Z53myy8g==

"@tiptap/pm@^2.0.0", "@tiptap/pm@^2.0.2":
"@tiptap/pm@^2.0.2":
version "2.0.2"
resolved "https://registry.npmjs.org/@tiptap/pm/-/pm-2.0.2.tgz"
integrity sha512-vXlI82bZ4XrmVD6m/pO27gqlm+tU57mpjy9WjkJpEUOifQZK8LihR3l5k55Z0RqalV4/E79iU1cp8mw0v13nhA==
Expand Down Expand Up @@ -580,7 +580,7 @@ prosemirror-menu@^1.2.1:
prosemirror-history "^1.0.0"
prosemirror-state "^1.0.0"

prosemirror-model@^1, prosemirror-model@^1.0.0, prosemirror-model@^1.16.0, prosemirror-model@^1.18.1, prosemirror-model@^1.19.0, prosemirror-model@^1.8.1:
prosemirror-model@^1.0.0, prosemirror-model@^1.16.0, prosemirror-model@^1.18.1, prosemirror-model@^1.19.0, prosemirror-model@^1.8.1:
version "1.19.0"
resolved "https://registry.npmjs.org/prosemirror-model/-/prosemirror-model-1.19.0.tgz"
integrity sha512-/CvFGJnwc41EJSfDkQLly1cAJJJmBpZwwUJtwZPTjY2RqZJfM8HVbCreOY/jti8wTRbVyjagcylyGoeJH/g/3w==
Expand All @@ -603,7 +603,7 @@ prosemirror-schema-list@^1.2.2:
prosemirror-state "^1.0.0"
prosemirror-transform "^1.0.0"

prosemirror-state@^1, prosemirror-state@^1.0.0, prosemirror-state@^1.2.2, prosemirror-state@^1.3.1, prosemirror-state@^1.4.1:
prosemirror-state@^1.0.0, prosemirror-state@^1.2.2, prosemirror-state@^1.3.1, prosemirror-state@^1.4.1:
version "1.4.2"
resolved "https://registry.npmjs.org/prosemirror-state/-/prosemirror-state-1.4.2.tgz"
integrity sha512-puuzLD2mz/oTdfgd8msFbe0A42j5eNudKAAPDB0+QJRw8cO1ygjLmhLrg9RvDpf87Dkd6D4t93qdef00KKNacQ==
Expand Down Expand Up @@ -640,7 +640,7 @@ prosemirror-transform@^1.0.0, prosemirror-transform@^1.1.0, prosemirror-transfor
dependencies:
prosemirror-model "^1.0.0"

prosemirror-view@^1, prosemirror-view@^1.0.0, prosemirror-view@^1.1.0, prosemirror-view@^1.13.3, prosemirror-view@^1.27.0, prosemirror-view@^1.28.2:
prosemirror-view@^1.0.0, prosemirror-view@^1.1.0, prosemirror-view@^1.13.3, prosemirror-view@^1.27.0, prosemirror-view@^1.28.2:
version "1.30.2"
resolved "https://registry.npmjs.org/prosemirror-view/-/prosemirror-view-1.30.2.tgz"
integrity sha512-nTNzZvalQf9kHeEyO407LiV6DoOs/pXsid88UqW9Vvybo4ozJW2PJhkfZUxCUF1hR/9vJLdhxX84wuw9P9HsXA==
Expand Down Expand Up @@ -739,7 +739,7 @@ w3c-keyname@^2.2.0:
resolved "https://registry.npmjs.org/w3c-keyname/-/w3c-keyname-2.2.6.tgz"
integrity sha512-f+fciywl1SJEniZHD6H+kUO8gOnwIr7f4ijKA6+ZvJFjeGi1r4PDLl53Ayud9O/rk64RqgoQine0feoeOU0kXg==

which@^1.2.9, [email protected].x:
[email protected].x, which@^1.2.9:
version "1.2.14"
resolved "https://registry.npmjs.org/which/-/which-1.2.14.tgz"
integrity sha512-16uPglFkRPzgiUXYMi1Jf8Z5EzN1iB4V0ZtMXcHZnwsBtQhhHeCqoWw7tsUY42hJGNDWtUsVLTjakIa5BgAxCw==
Expand Down

0 comments on commit 639a0af

Please sign in to comment.