From 6dbf9a96100d3ea9a11332bd5ae1125e77b15983 Mon Sep 17 00:00:00 2001 From: Jongyoul Lee Date: Tue, 21 Sep 2021 11:47:39 +0900 Subject: [PATCH] [ZEPPELIN-5527] Remove the dependency of `markdown` from `zeppelin-jupyter` ### What is this PR for? Simplifying dependences. ### What type of PR is it? [Improvement] ### Todos * [x] - Remove the dependency of `markdown` and implement a markdown parser directly ### What is the Jira issue? * Jira https://issues.apache.org/jira/browse/ZEPPELIN-5527 ### How should this be tested? Import Jupyter notebook ### Screenshots (if appropriate) ### Questions: * Does the licenses files need update? No * Is there breaking changes for older versions? No * Does this needs documentation? No Author: Jongyoul Lee Closes #4228 from jongyoul/ZEPPELIN-5527 and squashes the following commits: acf087e08 [Jongyoul Lee] Fix tests fae3fc1cd [Jongyoul Lee] Remove a redundant newline 790f0cc4f [Jongyoul Lee] [ZEPPELIN-5527] Remove the dependency of `markdown` from `zeppelin-jupyter` --- STYLE.md | 4 +- docs/interpreter/markdown.md | 13 ++-- docs/setup/operation/upgrading.md | 3 + markdown/README.md | 8 +-- markdown/pom.xml | 12 +--- pom.xml | 25 ++++++-- zeppelin-distribution/src/bin_license/LICENSE | 1 - zeppelin-jupyter/pom.xml | 16 +++-- .../apache/zeppelin/jupyter/JupyterUtil.java | 9 ++- .../jupyter/parser/MarkdownParser.java | 60 +++++++++++++++++++ .../jupyter/nbformat/JupyterUtilTest.java | 2 +- 11 files changed, 106 insertions(+), 47 deletions(-) create mode 100644 zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/parser/MarkdownParser.java diff --git a/STYLE.md b/STYLE.md index 8182301b5a5..b0a5f2a084a 100644 --- a/STYLE.md +++ b/STYLE.md @@ -7,7 +7,7 @@ app/styles/looknfeel Overall look and theme of the Zeppelin notebook page can be customized here. ### Code Syntax Highlighting -There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. 
Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls pegdown parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js. +There are two parts to code highlighting. First, Zeppelin uses the Ace Editor for its note paragraphs. Color style for this can be changed by setting theme on the editor instance. Second, Zeppelin's Markdown interpreter calls flexmark parser to emit HTML, and such content may contain <pre><code> tags that can be consumed by Highlight.js. #### Theme on Ace Editor app/scripts/controllers/paragraph.js @@ -16,7 +16,7 @@ Call setTheme on the editor with the theme path/name. [List of themes on GitHub](https://github.com/ajaxorg/ace/tree/master/lib/ace/theme) #### Style for Markdown Code Blocks -Highlight.js parses and converts <pre><code> blocks from pegdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build. +Highlight.js parses and converts <pre><code> blocks from markdown parser into keywords and language syntax with proper styles. It also attempts to infer the best fitting language if it is not provided. The visual style can be changed by simply including the desired [stylesheet](https://github.com/components/highlightjs/tree/master/styles) into app/index.html. See the next section on build. Note that code block background color is overriden in app/styles/notebook.css (look for .paragraph .tableDisplay .hljs). diff --git a/docs/interpreter/markdown.md b/docs/interpreter/markdown.md index e06c563e47a..a9c830652db 100644 --- a/docs/interpreter/markdown.md +++ b/docs/interpreter/markdown.md @@ -25,11 +25,11 @@ limitations under the License. 
## Overview [Markdown](http://daringfireball.net/projects/markdown/) is a plain text formatting syntax designed so that it can be converted to HTML. -Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java), [pegdown](https://github.com/sirthias/pegdown) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers. +Apache Zeppelin uses [flexmark](https://github.com/vsch/flexmark-java) and [markdown4j](https://github.com/jdcasey/markdown4j) as markdown parsers. In Zeppelin notebook, you can use ` %md ` in the beginning of a paragraph to invoke the Markdown interpreter and generate static html from Markdown plain text. -In Zeppelin, Markdown interpreter is enabled by default and uses the [pegdown](https://github.com/sirthias/pegdown) parser. +In Zeppelin, Markdown interpreter is enabled by default and uses the [flexmark](https://github.com/vsch/flexmark-java) parser. @@ -54,7 +54,7 @@ For more information, please see [Mathematical Expression](../usage/display_syst markdown.parser.type flexmark - Markdown Parser Type.
Available values: flexmark, pegdown, markdown4j. + Markdown Parser Type.
Available values: flexmark, markdown4j. @@ -68,13 +68,8 @@ CommonMark/Markdown Java parser with source level AST. -### Pegdown Parser - -`pegdown` parser provides github flavored markdown. Although still one of the most popular Markdown parsing libraries for the JVM, pegdown has reached its end of life. -The project is essentially unmaintained with tickets piling up and crucial bugs not being fixed.`pegdown`'s parsing performance isn't great. But keep this parser for the backward compatibility. - ### Markdown4j Parser -Since `pegdown` parser is more accurate and provides much more markdown syntax `markdown4j` option might be removed later. But keep this parser for the backward compatibility. +Since `flexmark` parser is more accurate and provides much more markdown syntax `markdown4j` option might be removed later. But keep this parser for the backward compatibility. diff --git a/docs/setup/operation/upgrading.md b/docs/setup/operation/upgrading.md index 4b78ee628e7..673fcac59c7 100644 --- a/docs/setup/operation/upgrading.md +++ b/docs/setup/operation/upgrading.md @@ -35,6 +35,9 @@ So, copying `notebook` and `conf` directory should be enough. ## Migration Guide +### Upgrading from Zeppelin 0.9, 0.10 to 0.11 + - From 0.11, the type of `Pegdown` for parsing markdown was deprecated ([ZEPPELIN-5529](https://issues.apache.org/jira/browse/ZEPPELIN-5529)). It will use `Flexmark` instead. + ### Upgrading from Zeppelin 0.8 to 0.9 - From 0.9, we changed the notes file name structure ([ZEPPELIN-2619](https://issues.apache.org/jira/browse/ZEPPELIN-2619)). So when you upgrading zeppelin to 0.9, you need to upgrade note files. Here's steps you need to follow: diff --git a/markdown/README.md b/markdown/README.md index 24f5dce2a19..bc7ed929604 100644 --- a/markdown/README.md +++ b/markdown/README.md @@ -1,6 +1,6 @@ # Overview -Markdown parsers for Apache Zeppelin. Markdown is a plain text formatting syntax designed so that it can be converted to HTML. 
Apache Zeppelin uses `flexmark`, `pegdown` and `markdown4j`. -Since both `pegdown` and `markdown4j` are deprecated but it support for backward compatibility. +Markdown parsers for Apache Zeppelin. Markdown is a plain text formatting syntax designed so that it can be converted to HTML. Apache Zeppelin uses `flexmark` and `markdown4j`. +`markdown4j` is deprecated but is still supported for backward compatibility. # Architecture Current interpreter implementation creates the instance of parser based on the configuration parameter provided, default is `flexmark` through `Markdown` and render the text into html. @@ -18,7 +18,7 @@ CommonMark/Markdown Java parser with source level AST. * maven dependency to add in pom.xml ``` -0.50.40 +0.62.2 com.vladsch.flexmark @@ -31,4 +31,4 @@ CommonMark/Markdown Java parser with source level AST. To support, YUML and websequnce diagram, need to build the image URL from the respective block and render it into HTML, So it requires to implement some custom classes. `UMLExtension` is base class which has factory for other classes like `UMLBlockQuoteParser` and `UMLNodeRenderer`. `UMLBlockQuoteParser` which parses the UML block and creates block quote node `UMLBlockQuote`. -`UMLNodeRenderer` which builds the URL using this block quote node `UMLBlockQuote` and render it as image into HTML. \ No newline at end of file +`UMLNodeRenderer` which builds the URL using this block quote node `UMLBlockQuote` and render it as image into HTML. diff --git a/markdown/pom.xml b/markdown/pom.xml index 4b4ef646b23..c665f2ce6e7 100644 --- a/markdown/pom.xml +++ b/markdown/pom.xml @@ -15,7 +15,7 @@ ~ See the License for the specific language governing permissions and ~ limitations under the License. 
--> - + 4.0.0 @@ -34,8 +34,6 @@ md 2.2-cj-1.0 - 1.6.0 - 0.62.2 @@ -54,14 +52,6 @@ com.vladsch.flexmark flexmark-all - ${flexmark.all.version} - - - - commons-logging - commons-logging - - diff --git a/pom.xml b/pom.xml index 5fcd2b9b4d8..f18dfebd528 100644 --- a/pom.xml +++ b/pom.xml @@ -124,6 +124,7 @@ 1.7.30 1.2.17 0.13.0 + 0.62.2 2.8.6 0.2.2 9.4.31.v20200723 @@ -151,7 +152,7 @@ 3.1.3 3.2.0 ${hadoop2.7.version} - + provided 2.3.2 1.4.0 @@ -214,6 +215,20 @@ + + + com.vladsch.flexmark + flexmark-all + ${flexmark.all.version} + + + + commons-logging + commons-logging + + + + org.slf4j @@ -1243,7 +1258,7 @@ scala-maven-plugin ${plugin.scala.alchim31.version} - + org.apache.maven.plugins maven-surefire-plugin @@ -1379,7 +1394,7 @@ maven-eclipse-plugin ${plugin.eclipse.version} - + org.apache.maven.plugins maven-dependency-plugin @@ -1446,7 +1461,7 @@ frontend-maven-plugin ${plugin.frontend.version} - + org.apache.maven.plugins maven-failsafe-plugin @@ -1518,7 +1533,7 @@ apache-rat-plugin ${plugin.rat.version} - + diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE index 47878267bb2..304bbfe092b 100644 --- a/zeppelin-distribution/src/bin_license/LICENSE +++ b/zeppelin-distribution/src/bin_license/LICENSE @@ -127,7 +127,6 @@ The following components are provided under Apache License. 
(Apache 2.0) Servlet API (org.mortbay.jetty:servlet-api:2.5-20081211 - https://en.wikipedia.org/wiki/Jetty_(web_server)) (Apache 2.0) Google HTTP Client Library for Java (com.google.http-client:google-http-client-jackson2:1.21.0 - https://github.com/google/google-http-java-client/tree/dev/google-http-client-jackson2) (Apache 2.0) validation-api (javax.validation - http://beanvalidation.org/) - (Apache 2.0) pegdown (org.pegdown:pegdown:1.6.0 - https://github.com/sirthias/pegdown) (Apache 2.0) parboiled-java (org.parboiled:parboiled-java:1.1.7 - https://github.com/sirthias/parboiled) (Apache 2.0) parboiled-core (org.parboiled:parboiled-core:1.1.7 - https://github.com/sirthias/parboiled) (Apache 2.0) ZkClient (com.101tec:zkclient:0.7 - https://github.com/sgroschupf/zkclient) diff --git a/zeppelin-jupyter/pom.xml b/zeppelin-jupyter/pom.xml index 1c665cf3667..3d7cdf6df5d 100644 --- a/zeppelin-jupyter/pom.xml +++ b/zeppelin-jupyter/pom.xml @@ -53,15 +53,13 @@ - org.apache.zeppelin - zeppelin-markdown - ${project.version} - - - org.apache.zeppelin - zeppelin-interpreter-shaded - - + com.vladsch.flexmark + flexmark-all + + + + org.apache.commons + commons-lang3 diff --git a/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/JupyterUtil.java b/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/JupyterUtil.java index 4ec841e7f5a..81fda1a39dd 100644 --- a/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/JupyterUtil.java +++ b/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/JupyterUtil.java @@ -39,12 +39,11 @@ import org.apache.zeppelin.jupyter.nbformat.Output; import org.apache.zeppelin.jupyter.nbformat.RawCell; import org.apache.zeppelin.jupyter.nbformat.Stream; +import org.apache.zeppelin.jupyter.parser.MarkdownParser; import org.apache.zeppelin.jupyter.zformat.Note; import org.apache.zeppelin.jupyter.zformat.Paragraph; import org.apache.zeppelin.jupyter.zformat.Result; import org.apache.zeppelin.jupyter.zformat.TypeData; -import 
org.apache.zeppelin.markdown.FlexmarkParser; -import org.apache.zeppelin.markdown.MarkdownParser; import java.io.BufferedReader; import java.io.FileReader; @@ -68,7 +67,7 @@ public class JupyterUtil { private final RuntimeTypeAdapterFactory cellTypeFactory; private final RuntimeTypeAdapterFactory outputTypeFactory; - private final MarkdownParser markdownProcessor; + private final MarkdownParser markdownParser; public JupyterUtil() { this.cellTypeFactory = RuntimeTypeAdapterFactory.of(Cell.class, "cell_type") @@ -78,7 +77,7 @@ public JupyterUtil() { .registerSubtype(ExecuteResult.class, "execute_result") .registerSubtype(DisplayData.class, "display_data").registerSubtype(Stream.class, "stream") .registerSubtype(Error.class, "error"); - this.markdownProcessor = new FlexmarkParser(); + this.markdownParser = new MarkdownParser(); } public Nbformat getNbformat(Reader in) { @@ -146,7 +145,7 @@ public Note getNote(Nbformat nbformat, String id, String codeReplaced, String ma } } else if (cell instanceof MarkdownCell || cell instanceof HeadingCell) { interpreterName = markdownReplaced; - String markdownContent = markdownProcessor.render(codeText); + String markdownContent = markdownParser.render(codeText); typeDataList.add(new TypeData(TypeData.HTML, markdownContent)); paragraph.setUpMarkdownConfig(true); } else { diff --git a/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/parser/MarkdownParser.java b/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/parser/MarkdownParser.java new file mode 100644 index 00000000000..c0b2f09b4f3 --- /dev/null +++ b/zeppelin-jupyter/src/main/java/org/apache/zeppelin/jupyter/parser/MarkdownParser.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.zeppelin.jupyter.parser; + +import com.vladsch.flexmark.ext.autolink.AutolinkExtension; +import com.vladsch.flexmark.ext.emoji.EmojiExtension; +import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension; +import com.vladsch.flexmark.ext.tables.TablesExtension; +import com.vladsch.flexmark.ext.typographic.TypographicExtension; +import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension; +import com.vladsch.flexmark.html.HtmlRenderer; +import com.vladsch.flexmark.parser.Parser; +import com.vladsch.flexmark.util.ast.Node; +import com.vladsch.flexmark.util.data.MutableDataSet; +import java.util.Arrays; + +import static com.vladsch.flexmark.ext.emoji.EmojiImageType.UNICODE_ONLY; + +public class MarkdownParser { + private final Parser parser; + private final HtmlRenderer renderer; + + public MarkdownParser() { + MutableDataSet options = new MutableDataSet(); + options.set(Parser.EXTENSIONS, Arrays.asList(StrikethroughExtension.create(), + TablesExtension.create(), + AutolinkExtension.create(), + WikiLinkExtension.create(), + TypographicExtension.create(), + EmojiExtension.create())); + options.set(HtmlRenderer.SOFT_BREAK, "
\n"); + options.set(EmojiExtension.USE_IMAGE_TYPE, UNICODE_ONLY); + parser = Parser.builder(options).build(); + renderer = HtmlRenderer.builder(options).build(); + } + + public String render(String markdownText) { + Node document = parser.parse(markdownText); + String html = renderer.render(document); + return wrapWithMarkdownClassDiv(html); + } + + public static String wrapWithMarkdownClassDiv(String html) { + return "
\n" + html + "
"; + } +} diff --git a/zeppelin-jupyter/src/test/java/org/apache/zeppelin/jupyter/nbformat/JupyterUtilTest.java b/zeppelin-jupyter/src/test/java/org/apache/zeppelin/jupyter/nbformat/JupyterUtilTest.java index cb436dd0168..033aa0b56dd 100644 --- a/zeppelin-jupyter/src/test/java/org/apache/zeppelin/jupyter/nbformat/JupyterUtilTest.java +++ b/zeppelin-jupyter/src/test/java/org/apache/zeppelin/jupyter/nbformat/JupyterUtilTest.java @@ -96,7 +96,7 @@ public void getNoteAndVerifyData() throws Exception { "
\n" + "
This notebook was created using IBM Knowledge Anyhow Workbench. To learn more, visit us at https://knowledgeanyhow.org.
\n" + " \n" + - "\n\n" + + "\n" + "" , results.get(0).getData()); assertEquals("HTML", results.get(0).getType()); }