Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For evaluation: Implementation of MarkdownDeep.net over Journaley. #129

Merged
merged 6 commits into from
Nov 26, 2015
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 11 additions & 45 deletions Journaley/Forms/MainForm.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace Journaley.Forms
{
extern alias MDS;
extern alias MDD;

using System;
using System.Collections.Generic;
Expand All @@ -21,7 +21,7 @@
using Journaley.Core.Utilities;
using Journaley.Core.Watcher;
using Journaley.Utilities;
using MDS.MarkdownSharp;
using MDD.MarkdownDeep;
using Pabo.Calendar;
using Squirrel;

Expand Down Expand Up @@ -475,7 +475,8 @@ private Markdown Markdown
if (this.markdown == null)
{
this.markdown = new Markdown();
((MarkdownOptions)this.markdown.Options).AutoNewLines = true;
this.markdown.ExtraMode = true;
this.markdown.SafeMode = false;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are these two options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the website:

bool SafeMode
Set to true to only allow whitelisted safe html tags

bool ExtraMode
Set to true to enable ExtraMode, which enables the same set of features as implemented by PHP Markdown Extra.

Extramode is needed, becuase most of the implementations in Day One uses Markdown Extra stuff like tables and footnotes

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. We might want to turn on the SafeMode option later for resolving #118. Let's keep this false for now.

}

return this.markdown;
Expand Down Expand Up @@ -638,53 +639,18 @@ private void OnGetMinMaxInfo(ref Message m)
/// </summary>
private void UpdateWebBrowser()
{
string initialString = this.SelectedEntry.EntryText;
string docType = "<!DOCTYPE html>";

string formattedString = string.Format(
"<style type=\"text/css\">\n<!-- Font CSS -->\n{0}\n<!-- Size CSS -->\n{1}\n<!-- Custom CSS -->\n{2}\n</style><html><body><div>{3}</div></body></html>",
"{0}\n<style type=\"text/css\">\n<!-- Font CSS -->\n{1}\n<!-- Size CSS -->\n{2}\n<!-- Custom CSS -->\n{3}\n</style>\n<html>\n<body>\n<div>\n{4}</div>\n</body>\n</html>",
docType,
this.GetWebBrowserTypefaceCSS(),
this.GetWebBrowserSizeCSS(),
this.CustomCSS ?? string.Empty,
Markdown.Transform(this.SelectedEntry.EntryText));

this.webBrowser.DocumentText = this.RemoveLineBreaksWithinLists(formattedString);
}

/// <summary>
/// Removes the wrong line breaks within nested ordered/unordered lists.
/// Hack to fix the issue #114.
/// </summary>
/// <param name="formattedString">The entry content formatted by MarkdownSharp.</param>
/// <returns>correctly formatted string with the wrong line breaks removed.</returns>
private string RemoveLineBreaksWithinLists(string formattedString)
{
System.Text.StringBuilder builder = new System.Text.StringBuilder();
string pattern = @"^</?(ol|ul|li)>";

var lines = formattedString.Split(
new string[] { "\r\n", "\n" },
StringSplitOptions.None);

for (int i = 0; i < lines.Length - 1; ++i)
{
string line = lines[i];
string nextLine = lines[i + 1];

// Remove the "<br />" tags only if the current line and the next line are starting
// with the opening/closing list tags. By doing this, it can prevent removing
// line breaks that are intentionally added by the user.
if (Regex.IsMatch(line, pattern) && Regex.IsMatch(nextLine, pattern))
{
while (line.EndsWith("<br />"))
{
line = line.Substring(0, line.LastIndexOf("<br />"));
}
}

builder.AppendLine(line);
}

builder.AppendLine(lines.Last());
PostMarkdownParser.PostMarkdown(Markdown.Transform(initialString)));

return builder.ToString();
this.webBrowser.DocumentText = formattedString;
}

/// <summary>
Expand Down
8 changes: 5 additions & 3 deletions Journaley/Journaley.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@
<SpecificVersion>False</SpecificVersion>
<HintPath>..\packages\squirrel.windows.0.99.2\lib\Net45\ICSharpCode.SharpZipLib.dll</HintPath>
</Reference>
<Reference Include="MarkdownSharp, Version=0.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\packages\MarkdownSharp.1.13.0.0\lib\35\MarkdownSharp.dll</HintPath>
<Aliases>MDS</Aliases>
<Reference Include="MarkdownDeep, Version=1.5.4615.26275, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\packages\MarkdownDeep.NET.1.5\lib\.NetFramework 3.5\MarkdownDeep.dll</HintPath>
<Private>True</Private>
<Aliases>MDD</Aliases>
</Reference>
<Reference Include="Microsoft.WindowsAPICodePack">
<HintPath>..\packages\winapicp.1.1\lib\Microsoft.WindowsAPICodePack.dll</HintPath>
Expand Down Expand Up @@ -202,6 +203,7 @@
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Utilities\PostMarkdownParser.cs" />
<Compile Include="Utilities\CursorReader.cs" />
<Compile Include="Utilities\FontReader.cs" />
<Compile Include="Utilities\HtmlToText.cs">
Expand Down
4 changes: 4 additions & 0 deletions Journaley/Resources/JournaleyLarge.css
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ div {
img {
width: 100%;
}

pre {
word-wrap: break-word;
}
4 changes: 4 additions & 0 deletions Journaley/Resources/JournaleyMedium.css
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ div {
img {
width: 100%;
}

pre {
word-wrap: break-word;
}
4 changes: 4 additions & 0 deletions Journaley/Resources/JournaleySmall.css
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ div {
img {
width: 100%;
}

pre {
word-wrap: break-word;
}
112 changes: 112 additions & 0 deletions Journaley/Utilities/PostMarkdownParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
namespace Journaley.Utilities
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

/// <summary>
/// Helper class for fixing MarkdownDeep parsed HTML.
/// </summary>
public class PostMarkdownParser
{
/// <summary>
/// Perform fixes after MarkdownDeep parsing for publishing
/// to Journaley.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to be more explicit here and say what it fixes.

  1. automatically add <br /> tags where the user hit enter.
  2. add strikethrough style support

/// - automatically adds break tags on single line breaks.
/// - adds strikethrough style support.
/// - makes the first sentence into a header.
/// </summary>
/// <param name="formattedString">Formatted HTML string</param>
/// <returns>Properly formatted HTML string for publishing.</returns>
public static string PostMarkdown(string formattedString)
{
StringBuilder builder = new StringBuilder();
StringBuilder paragraphBuilder = new StringBuilder();

// 0 - stumbles upon the beginning of a <p> tag/usual parsing.
// 1 - stumbles upon the end of </p> tag.
// 2 - skips check and just dumps the line to builder.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you meant to say -1 here, not 2.

var parseState = -1;

bool firstTitleParsed = false;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry, but could you make the firstTitle feature as a separate PR, once this one is merged? This seems to require more thoughts on it.
Also, it seems like the current implementation would convert the first single line sentence into a title, even if it's not at the top.


var lines = formattedString.Split(
new string[] { "\r\n", "\n" },
StringSplitOptions.None);

foreach (string line in lines)
{
// We first incrementally check if the line begins with <p> tag. When it does,
// We turn on the paragraphBuilder to append the remaining lines.

// If the checks hit a line with the </p> closing, we disable check and proceed
// to parse the text, add <br /> tags and then adding it to builder
// where it is assembled with the rest of the file.
if (line.Contains("<p>"))
{
parseState = 1;
}
else if (line.Contains("</p>"))
{
parseState = 0;
}
else if (line.Contains("<pre>"))
{
parseState = -1;
}

if (parseState == 1)
{
if (line.Contains("</p>"))
{
string singleLine = line;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be fine to just use line in the following code. No need to introduce a new variable here.

if (!firstTitleParsed)
{
singleLine = Regex.Replace(singleLine, @"(<p>)(.*?)(</p>)", "<h2>$2</h2>\n<br/>");
firstTitleParsed = true;
}

builder.AppendLine(ReplaceMDStrikethrough(singleLine));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think applying strikethrough here is correct. See my comments below.

parseState = -1;
}
else
{
paragraphBuilder.AppendLine(ReplaceMDStrikethrough(line) + "<br />");
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one seems fine.

}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an else clause like this:

else
{
    paragraphBuilder.Add(line + "<br />");
}

I thought about this for a while, and I think this is the perfect place to add <br /> tags.
This is the place where we're within the <p> tag, and seeing the line break before the </p> tag.
I don't think we need that complex regex below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you thought this after I clarified to you about how the regex worked right?

}
else if (parseState == 0)
{
paragraphBuilder.AppendLine(ReplaceMDStrikethrough(line));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, this seems incorrect to me.

I suggest applying strikethrough to the entire paragraph at this point, instead of applying it line by line.
That way, it could handle strikethroughs over multiple lines correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to evaluate the private function.


builder.AppendLine(paragraphBuilder.ToString());
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use Append instead of AppendLine to avoid adding extra new lines.

paragraphBuilder.Clear();
parseState = -1;
}
else
{
// For <pre> tagged text, just skip check and append to builder
builder.AppendLine(line);
}
}

return builder.ToString();
}

/// <summary>
/// Replaces "~~" strikethrough Markdown tag into del HTML tags.
/// </summary>
/// <param name="parsedString">Any string that has strikethrough tags</param>
/// <returns>A string with del HTML tags in place of Markdown strikethrough tags.</returns>
private static string ReplaceMDStrikethrough(string parsedString)
{
parsedString = Regex.Replace(parsedString, @"(~~)(.*?)(~~)", "<del>$2</del>", RegexOptions.Singleline);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think having just this one line with Multiline option would be enough, when we pass the entire paragraph to this method. Remove the following two lines.

parsedString = Regex.Replace(parsedString, @"(?<=<p>)(~~)", "<del>", RegexOptions.Singleline);
parsedString = Regex.Replace(parsedString, @"(~~)(?=</p>)", "</del>", RegexOptions.Singleline);

return parsedString;
}
}
}
2 changes: 1 addition & 1 deletion Journaley/packages.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="DeltaCompressionDotNet" version="1.0.0" targetFramework="net45" />
<package id="MarkdownSharp" version="1.13.0.0" targetFramework="net40-Client" />
<package id="MarkdownDeep.NET" version="1.5" targetFramework="net45" />
<package id="Splat" version="1.6.2" targetFramework="net45" />
<package id="squirrel.windows" version="0.99.2" targetFramework="net45" />
<package id="StyleCop.MSBuild" version="4.7.48.2" targetFramework="net40-Client" />
Expand Down