Skip to content

Commit

Permalink
Add fix for missing white-space between nodes
Browse files Browse the repository at this point in the history
Refs #12
Refs #14
  • Loading branch information
rrrene committed Apr 30, 2017
1 parent 823de7c commit c4bbac6
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 7 deletions.
12 changes: 11 additions & 1 deletion lib/html_sanitize_ex/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,33 @@ defmodule HtmlSanitizeEx.Parser do @doc """
@type html_tree :: tuple | list

@my_root_node "html_sanitize_ex"
@linebreak [239, 188, 191]

@spec parse(binary) :: html_tree

# Parses an HTML string into a mochiweb tree.
#
# The input is passed through `before_parse/1` and wrapped once in the
# synthetic `@my_root_node` element before being handed to
# `:mochiweb_html.parse/1`. The match below asserts that the parser hands
# back that root element with no attributes.
#
# Returns the single child node when the root has exactly one child,
# otherwise the list of children.
def parse(html) do
  # Wrap exactly once. Wrapping an already wrapped string nests two root
  # elements, and the inner root would leak into the returned tree.
  wrapped = "<#{@my_root_node}>#{before_parse(html)}</#{@my_root_node}>"
  {@my_root_node, [], parsed} = :mochiweb_html.parse(wrapped)

  case parsed do
    [single] -> single
    nodes -> nodes
  end
end

# Inserts the `@linebreak` marker, padded with one space on each side,
# between a `>` and the line break (`\n` or `\r\n`) that directly follows it.
defp before_parse(html) do
  tag_end_then_newline = ~r/(>)(\r?\n)/
  Regex.replace(tag_end_then_newline, html, "\\1 #{@linebreak} \\2")
end

# Renders a token tree to an HTML string.
#
# The tokens are placed under the synthetic `@my_root_node` element for
# `:mochiweb_html.to_html/1`; the root's opening tag at the start of the
# output, its closing tag at the end, and any entity-escaped closing tag
# are then removed before `after_to_html/1` is applied.
def to_html(tokens) do
  root = {@my_root_node, [], ensure_list(tokens)}
  rendered = Enum.join(:mochiweb_html.to_html(root))

  without_open = String.replace(rendered, ~r/^<#{@my_root_node}>/, "")
  without_close = String.replace(without_open, ~r/<\/#{@my_root_node}>$/, "")
  without_escaped = String.replace(without_close, "&lt;/html_sanitize_ex&gt;", "")

  after_to_html(without_escaped)
end

# Removes the `@linebreak` marker (with its trailing space and an optional
# leading space) wherever it sits directly before a line break, keeping only
# the line break itself.
defp after_to_html(html) do
  marker_then_newline = ~r/(\ ?#{@linebreak} )(\r?\n)/
  Regex.replace(marker_then_newline, html, "\\2")
end

defp ensure_list(list) do
Expand Down
4 changes: 2 additions & 2 deletions test/basic_html_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ defmodule HtmlSanitizeExScrubberBasicHTMLTest do

test "strips certain tags in multi line strings" do
  input = "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"

  # The line breaks between the nodes and the trailing line break are part of
  # the expected output; only the tags and the comment are removed.
  expected = "This is <b>a <a href=\"\">test</a></b>.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"

  assert expected == basic_html_sanitize(input)
end

Expand Down Expand Up @@ -110,7 +110,7 @@ defmodule HtmlSanitizeExScrubberBasicHTMLTest do

@tag href_scrubbing: true
test "test_strip_links_leaves_nonlink_tags" do
  input = "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"

  # The line break between the two links is expected to survive sanitizing.
  expected = "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>"

  assert expected == basic_html_sanitize(input)
end

@tag href_scrubbing: true
Expand Down
4 changes: 2 additions & 2 deletions test/markdown_html_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do

test "strips certain tags in multi line strings" do
  input = "<title>This is <b>a <a href=\"\" target=\"_top\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"

  # The line breaks between the nodes and the trailing line break are part of
  # the expected output; only the tags and the comment are removed.
  expected = "This is <b>a <a href=\"\">test</a></b>.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"

  assert expected == sanitize(input)
end

Expand Down Expand Up @@ -139,7 +139,7 @@ defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do

@tag href_scrubbing: true
test "test_strip_links_leaves_nonlink_tags" do
  input = "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"

  # The line break between the two links is expected to survive sanitizing.
  expected = "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>"

  assert expected == sanitize(input)
end

@tag href_scrubbing: true
Expand Down
2 changes: 1 addition & 1 deletion test/strip_tags_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ defmodule HtmlSanitizeExScrubberStripTagsTest do

test "strips tags in multi line strings" do
  input = "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"

  # All tags and the comment are removed, while the line breaks between the
  # nodes and the trailing line break are expected to remain.
  expected = "This is a test.\n\n\n\nIt no longer contains any HTML.\n"

  assert expected == strip_tags(input)
end

Expand Down
2 changes: 1 addition & 1 deletion test/traverser_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ defmodule HtmlSanitizeExTraverserTest do

test "should return expected tree 2" do
  input = "<title>This is <b>the <a href=\"http://[email protected]\" target=\"_blank\">test</a></b>.</title>\n\n\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"

  # The expected tree carries the whitespace found after a closing tag as
  # separate text nodes, each starting with a space-padded marker.
  expected = ["This is ", {"b", [], ["the ", "test"]}, ".", " _ \n\n\n\n", "It no ", {"b", [], ["longer ", "contains ", "any ", "HTML", "."]}, " _ \n"]

  assert expected == parse_to_tree(input)
end

Expand Down

0 comments on commit c4bbac6

Please sign in to comment.