diff --git a/lib/html_sanitize_ex/parser.ex b/lib/html_sanitize_ex/parser.ex index 4426dc5..4e17037 100644 --- a/lib/html_sanitize_ex/parser.ex +++ b/lib/html_sanitize_ex/parser.ex @@ -10,16 +10,21 @@ defmodule HtmlSanitizeEx.Parser do @doc """ @type html_tree :: tuple | list @my_root_node "html_sanitize_ex" + @linebreak [239, 188, 191] @spec parse(binary) :: html_tree def parse(html) do - html = "<#{@my_root_node}>#{html}#{@my_root_node}>" + html = "<#{@my_root_node}>#{before_parse(html)}#{@my_root_node}>" {@my_root_node, [], parsed} = :mochiweb_html.parse(html) if length(parsed) == 1, do: hd(parsed), else: parsed end + defp before_parse(html) do + String.replace(html, ~r/(>)(\r?\n)/, "\\1 #{@linebreak} \\2") + end + def to_html(tokens) do {@my_root_node, [], ensure_list(tokens)} |> :mochiweb_html.to_html @@ -27,6 +32,11 @@ defmodule HtmlSanitizeEx.Parser do @doc """ |> String.replace(~r/^<#{@my_root_node}>/, "") |> String.replace(~r/<\/#{@my_root_node}>$/, "") |> String.replace("</html_sanitize_ex>", "") + |> after_to_html() + end + + defp after_to_html(html) do + String.replace(html, ~r/(\ ?#{@linebreak} )(\r?\n)/, "\\2") end defp ensure_list(list) do diff --git a/test/basic_html_test.exs b/test/basic_html_test.exs index ed474ee..4f90535 100644 --- a/test/basic_html_test.exs +++ b/test/basic_html_test.exs @@ -54,7 +54,7 @@ defmodule HtmlSanitizeExScrubberBasicHTMLTest do test "strips certain tags in multi line strings" do input = "
It no longer contains any HTML.
It no longer contains any HTML.
" + expected = "This is a test.\n\n\n\nIt no longer contains any HTML.
\n" assert expected == basic_html_sanitize(input) end @@ -110,7 +110,7 @@ defmodule HtmlSanitizeExScrubberBasicHTMLTest do @tag href_scrubbing: true test "test_strip_links_leaves_nonlink_tags" do - assert "My mindall day long" == basic_html_sanitize("My mind\nall day long") + assert "My mind\nall day long" == basic_html_sanitize("My mind\nall day long") end @tag href_scrubbing: true diff --git a/test/markdown_html_test.exs b/test/markdown_html_test.exs index bd94fa3..5fdf632 100644 --- a/test/markdown_html_test.exs +++ b/test/markdown_html_test.exs @@ -83,7 +83,7 @@ defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do test "strips certain tags in multi line strings" do input = "It no longer contains any HTML.
It no longer contains any HTML.
" + expected = "This is a test.\n\n\n\nIt no longer contains any HTML.
\n" assert expected == sanitize(input) end @@ -139,7 +139,7 @@ defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do @tag href_scrubbing: true test "test_strip_links_leaves_nonlink_tags" do - assert "My mindall day long" == sanitize("My mind\nall day long") + assert "My mind\nall day long" == sanitize("My mind\nall day long") end @tag href_scrubbing: true diff --git a/test/strip_tags_test.exs b/test/strip_tags_test.exs index 3ed2dad..d491834 100644 --- a/test/strip_tags_test.exs +++ b/test/strip_tags_test.exs @@ -37,7 +37,7 @@ defmodule HtmlSanitizeExScrubberStripTagsTest do test "strips tags in multi line strings" do input = "It no longer contains any HTML.
It no longer contains any HTML.