diff --git a/lib/html_sanitize_ex/parser.ex b/lib/html_sanitize_ex/parser.ex index 4e17037..964f634 100644 --- a/lib/html_sanitize_ex/parser.ex +++ b/lib/html_sanitize_ex/parser.ex @@ -10,7 +10,8 @@ defmodule HtmlSanitizeEx.Parser do @doc """ @type html_tree :: tuple | list @my_root_node "html_sanitize_ex" - @linebreak [239, 188, 191] + @replacement_linebreak [239, 188, 191] + @replacement_space [239, 189, 191] @spec parse(binary) :: html_tree @@ -22,7 +23,9 @@ defmodule HtmlSanitizeEx.Parser do @doc """ end defp before_parse(html) do - String.replace(html, ~r/(>)(\r?\n)/, "\\1 #{@linebreak} \\2") + html + |> String.replace(~r/(>)(\r?\n)/, "\\1 #{@replacement_linebreak} \\2") + |> String.replace(~r/(>)(\ +)(<)/, "\\1 #{@replacement_space}\\2\\3") end def to_html(tokens) do @@ -36,7 +39,9 @@ defmodule HtmlSanitizeEx.Parser do @doc """ end defp after_to_html(html) do - String.replace(html, ~r/(\ ?#{@linebreak} )(\r?\n)/, "\\2") + html + |> String.replace(~r/(\ ?#{@replacement_linebreak} )(\r?\n)/, "\\2") + |> String.replace(~r/(\&gt\;|>)(\ +)(#{@replacement_space})(\ +)(\&lt\;|<)/, "\\1\\4\\5") end defp ensure_list(list) do diff --git a/lib/html_sanitize_ex/scrubber.ex b/lib/html_sanitize_ex/scrubber.ex index 1a37d41..b0ba6f8 100644 --- a/lib/html_sanitize_ex/scrubber.ex +++ b/lib/html_sanitize_ex/scrubber.ex @@ -9,15 +9,9 @@ defmodule HtmlSanitizeEx.Scrubber do def scrub(html, scrubber_module) do html - |> before_scrub |> scrubber_module.before_scrub |> HtmlSanitizeEx.Parser.parse |> HtmlSanitizeEx.Traverser.traverse(scrubber_module) |> HtmlSanitizeEx.Parser.to_html end - - defp before_scrub(html) do - html - |> String.replace(~r/(>)(\ +)(<)/, "\\1 \\3") - end end diff --git a/test/html5_test.exs b/test/html5_test.exs index bde640f..721951e 100644 --- a/test/html5_test.exs +++ b/test/html5_test.exs @@ -66,4 +66,10 @@ defmodule HtmlSanitizeExScrubberHTML5Test do expected = ~s(Email Us) assert expected == full_html_sanitize(input) end + + test "does encode script in
textarea, but preserves white-space" do + input = ~s() + expected = ~s() + assert expected == full_html_sanitize(input) + end end