Skip to content

Commit

Permalink
It won't hurt 💉
Browse files Browse the repository at this point in the history
  • Loading branch information
rrrene committed Jul 1, 2015
0 parents commit 69ea11d
Show file tree
Hide file tree
Showing 18 changed files with 850 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/_build
/deps
/docs/all.json
test.json
erl_crash.dump
*.ez
20 changes: 20 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Copyright (c) 2014 René Föhring

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
HtmlSanitizeEx
==============

**TODO: Add description**
24 changes: 24 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# This file is responsible for configuring your application
# and its dependencies with the aid of the Mix.Config module.
use Mix.Config

# This configuration is loaded before any dependency and is restricted
# to this project. If another project depends on this project, this
# file won't be loaded nor affect the parent project. For this reason,
# if you want to provide default values for your application for third-
# party users, it should be done in your mix.exs file.

# Sample configuration:
#
# config :logger, :console,
# level: :info,
# format: "$date $time [$level] $metadata$message\n",
# metadata: [:user_id]

# It is also possible to import configuration files, relative to this
# directory. For example, you can emulate configuration per environment
# by uncommenting the line below and defining dev.exs, test.exs and such.
# Configuration from the imported file will override the ones defined
# here (which is why it is important to import them last).
#
# import_config "#{Mix.env}.exs"
19 changes: 19 additions & 0 deletions lib/html_sanitize_ex.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
defmodule HtmlSanitizeEx do
  @moduledoc """
  Convenience entry points for sanitizing HTML.

  Every function hands its argument to `HtmlSanitizeEx.Scrubber.scrub/2`
  together with one of the scrubber modules.
  """

  alias HtmlSanitizeEx.Scrubber

  @doc """
  Runs `html` through the `Scrubber.NoScrub` scrubber.
  """
  def noscrub(html), do: Scrubber.scrub(html, Scrubber.NoScrub)

  @doc """
  Runs `html` through the `Scrubber.BasicHTML` scrubber.
  """
  def basic_html(html), do: Scrubber.scrub(html, Scrubber.BasicHTML)

  @doc """
  Runs `html` through the `Scrubber.BasicHTML` scrubber.

  This currently uses the same scrubber module as `basic_html/1`.
  """
  def markdown(html), do: Scrubber.scrub(html, Scrubber.BasicHTML)

  @doc """
  Runs `html` through the `Scrubber.StripTags` scrubber.
  """
  def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
end
38 changes: 38 additions & 0 deletions lib/html_sanitize_ex/parser.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
defmodule HtmlSanitizeEx.Parser do
  @moduledoc """
  Converts between HTML strings and the tuple/list tree handled by
  `:mochiweb_html`.

  The input is wrapped in a private root element before parsing; `to_html/1`
  removes that wrapper again from the rendered output.
  """

  @type html_tree :: tuple | list

  # Name of the synthetic element wrapped around every fragment.
  @my_root_node "html_sanitize_ex"

  @doc """
  Parses an HTML string.

  Returns the node itself when the fragment has exactly one top-level node,
  otherwise the list of top-level nodes.

  ## Examples

      iex> HtmlSanitizeEx.Parser.parse("<div class=js-action>hello world</div>")
      {"div", [{"class", "js-action"}], ["hello world"]}

      iex> HtmlSanitizeEx.Parser.parse("<div>first</div><div>second</div>")
      [{"div", [], ["first"]}, {"div", [], ["second"]}]

  """
  @spec parse(binary) :: html_tree
  def parse(html) do
    wrapped = "<#{@my_root_node}>#{html}</#{@my_root_node}>"
    {@my_root_node, [], parsed} = :mochiweb_html.parse(wrapped)

    # Unwrap a lone top-level node; matching on the list shape avoids
    # walking the whole list with length/1.
    case parsed do
      [single] -> single
      nodes -> nodes
    end
  end

  @doc """
  Renders a tree (a single node or a list of nodes) back to an HTML string.

  The tree is placed under the synthetic root element, rendered with
  `:mochiweb_html.to_html/1`, and the root's opening tag, closing tag and
  any escaped copy of its closing tag are then removed from the result.
  """
  @spec to_html(html_tree) :: binary
  def to_html(tokens) do
    {@my_root_node, [], ensure_list(tokens)}
    |> :mochiweb_html.to_html()
    # The renderer returns an iolist; flatten it directly to a binary.
    |> IO.iodata_to_binary()
    |> String.replace(~r/^<#{@my_root_node}>/, "")
    |> String.replace(~r/<\/#{@my_root_node}>$/, "")
    |> String.replace("&lt;/#{@my_root_node}&gt;", "")
  end

  # Lists (including the empty list) are already a children list; anything
  # else is a single node and gets wrapped.
  defp ensure_list(nodes) when is_list(nodes), do: nodes
  defp ensure_list(node), do: [node]
end
17 changes: 17 additions & 0 deletions lib/html_sanitize_ex/scrubber.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
defmodule HtmlSanitizeEx.Scrubber do
  @moduledoc """
  Runs an HTML string through a scrubber module.
  """

  @doc """
  Scrubs `html` using `scrubber_module`.

  An empty string or `nil` yields `""` without touching the scrubber.
  Otherwise the input goes through `scrubber_module.before_scrub/1`, is
  parsed by `HtmlSanitizeEx.Parser.parse/1`, walked by
  `HtmlSanitizeEx.Traverser.traverse/2` and rendered with
  `HtmlSanitizeEx.Parser.to_html/1`.
  """
  def scrub(blank, _scrubber_module) when blank in ["", nil], do: ""

  def scrub(html, scrubber_module) do
    prepared = scrubber_module.before_scrub(html)
    tree = HtmlSanitizeEx.Parser.parse(prepared)
    scrubbed = HtmlSanitizeEx.Traverser.traverse(tree, scrubber_module)

    HtmlSanitizeEx.Parser.to_html(scrubbed)
  end
end
72 changes: 72 additions & 0 deletions lib/html_sanitize_ex/scrubber/basic_html.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
defmodule HtmlSanitizeEx.Scrubber.BasicHTML do
  @moduledoc """
  Scrubber that keeps a fixed list of basic HTML tags and a small set of
  attributes on `a` and `img`.

  Tags that are not in the list are replaced by their children, comments
  become an empty string, and attributes without an explicit rule are
  dropped.
  """

  require HtmlSanitizeEx.Scrubber.Meta
  alias HtmlSanitizeEx.Scrubber.Meta

  # URI prefixes accepted for `href` and `src` values that carry a scheme.
  @valid_schemes ["http://", "https://"]

  @doc """
  Prepares the raw text by delegating to
  `HtmlSanitizeEx.Scrubber.StripTags.before_scrub/1`.
  """
  def before_scrub(text) do
    HtmlSanitizeEx.Scrubber.StripTags.before_scrub(text)
  end

  # --- scrub/1 ---------------------------------------------------------
  # Generated clauses: keep these tags and pass their attributes through
  # scrub_attributes/2.
  Meta.allow_tags_and_scrub_its_attributes ["h1", "h2", "h3", "h4", "h5",
    "a", "b", "blockquote", "br", "code", "del", "em", "hr", "i",
    "img", "li", "ol", "ul", "p", "pre", "span", "strong", "u",
    "table", "tbody", "td", "th", "thead", "tr"]

  # Any tag without a clause above is dropped; only its children survive.
  def scrub({_tag, _attributes, children}), do: children

  # Comments are removed entirely.
  def scrub({:comment, _children}), do: ""

  # Other two-element tokens are replaced by their children.
  def scrub({_token, children}), do: children

  # Text nodes.
  def scrub(text), do: scrub_text(text)

  # --- scrub_attribute/2 -----------------------------------------------
  Meta.allow_tag_with_these_attributes "a", ["name", "title"]

  # Values starting with "&" are always rejected
  # (presumably to block entity-encoded schemes; confirm intent).
  def scrub_attribute("a", {"href", "&" <> _}), do: nil

  def scrub_attribute("a", {"href", href}) do
    if allowed_uri?(href), do: {"href", href}, else: nil
  end

  Meta.allow_tag_with_these_attributes "img", ["width", "height", "title", "alt"]

  # `src` follows the same rules as `href` and is returned unmodified, so
  # the scheme is preserved and https or scheme-less sources are accepted.
  def scrub_attribute("img", {"src", "&" <> _}), do: nil

  def scrub_attribute("img", {"src", src}) do
    if allowed_uri?(src), do: {"src", src}, else: nil
  end

  # If we haven't covered the attribute until here, we just scrub it.
  def scrub_attribute(_tag, _attribute), do: nil

  @doc false
  def scrub_attributes(tag, attributes) do
    attributes
    |> Enum.map(&scrub_attribute(tag, &1))
    |> Enum.reject(&is_nil/1)
  end

  @doc """
  Scrubs a text node. Returns the text unchanged.
  """
  def scrub_text(text), do: text

  # A URI is accepted when it has no ":" at all or starts with one of
  # @valid_schemes.
  defp allowed_uri?(uri), do: no_scheme?(uri) || valid_scheme?(uri)

  defp no_scheme?(uri), do: not String.contains?(uri, ":")

  defp valid_scheme?(uri), do: String.starts_with?(uri, @valid_schemes)
end
40 changes: 40 additions & 0 deletions lib/html_sanitize_ex/scrubber/meta.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
defmodule HtmlSanitizeEx.Scrubber.Meta do
  @moduledoc """
  Macros that generate `scrub/1` and `scrub_attribute/2` clauses inside a
  scrubber module.
  """

  @doc """
  For every tag in `list`, generates a `scrub/1` clause that keeps the tag
  and its children and passes its attributes through the caller's
  `scrub_attributes/2`.
  """
  defmacro allow_tags_and_scrub_its_attributes(list) do
    for tag_name <- list, do: keep_tag_scrubbing_attributes(tag_name)
  end

  @doc """
  For every attribute name in `list`, generates a `scrub_attribute/2`
  clause that returns that attribute of `tag` unchanged.
  """
  defmacro allow_tag_with_these_attributes(tag, list) do
    for attribute_name <- list, do: keep_attribute(tag, attribute_name)
  end

  @doc """
  For every tag in `list`, generates a `scrub/1` clause that keeps the tag
  only when its attribute list is empty.
  """
  defmacro allow_these_tags_without_attributes(list) do
    for tag_name <- list, do: keep_tag_without_attributes(tag_name)
  end

  # Quoted `scrub/1` clause for one tag; `scrub_attributes/2` is resolved in
  # the module that invokes the macro.
  defp keep_tag_scrubbing_attributes(tag_name) do
    quote do
      def scrub({unquote(tag_name), attrs, kids}) do
        {unquote(tag_name), scrub_attributes(unquote(tag_name), attrs), kids}
      end
    end
  end

  # Quoted `scrub_attribute/2` clause returning one named attribute as is.
  defp keep_attribute(tag_name, attribute_name) do
    quote do
      def scrub_attribute(unquote(tag_name), {unquote(attribute_name), val}) do
        {unquote(attribute_name), val}
      end
    end
  end

  # Quoted `scrub/1` clause matching the tag only with no attributes.
  defp keep_tag_without_attributes(tag_name) do
    quote do
      def scrub({unquote(tag_name), [], kids}) do
        {unquote(tag_name), [], kids}
      end
    end
  end
end
59 changes: 59 additions & 0 deletions lib/html_sanitize_ex/scrubber/no_scrub.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
defmodule HtmlSanitizeEx.Scrubber.NoScrub do
  @moduledoc """
  Scrubs neither tags, nor their attributes.

  This is meant for testing purposes and as a template for your own
  scrubber.
  """

  @doc """
  Returns the raw text unchanged.
  """
  def before_scrub(text), do: text

  @doc """
  Scrubs a single node.

    * `{tag, attributes, children}` - the tag is kept; each attribute is
      passed through `scrub_attribute/2` via `scrub_attributes/2`.
    * `{token, children}` (tokens like comments and doctypes) - replaced by
      `children`.
    * anything else (a text node) - returned unchanged.
  """
  def scrub(node) do
    case node do
      {tag, attributes, children} ->
        {tag, scrub_attributes(tag, attributes), children}

      {_token, children} ->
        children

      text ->
        text
    end
  end

  @doc false
  def scrub_attributes(tag, attributes) do
    Enum.flat_map(attributes, fn attribute ->
      case scrub_attribute(tag, attribute) do
        nil -> []
        kept -> [kept]
      end
    end)
  end

  @doc """
  Scrubs a single attribute for a given tag. Here the attribute is always
  returned unchanged.

  You can utilize `scrub_attribute/2` to write custom matchers so you can
  sanitize specific attributes of specific tags.

  As an example, if you only want to allow the href attribute with the
  "http" and "https" protocols, you could implement it like this:

      def scrub_attribute("a", {"href", "http" <> target}) do
        {"href", "http" <> target}
      end

      def scrub_attribute("a", {"href", _}) do
        nil
      end

  """
  def scrub_attribute(_tag, attribute), do: attribute
end
14 changes: 14 additions & 0 deletions lib/html_sanitize_ex/scrubber/strip_tags.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
defmodule HtmlSanitizeEx.Scrubber.StripTags do
  @moduledoc """
  Strips all tags.
  """

  # Marker removed from the raw text before it is parsed.
  @cdata_open "<![CDATA["

  @doc """
  Removes every occurrence of the CDATA opening marker from `text`.
  """
  def before_scrub(text), do: String.replace(text, @cdata_open, "")

  @doc """
  Scrubs a single node.

  Three-element tuples (tags) are replaced by their children, comments
  become an empty string, other two-element tokens are replaced by their
  children, and anything else is returned unchanged.
  """
  def scrub(node) do
    case node do
      {_tag, _attributes, children} -> children
      {:comment, _text} -> ""
      {_token, children} -> children
      text -> text
    end
  end
end
58 changes: 58 additions & 0 deletions lib/html_sanitize_ex/traverser.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
defmodule HtmlSanitizeEx.Traverser do
  @moduledoc """
  Walks an html_tree and hands every node to a scrubber module.
  """

  @doc """
  Traverses an html_tree, calling `scrubber_module.scrub/1` on each node.

  Children are traversed before their parent, so the scrubber sees a tag
  or token with its children already scrubbed.

    * lists - each element is traversed and the results are flattened into
      one list, so a tag that was replaced by its children is spliced into
      the surrounding list.
    * `{tag, attributes, children}` - scrubbed after its children.
    * binaries - scrubbed as text nodes.
    * `{token, children}` (e.g. `{:comment, "..."}` or `{:doctype, "..."}`) -
      scrubbed after its children.
    * anything else - returned unchanged without calling the scrubber.
  """
  def traverse([], _scrubber_module), do: []

  def traverse([head | tail], scrubber_module) do
    scrubbed_head = head |> traverse(scrubber_module) |> collapse_list()
    scrubbed_tail = traverse(tail, scrubber_module)

    # The head may itself have turned into a list of nodes; flatten so the
    # result stays a flat children list.
    List.flatten([scrubbed_head | scrubbed_tail])
  end

  def traverse({tag, attributes, children}, scrubber_module) do
    scrubber_module.scrub({tag, attributes, traverse(children, scrubber_module)})
  end

  def traverse(text, scrubber_module) when is_binary(text) do
    scrubber_module.scrub(text)
  end

  # Matches things like {:comment, "this is a comment"} or {:doctype, "..."}.
  def traverse({token, children}, scrubber_module) do
    scrubbed_children = children |> traverse(scrubber_module) |> collapse_list()

    scrubber_module.scrub({token, scrubbed_children})
  end

  # Unrecognized nodes pass through untouched; nothing is written to stdout.
  def traverse(other, _scrubber_module), do: other

  # Unwraps a one-element list into its element; everything else is
  # returned as is.
  defp collapse_list([single]), do: single
  defp collapse_list(other), do: other
end
Loading

0 comments on commit 69ea11d

Please sign in to comment.