From fd88a2836f4dd73d42c0d899d9aae5a132aaa7dc Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Mon, 8 Mar 2021 19:52:52 -0300 Subject: [PATCH] Improve docs and typespecs about traverse_and_update The intention is to make clear that we don't allow the function to update text nodes, but the user can update text nodes inside children. This closes https://github.com/philss/floki/issues/338 --- lib/floki.ex | 33 +++++++++++++++++++++++---------- lib/floki/traversal.ex | 6 +++--- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/lib/floki.ex b/lib/floki.ex index 15f14e79..8194b803 100644 --- a/lib/floki.ex +++ b/lib/floki.ex @@ -67,8 +67,10 @@ defmodule Floki do @type html_comment :: {:comment, String.t()} @type html_doctype :: {:doctype, String.t(), String.t(), String.t()} @type html_attribute :: {String.t(), String.t()} - @type html_tag :: {String.t(), [html_attribute()], [html_tag() | String.t() | html_comment()]} - @type html_node :: html_comment() | html_doctype() | html_tag() | html_declaration() + @type html_text :: String.t() + @type html_tag :: {String.t(), [html_attribute()], [html_node()]} + @type html_node :: + html_tag() | html_comment() | html_doctype() | html_declaration() | html_text() @type html_tree :: [html_node()] @type css_selector :: String.t() | Floki.Selector.t() | [Floki.Selector.t()] @@ -366,8 +368,9 @@ defmodule Floki do Traverses and updates a HTML tree structure. This function returns a new tree structure that is the result of applying the - given `fun` on all nodes. The tree is traversed in a post-walk fashion, where - the children are traversed before the parent. + given `fun` on all nodes except text nodes. + The tree is traversed in a post-walk fashion, where the children are traversed + before the parent. When the function `fun` encounters HTML tag, it receives a tuple with `{name, attributes, children}`, and should either return a similar tuple or @@ -378,6 +381,9 @@ defmodule Floki do documentation for `t:html_comment/0`, `t:html_doctype/0` and `t:html_declaration/0` for details. + **Note**: this won't update text nodes, but you can transform them when working + with children nodes. + ## Examples iex> html = [{"div", [], ["hello"]}] @@ -396,7 +402,10 @@ defmodule Floki do [{"div", [], [{"span", [], "I am comment"}]}] """ - @spec traverse_and_update(html_tree(), (html_node() -> html_node() | nil)) :: html_tree() + @spec traverse_and_update( + html_tree(), + (html_tag() | html_comment() | html_doctype() | html_declaration() -> html_node() | nil) + ) :: html_tree() defdelegate traverse_and_update(html_tree, fun), to: Floki.Traversal @@ -404,9 +413,9 @@ defmodule Floki do Traverses and updates a HTML tree structure with an accumulator. This function returns a new tree structure and the final value of accumulator - which are the result of applying the given `fun` on all nodes. The tree is - traversed in a post-walk fashion, where the children are traversed before - the parent. + which are the result of applying the given `fun` on all nodes except text nodes. + The tree is traversed in a post-walk fashion, where the children are traversed + before the parent. When the function `fun` encounters HTML tag, it receives a tuple with `{name, attributes, children}` and an accumulator. It and should return a @@ -419,6 +428,9 @@ defmodule Floki do documentation for `t:html_comment/0`, `t:html_doctype/0` and `t:html_declaration/0` for details. + **Note**: this won't update text nodes, but you can transform them when working + with children nodes. + ## Examples iex> html = [{"div", [], [{:comment, "I am a comment"}, "hello"]}, {"div", [], ["world"]}] @@ -445,14 +457,15 @@ defmodule Floki do @spec traverse_and_update( html_tree(), traverse_acc, - (html_node(), traverse_acc -> {html_node() | nil, traverse_acc}) + (html_tag() | html_comment() | html_doctype() | html_declaration(), traverse_acc -> + {html_node() | nil, traverse_acc}) ) :: {html_node(), traverse_acc} when traverse_acc: any() - defdelegate traverse_and_update(html_tree, acc, fun), to: Floki.Traversal @doc """ Returns the text nodes from a HTML tree. + By default, it will perform a deep search through the HTML tree. You can disable deep search with the option `deep` assigned to false. You can include content of script tags with the option `js` assigned to true. diff --git a/lib/floki/traversal.ex b/lib/floki/traversal.ex index 899d17f1..ae5d6bf0 100644 --- a/lib/floki/traversal.ex +++ b/lib/floki/traversal.ex @@ -10,9 +10,9 @@ defmodule Floki.Traversal do def traverse_and_update(html_node, acc, fun) def traverse_and_update([], acc, _fun), do: {[], acc} def traverse_and_update(text, acc, _fun) when is_binary(text), do: {text, acc} - def traverse_and_update(xml_tag = {:pi, _, _}, acc, fun), do: fun.(xml_tag, acc) - def traverse_and_update({:comment, children}, acc, fun), do: fun.({:comment, children}, acc) - def traverse_and_update(doctype = {:doctype, _, _, _}, acc, fun), do: fun.(doctype, acc) + def traverse_and_update({:pi, _, _} = xml_tag, acc, fun), do: fun.(xml_tag, acc) + def traverse_and_update({:comment, _children} = comment, acc, fun), do: fun.(comment, acc) + def traverse_and_update({:doctype, _, _, _} = doctype, acc, fun), do: fun.(doctype, acc) def traverse_and_update([head | tail], acc, fun) do case traverse_and_update(head, acc, fun) do