From 02572e8e850dac9c92b548c0cc8b9b65692a0b19 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 30 Nov 2024 17:10:19 -0500 Subject: [PATCH 1/2] fix: escape foreign style tag content when serializing HTML5 Normally, a `style` tag is considered to be a raw text element, meaning `<` is parsed as part of a possible "tag start" token, and is serialized literally (and not rendered as an escaped character reference `&lt;`). However, when appearing in either SVG or MathML foreign content, a `style` tag should *not* be considered a raw text element, and should be escaped when serialized. libgumbo is parsing this case correctly, but our HTML5 serialization code does not escape the content. This commit updates the static `is_one_of()` C function to consider the namespace of the parent node as well as the tag's local name when deciding whether the tag matches the list of HTML elements, so that a `style` tag in foreign content will *not* match, but a `style` tag in HTML content will match. (cherry picked from commit 44e3a74aff2c93873c82d55db8f08912f4e69d59) --- ext/nokogiri/xml_node.c | 8 +++++++- test/html5/test_serialize.rb | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index a99a6463aae..55fbc084fe4 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1849,13 +1849,19 @@ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames) if (name == NULL) { // fragments don't have a name return false; } + + if (node->ns != NULL) { + // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're + // matching against. 
+ return false; + } + for (size_t idx = 0; idx < num_tagnames; ++idx) { if (!strcmp(name, tagnames[idx])) { return true; } } return false; - } static void diff --git a/test/html5/test_serialize.rb b/test/html5/test_serialize.rb index a250ec3701a..1d9380d3e5f 100644 --- a/test/html5/test_serialize.rb +++ b/test/html5/test_serialize.rb @@ -553,4 +553,20 @@ def test_serializing_html5_fragment refute(fragment.send(:prepend_newline?)) assert_equal("
hello
goodbye", fragment.to_html) end + + describe "foreign content style tag serialization is escaped" do + it "with svg parent" do + input = %{