From 4600a1c7c4d653e41e3be1b4ecabe22cdcc15e59 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 8 May 2022 11:23:37 -0400 Subject: [PATCH 1/2] test: be explicit about HTML4 and not just HTML This is long-term prep for a day when HTML5 may become the default on supported platforms. --- test/html4/sax/test_parser.rb | 8 +- test/html4/sax/test_parser_text.rb | 2 +- test/html4/sax/test_push_parser.rb | 2 +- test/html4/test_attributes.rb | 2 +- .../html4/test_attributes_properly_escaped.rb | 6 +- test/html4/test_builder.rb | 28 +-- test/html4/test_comments.rb | 10 +- test/html4/test_document.rb | 220 ++++++++---------- test/html4/test_document_encoding.rb | 30 +-- test/html4/test_document_fragment.rb | 104 ++++----- test/html4/test_element_description.rb | 2 +- test/html4/test_node.rb | 16 +- test/html4/test_node_encoding.rb | 10 +- test/html5/test_monkey_patch.rb | 4 +- .../test_html_module.rb => test_html.rb} | 12 +- test/test_memory_leak.rb | 8 +- test/test_nokogiri.rb | 12 +- test/xml/test_node.rb | 12 +- test/xml/test_node_reparenting.rb | 2 +- test/xml/test_xpath.rb | 6 +- 20 files changed, 238 insertions(+), 258 deletions(-) rename test/{html4/test_html_module.rb => test_html.rb} (50%) diff --git a/test/html4/sax/test_parser.rb b/test/html4/sax/test_parser.rb index 4dc6ac61fd0..2d1e6451ef3 100644 --- a/test/html4/sax/test_parser.rb +++ b/test/html4/sax/test_parser.rb @@ -9,7 +9,7 @@ module SAX class TestParser < Nokogiri::SAX::TestCase def setup super - @parser = HTML::SAX::Parser.new(Doc.new) + @parser = Nokogiri::HTML4::SAX::Parser.new(Doc.new) end def test_parse_empty_document @@ -163,9 +163,9 @@ def test_empty_processing_instruction end it "handles invalid types gracefully" do - assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) } - assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) } - assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse_memory(0xcafecafe) } + assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse_io(0xcafecafe) } end end end diff --git a/test/html4/sax/test_parser_text.rb b/test/html4/sax/test_parser_text.rb index 0564000b824..2866e893f0d 100644 --- a/test/html4/sax/test_parser_text.rb +++ b/test/html4/sax/test_parser_text.rb @@ -10,7 +10,7 @@ class TestParserText < Nokogiri::SAX::TestCase def setup super @doc = DocWithOrderedItems.new - @parser = HTML::SAX::Parser.new(@doc) + @parser = Nokogiri::HTML4::SAX::Parser.new(@doc) end def test_texts_order diff --git a/test/html4/sax/test_push_parser.rb b/test/html4/sax/test_push_parser.rb index 53e62b31ce4..f11018f613e 100644 --- a/test/html4/sax/test_push_parser.rb +++ b/test/html4/sax/test_push_parser.rb @@ -9,7 +9,7 @@ module SAX class TestPushParser < Nokogiri::SAX::TestCase def setup super - @parser = HTML::SAX::PushParser.new(Doc.new) + @parser = Nokogiri::HTML4::SAX::PushParser.new(Doc.new) end def test_end_document_called diff --git a/test/html4/test_attributes.rb b/test/html4/test_attributes.rb index d79aaf246d2..0c531db87de 100644 --- a/test/html4/test_attributes.rb +++ b/test/html4/test_attributes.rb @@ -55,7 +55,7 @@ class TestAttr < Nokogiri::TestCase html = %{<#{config[:tag]} #{config[:attr]}='example.com'>test} - reparsed = HTML.fragment(HTML.fragment(html).to_html) + reparsed = Nokogiri::HTML4.fragment(Nokogiri::HTML4.fragment(html).to_html) attributes = reparsed.at_css(config[:tag]).attribute_nodes assert_equal [config[:attr]], attributes.collect(&:name) diff --git a/test/html4/test_attributes_properly_escaped.rb b/test/html4/test_attributes_properly_escaped.rb index 43fdc20f25f..16d0cb237bb 100755 --- a/test/html4/test_attributes_properly_escaped.rb +++ b/test/html4/test_attributes_properly_escaped.rb @@ -9,7 +9,7 @@ def test_attribute_macros_are_escaped skip_unless_libxml2_patch("0001-Remove-script-macro-support.patch") if Nokogiri.uses_libxml? html = "

}\">

" - document = Nokogiri::HTML::Document.new + document = Nokogiri::HTML4::Document.new nodes = document.parse(html) assert_equal("

", nodes[0].to_s) @@ -19,7 +19,7 @@ def test_libxml_escapes_server_side_includes skip_unless_libxml2_patch("0002-Update-entities-to-remove-handling-of-ssi.patch") if Nokogiri.uses_libxml? original_html = %(

) - document = Nokogiri::HTML::Document.new + document = Nokogiri::HTML4::Document.new html = document.parse(original_html).to_s assert_match(/!--%22><test>/, html) @@ -29,7 +29,7 @@ def test_libxml_escapes_server_side_includes_without_nested_quotes skip_unless_libxml2_patch("0002-Update-entities-to-remove-handling-of-ssi.patch") if Nokogiri.uses_libxml? original_html = %(

) - document = Nokogiri::HTML::Document.new + document = Nokogiri::HTML4::Document.new html = document.parse(original_html).to_s assert_match(/<!--<test>/, html) diff --git a/test/html4/test_builder.rb b/test/html4/test_builder.rb index 8ee20e61a6b..6188c1ebf79 100644 --- a/test/html4/test_builder.rb +++ b/test/html4/test_builder.rb @@ -8,11 +8,11 @@ class TestBuilder < Nokogiri::TestCase def test_top_level_function_builds foo = nil Nokogiri() { |xml| foo = xml } - assert_instance_of(Nokogiri::HTML::Builder, foo) + assert_instance_of(Nokogiri::HTML4::Builder, foo) end def test_builder_with_explicit_tags - html_doc = Nokogiri::HTML::Builder.new do + html_doc = Nokogiri::HTML4::Builder.new do div.slide(class: "another_class") do node = Nokogiri::XML::Node.new("id", doc) node.content = "hello" @@ -24,7 +24,7 @@ def test_builder_with_explicit_tags end def test_hash_as_attributes_for_attribute_method - html = Nokogiri::HTML::Builder.new do || + html = Nokogiri::HTML4::Builder.new do || div.slide(class: "another_class") do span("Slide 1") end @@ -33,7 +33,7 @@ def test_hash_as_attributes_for_attribute_method end def test_hash_as_attributes - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do div(id: "awesome") do h1("america") end @@ -54,7 +54,7 @@ def test_href_with_attributes end def test_tag_nesting - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do body do span.left("") span.middle do @@ -68,7 +68,7 @@ def test_tag_nesting end def test_has_ampersand - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do div.rad.thing! do text("") b("hello & world") @@ -81,7 +81,7 @@ def test_has_ampersand end def test_multi_tags - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do div.rad.thing! do text("") b("hello") @@ -94,7 +94,7 @@ def test_multi_tags end def test_attributes_plus_block - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do div.rad.thing! do text("") end @@ -104,7 +104,7 @@ def test_attributes_plus_block end def test_builder_adds_attributes - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do div.rad.thing!("tender div") end assert_equal('
tender div
', @@ -112,14 +112,14 @@ def test_builder_adds_attributes end def test_bold_tag - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do b("bold tag") end assert_equal("bold tag", builder.doc.root.to_html.chomp) end def test_html_then_body_tag - builder = Nokogiri::HTML::Builder.new do + builder = Nokogiri::HTML4::Builder.new do html do body do b("bold tag") @@ -137,12 +137,12 @@ def foo end end - builder = Nokogiri::HTML::Builder.new { text(foo) } + builder = Nokogiri::HTML4::Builder.new { text(foo) } assert_includes(builder.to_html, "foo!") end def test_builder_with_param - doc = Nokogiri::HTML::Builder.new do |html| + doc = Nokogiri::HTML4::Builder.new do |html| html.body do html.p("hello world") end @@ -154,7 +154,7 @@ def test_builder_with_param def test_builder_with_id text = "hello world" - doc = Nokogiri::HTML::Builder.new do |html| + doc = Nokogiri::HTML4::Builder.new do |html| html.body do html.id_(text) end diff --git a/test/html4/test_comments.rb b/test/html4/test_comments.rb index 96af26a8662..b29c36ce689 100644 --- a/test/html4/test_comments.rb +++ b/test/html4/test_comments.rb @@ -15,7 +15,7 @@ class TestComment < Nokogiri::TestCase # or ). The parser behaves as if the comment is # closed correctly. describe "abrupt closing of empty comment" do - let(:doc) { Nokogiri::HTML(html) } + let(:doc) { Nokogiri::HTML4(html) } let(:subject) { doc.at_css("div#under-test") } let(:other_div) { doc.at_css("div#also-here") } @@ -101,7 +101,7 @@ class TestComment < Nokogiri::TestCase # stream. describe "eof in comment" do let(:html) { "
" } - let(:doc) { Nokogiri::HTML(html) } + let(:doc) { Nokogiri::HTML4(html) } let(:subject) { doc.at_css("div#under-test") } let(:inner_div) { doc.at_css("div#do-i-exist") } @@ -169,7 +169,7 @@ class TestComment < Nokogiri::TestCase describe "incorrectly opened comment" do let(:html) { "
inner content
-->hello" } - let(:doc) { Nokogiri::HTML(html) } + let(:doc) { Nokogiri::HTML4(html) } let(:body) { doc.at_css("body") } let(:subject) { doc.at_css("div#under-test") } @@ -225,7 +225,7 @@ class TestComment < Nokogiri::TestCase # everything that follows will be treated as markup. describe "nested comment" do let(:html) { "
-->
" } - let(:doc) { Nokogiri::HTML(html) } + let(:doc) { Nokogiri::HTML4(html) } let(:subject) { doc.at_css("div#under-test") } let(:inner_div) { doc.at_css("div#do-i-exist") } diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index ce7d56f1514..b1c39a7fcee 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -3,10 +3,10 @@ require "helper" module Nokogiri - module HTML + module HTML4 class TestDocument < Nokogiri::TestCase - describe Nokogiri::HTML::Document do - let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE)) } + describe Nokogiri::HTML4::Document do + let(:html) { Nokogiri::HTML4.parse(File.read(HTML_FILE)) } def test_nil_css # Behavior is undefined but shouldn't break @@ -15,7 +15,7 @@ def test_nil_css end def test_does_not_fail_with_illformatted_html - doc = Nokogiri::HTML((+'"";').force_encoding(Encoding::BINARY)) + doc = Nokogiri::HTML4((+'"";').force_encoding(Encoding::BINARY)) refute_nil(doc) end @@ -34,7 +34,7 @@ def test_fragment def test_document_takes_config_block options = nil - Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg| + Nokogiri::HTML4(File.read(HTML_FILE), HTML_FILE) do |cfg| options = cfg options.nonet.nowarning.dtdattr end @@ -45,7 +45,7 @@ def test_document_takes_config_block def test_parse_takes_config_block options = nil - Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg| + Nokogiri::HTML4.parse(File.read(HTML_FILE), HTML_FILE) do |cfg| options = cfg options.nonet.nowarning.dtdattr end @@ -54,71 +54,39 @@ def test_parse_takes_config_block assert_predicate(options, :dtdattr?) end - def test_subclass - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.new - assert_instance_of(klass, doc) - end - - def test_subclass_initialize - klass = Class.new(Nokogiri::HTML::Document) do - attr_accessor :initialized_with - - def initialize(*args) - super - @initialized_with = args - end - end - doc = klass.new("uri", "external_id", 1) - assert_equal(["uri", "external_id", 1], doc.initialized_with) - end - - def test_subclass_dup - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.new.dup - assert_instance_of(klass, doc) - end - - def test_subclass_parse - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.parse(File.read(HTML_FILE)) - assert_equal(html.to_s, doc.to_s) - assert_instance_of(klass, doc) - end - def test_document_parse_method - html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE)) + html = Nokogiri::HTML4::Document.parse(File.read(HTML_FILE)) assert_equal(html.to_s, html.to_s) end def test_document_parse_method_with_url - doc = Nokogiri::HTML("", "http://foobar.example.com/", "UTF-8") + doc = Nokogiri::HTML4("", "http://foobar.example.com/", "UTF-8") refute_empty(doc.to_s, "Document should not be empty") assert_equal("http://foobar.example.com/", doc.url) end ### - # Nokogiri::HTML returns an empty Document when given a blank string GH#11 + # Nokogiri::HTML4 returns an empty Document when given a blank string GH#11 def test_empty_string_returns_empty_doc - doc = Nokogiri::HTML("") - assert_instance_of(Nokogiri::HTML::Document, doc) + doc = Nokogiri::HTML4("") + assert_instance_of(Nokogiri::HTML4::Document, doc) assert_nil(doc.root) end def test_to_xhtml_with_indent skip if Nokogiri.uses_libxml?("~> 2.6.0") - doc = Nokogiri::HTML("foo") - doc = Nokogiri::HTML(doc.to_xhtml(indent: 2)) + doc = Nokogiri::HTML4("foo") + doc = Nokogiri::HTML4(doc.to_xhtml(indent: 2)) assert_indent(2, doc) end def test_write_to_xhtml_with_indent skip if Nokogiri.uses_libxml?("~> 2.6.0") io = StringIO.new - doc = Nokogiri::HTML("foo") + doc = Nokogiri::HTML4("foo") doc.write_xhtml_to(io, indent: 5) io.rewind - doc = Nokogiri::HTML(io.read) + doc = Nokogiri::HTML4(io.read) assert_indent(5, doc) end @@ -139,7 +107,7 @@ def test_meta_encoding end def test_meta_encoding_is_strict_about_http_equiv - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) @@ -148,12 +116,12 @@ def test_meta_encoding_is_strict_about_http_equiv foo - EOHTML + HTML assert_nil(doc.meta_encoding) end def test_meta_encoding_handles_malformed_content_charset - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) @@ -162,12 +130,12 @@ def test_meta_encoding_handles_malformed_content_charset foo - EOHTML + HTML assert_nil(doc.meta_encoding) end def test_meta_encoding_checks_charset - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) @@ -176,7 +144,7 @@ def test_meta_encoding_checks_charset foo - EOHTML + HTML assert_equal("UTF-8", doc.meta_encoding) end @@ -187,12 +155,12 @@ def test_meta_encoding= def test_title assert_equal("Tender Lovemaking ", html.title) - doc = Nokogiri::HTML("foo") + doc = Nokogiri::HTML4("foo") assert_nil(doc.title) end def test_title= - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) old @@ -201,12 +169,12 @@ def test_title= foo - EOHTML + HTML doc.title = "new" assert_equal(1, doc.css("title").size) assert_equal("new", doc.title) - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) @@ -215,7 +183,7 @@ def test_title= foo - EOHTML + HTML doc.title = "new" assert_equal("new", doc.title) title = doc.at("/html/head/title") @@ -223,13 +191,13 @@ def test_title= assert_equal("new", title.text) assert_equal(-1, doc.at("meta[@http-equiv]") <=> title) - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) foo - EOHTML + HTML doc.title = "new" assert_equal("new", doc.title) # may or may not be added @@ -238,26 +206,26 @@ def test_title= assert_equal("new", title.text) assert_equal(-1, title <=> doc.at("body")) - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML) foo - EOHTML + HTML doc.title = "new" assert_equal("new", doc.title) assert_equal(-1, doc.at("meta[@charset]") <=> doc.at("title")) assert_equal(-1, doc.at("title") <=> doc.at("body")) - doc = Nokogiri::HTML("

hello") + doc = Nokogiri::HTML4("

hello") doc.title = "new" assert_equal("new", doc.title) assert_instance_of(Nokogiri::XML::DTD, doc.children.first) assert_equal(-1, doc.at("title") <=> doc.at("p")) - doc = Nokogiri::HTML("") + doc = Nokogiri::HTML4("") doc.title = "new" assert_equal("new", doc.title) assert_equal("new", doc.at("/html/head/title/text()").to_s) @@ -265,7 +233,7 @@ def test_title= def test_meta_encoding_without_head encoding = "EUC-JP" - html = Nokogiri::HTML("foo", nil, encoding) + html = Nokogiri::HTML4("foo", nil, encoding) assert_nil(html.meta_encoding) @@ -280,7 +248,7 @@ def test_meta_encoding_without_head def test_html5_meta_encoding_without_head encoding = "EUC-JP" - html = Nokogiri::HTML("foo", nil, encoding) + html = Nokogiri::HTML4("foo", nil, encoding) assert_nil(html.meta_encoding) @@ -294,7 +262,7 @@ def test_html5_meta_encoding_without_head end def test_meta_encoding_with_empty_content_type - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML) @@ -303,10 +271,10 @@ def test_meta_encoding_with_empty_content_type foo - EOHTML + HTML assert_nil(html.meta_encoding) - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML) @@ -315,30 +283,30 @@ def test_meta_encoding_with_empty_content_type foo - EOHTML + HTML assert_nil(html.meta_encoding) end def test_root_node_parent_is_document parent = html.root.parent assert_equal(html, parent) - assert_instance_of(Nokogiri::HTML::Document, parent) + assert_instance_of(Nokogiri::HTML4::Document, parent) end def test_parse_handles_nil_gracefully - @doc = Nokogiri::HTML::Document.parse(nil) - assert_instance_of(Nokogiri::HTML::Document, @doc) + @doc = Nokogiri::HTML4::Document.parse(nil) + assert_instance_of(Nokogiri::HTML4::Document, @doc) end def test_parse_empty_document - doc = Nokogiri::HTML("\n") + doc = Nokogiri::HTML4("\n") assert_equal(0, doc.css("a").length) assert_equal(0, doc.xpath("//a").length) assert_equal(0, doc.search("//a").length) end - def test_HTML_function - html = Nokogiri::HTML(File.read(HTML_FILE)) + def test_html_predicate + html = Nokogiri::HTML4(File.read(HTML_FILE)) assert_predicate(html, :html?) end @@ -354,7 +322,7 @@ def read(*args) end end - doc = Nokogiri::HTML.parse(klass.new) + doc = Nokogiri::HTML4.parse(klass.new) assert_equal("foo", doc.at_css("div").content) end @@ -364,8 +332,8 @@ def test_parse_temp_file temp_html_file.close temp_html_file.open assert_equal( - Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length, - Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length + Nokogiri::HTML4.parse(File.read(HTML_FILE)).xpath("//div/a").length, + Nokogiri::HTML4.parse(temp_html_file).xpath("//div/a").length ) end @@ -378,23 +346,23 @@ def test_to_xhtml def test_to_xhtml_self_closing_tags # https://github.com/sparklemotion/nokogiri/issues/2324 html = "
" - doc = Nokogiri::HTML::Document.parse(html) + doc = Nokogiri::HTML4::Document.parse(html) xhtml = doc.to_xhtml assert_match(%r(
), xhtml) assert_match(%r(), xhtml) end def test_no_xml_header - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML) - EOHTML + HTML refute_empty(html.to_html, "html length is too short") refute_match(/^<\?xml/, html.to_html) end def test_document_has_error - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML)
@@ -433,14 +401,14 @@ def test_multi_css
- EOHTML + HTML set = html.css("p, a") assert_equal(2, set.length) assert_equal(["a tag", "p tag"].sort, set.map(&:content).sort) end def test_inner_text - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML)
@@ -450,20 +418,20 @@ def test_inner_text
- EOHTML + HTML node = html.xpath("//div").first assert_equal("Hello world!", node.inner_text.strip) end def test_doc_type - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML)

Rainbow Dash

- EOHTML + HTML assert_equal("html", html.internal_subset.name) assert_equal("-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id) assert_equal("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id) @@ -474,7 +442,7 @@ def test_doc_type end def test_content_size - html = Nokogiri::HTML("
\n
") + html = Nokogiri::HTML4("
\n
") assert_equal(1, html.content.size) assert_equal(1, html.content.split("").size) assert_equal("\n", html.content) @@ -536,7 +504,7 @@ def test_dup_document assert(dup = html.dup) refute_equal(dup, html) assert_predicate(html, :html?) - assert_instance_of(Nokogiri::HTML::Document, dup) + assert_instance_of(Nokogiri::HTML4::Document, dup) assert_predicate(dup, :html?, "duplicate should be html") assert_equal(html.to_s, dup.to_s) end @@ -557,7 +525,7 @@ def test_dup # issue 1060 def test_node_ownership_after_dup html = "
replace me
" - doc = Nokogiri::HTML::Document.parse(html) + doc = Nokogiri::HTML4::Document.parse(html) dup = doc.dup assert_same(dup, dup.at_css("div").document) @@ -566,7 +534,7 @@ def test_node_ownership_after_dup end def test_inner_html - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML)
@@ -576,28 +544,28 @@ def test_inner_html
- EOHTML + HTML node = html.xpath("//div").first assert_equal("

Helloworld!

", node.inner_html.gsub(/\s/, "")) end def test_round_trip - doc = Nokogiri::HTML(html.inner_html) + doc = Nokogiri::HTML4(html.inner_html) assert_equal(html.root.to_html, doc.root.to_html) end def test_fragment_contains_text_node - fragment = Nokogiri::HTML.fragment("fooo") + fragment = Nokogiri::HTML4.fragment("fooo") assert_equal(1, fragment.children.length) assert_equal("fooo", fragment.inner_text) end def test_fragment_includes_two_tags - assert_equal(2, Nokogiri::HTML.fragment("

").children.length) + assert_equal(2, Nokogiri::HTML4.fragment("

").children.length) end def test_relative_css_finder - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML)
@@ -612,7 +580,7 @@ def test_relative_css_finder
- EOHTML + HTML red_divs = doc.css("div.red") assert_equal(1, red_divs.length) p_tags = red_divs.first.css("p") @@ -621,7 +589,7 @@ def test_relative_css_finder end def test_find_classes - doc = Nokogiri::HTML(<<~EOHTML) + doc = Nokogiri::HTML4(<<~HTML)

RED

@@ -630,7 +598,7 @@ def test_find_classes

GREEN

- EOHTML + HTML list = doc.css(".red") assert_equal(2, list.length) assert_equal(["RED", "RED"], list.map(&:text)) @@ -639,7 +607,7 @@ def test_find_classes def test_parse_can_take_io html = nil File.open(HTML_FILE, "rb") do |f| - html = Nokogiri::HTML(f) + html = Nokogiri::HTML4(f) end assert_predicate(html, :html?) assert_equal(HTML_FILE, html.url) @@ -651,7 +619,7 @@ def html.path "/i/should/be/the/document/url" end - doc = Nokogiri::HTML.parse(html) + doc = Nokogiri::HTML4.parse(html) assert_equal("/i/should/be/the/document/url", doc.url) end @@ -660,7 +628,7 @@ def html.path def test_parse_can_take_pathnames assert(File.size(HTML_FILE) > 4096) # file must be big enough to trip the read callback more than once - doc = Nokogiri::HTML.parse(Pathname.new(HTML_FILE)) + doc = Nokogiri::HTML4.parse(Pathname.new(HTML_FILE)) # an arbitrary assertion on the structure of the document assert_equal(166, doc.css("a").length) @@ -679,13 +647,13 @@ def test_serialize def test_empty_document # empty document should return "" #699 - assert_equal("", Nokogiri::HTML.parse(nil).text) - assert_equal("", Nokogiri::HTML.parse("").text) + assert_equal("", Nokogiri::HTML4.parse(nil).text) + assert_equal("", Nokogiri::HTML4.parse("").text) end def test_capturing_nonparse_errors_during_document_clone # see https://github.com/sparklemotion/nokogiri/issues/1196 for background - original = Nokogiri::HTML.parse("
") + original = Nokogiri::HTML4.parse("
") original_errors = original.errors.dup copy = original.dup @@ -694,8 +662,8 @@ def test_capturing_nonparse_errors_during_document_clone def test_capturing_nonparse_errors_during_node_copy_between_docs # Errors should be emitted while parsing only, and should not change when moving nodes. - doc1 = Nokogiri::HTML("one") - doc2 = Nokogiri::HTML("two") + doc1 = Nokogiri::HTML4("one") + doc2 = Nokogiri::HTML4("two") node1 = doc1.at_css("#unique") node2 = doc2.at_css("#unique") original_errors1 = doc1.errors.dup @@ -721,7 +689,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262 # having `ID unique-issue-1262 already defined` emitted to # stderr when running the test suite. # - doc = Nokogiri::HTML::Document.new + doc = Nokogiri::HTML4::Document.new Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") assert_equal(0, doc.errors.length) @@ -734,41 +702,41 @@ def test_leaking_dtd_nodes_after_internal_subset_removal # don't otherwise have any test coverage for removing DTDs. # 100.times do |_i| - Nokogiri::HTML::Document.new.internal_subset.remove + Nokogiri::HTML4::Document.new.internal_subset.remove end end it "skips encoding for script tags" do - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML) - EOHTML + HTML node = html.xpath("//script").first assert_equal("var isGreater = 4 > 5;", node.inner_html) end it "skips encoding for style tags" do - html = Nokogiri::HTML(<<~EOHTML) + html = Nokogiri::HTML4(<<~HTML) - EOHTML + HTML node = html.xpath("//style").first assert_equal("tr > div { display:block; }", node.inner_html) end it "does not fail when converting to_html using explicit encoding" do - html_fragment = <<~EOHTML + html_fragment = <<~HTML Inactive hide details for "User" ---19/05/2015 12:55:29---Provvediamo subito nell’integrare - EOHTML - doc = Nokogiri::HTML(html_fragment, nil, "ISO-8859-1") + HTML + doc = Nokogiri::HTML4(html_fragment, nil, "ISO-8859-1") html = doc.to_html assert html.index("src=\"images/icon.gif\"") assert_equal "ISO-8859-1", html.encoding.name @@ -831,7 +799,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal it "raises exception on parse error" do exception = assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML.parse(input, nil, nil, parse_options) + Nokogiri::HTML4.parse(input, nil, nil, parse_options) end assert_match(/Parser without recover option encountered error or warning/, exception.to_s) end @@ -839,7 +807,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal describe "default options" do it "does not raise exception on parse error" do - doc = Nokogiri::HTML.parse(input) + doc = Nokogiri::HTML4.parse(input) assert_operator(doc.errors.length, :>, 0) end end @@ -853,7 +821,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal it "raises exception on parse error" do exception = assert_raises(Nokogiri::SyntaxError) do - Nokogiri::HTML.parse(input, nil, "UTF-8", parse_options) + Nokogiri::HTML4.parse(input, nil, "UTF-8", parse_options) end assert_match(/Parser without recover option encountered error or warning/, exception.to_s) end @@ -861,7 +829,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal describe "default options" do it "does not raise exception on parse error" do - doc = Nokogiri::HTML.parse(input, nil, "UTF-8") + doc = Nokogiri::HTML4.parse(input, nil, "UTF-8") assert_operator(doc.errors.length, :>, 0) end end @@ -870,7 +838,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal describe "subclassing" do let(:klass) do - Class.new(Nokogiri::HTML::Document) do + Class.new(Nokogiri::HTML4::Document) do attr_accessor :initialized_with, :initialized_count def initialize(*args) diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb index 6115301764b..e96eb59dc63 100644 --- a/test/html4/test_document_encoding.rb +++ b/test/html4/test_document_encoding.rb @@ -4,10 +4,10 @@ require "helper" class TestNokogiriHtmlDocument < Nokogiri::TestCase - describe "Nokogiri::HTML::Document" do + describe "Nokogiri::HTML4::Document" do describe "Encoding" do def test_encoding - doc = Nokogiri::HTML(File.open(SHIFT_JIS_HTML, "rb")) + doc = Nokogiri::HTML4(File.open(SHIFT_JIS_HTML, "rb")) hello = "こんにちは" @@ -21,7 +21,7 @@ def test_encoding end def test_encoding_without_charset - doc = Nokogiri::HTML(File.open(SHIFT_JIS_NO_CHARSET, "r:Shift_JIS:Shift_JIS").read) + doc = Nokogiri::HTML4(File.open(SHIFT_JIS_NO_CHARSET, "r:Shift_JIS:Shift_JIS").read) hello = "こんにちは" @@ -42,7 +42,7 @@ def test_default_to_encoding_from_string eohtml - doc = Nokogiri::HTML(bad_charset) + doc = Nokogiri::HTML4(bad_charset) assert_equal(bad_charset.encoding.name, doc.encoding) doc = Nokogiri.parse(bad_charset) @@ -58,7 +58,7 @@ def test_encoding_non_utf8 #{orig} eohtml - text = Nokogiri::HTML.parse(html).at("title").inner_text + text = Nokogiri::HTML4.parse(html).at("title").inner_text assert_equal( orig.encode(enc).force_encoding(bin), text.encode(enc).force_encoding(bin) @@ -78,14 +78,14 @@ def test_encoding_with_a_bad_name eohtml - doc = Nokogiri::HTML(bad_charset, nil, "askldjfhalsdfjhlkasdfjh") + doc = Nokogiri::HTML4(bad_charset, nil, "askldjfhalsdfjhlkasdfjh") assert_equal(["http://tenderlovemaking.com/"], doc.css("a").map { |a| a["href"] }) end def test_empty_doc_encoding encoding = "US-ASCII" - assert_equal(encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding) + assert_equal(encoding, Nokogiri::HTML4.parse(nil, nil, encoding).encoding) end describe "Detection" do @@ -98,31 +98,31 @@ def binopen(file) end it "handles both memory and IO" do - from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE)) - from_string = Nokogiri::HTML(binread(NOENCODING_FILE)) + from_stream = Nokogiri::HTML4(binopen(NOENCODING_FILE)) + from_string = Nokogiri::HTML4(binread(NOENCODING_FILE)) assert_equal(from_string.to_s.size, from_stream.to_s.size) assert_operator(from_string.to_s.size, :>, 0) end it "uses meta charset encoding when present" do - html = Nokogiri::HTML(binopen(METACHARSET_FILE)) + html = Nokogiri::HTML4(binopen(METACHARSET_FILE)) assert_equal("iso-2022-jp", html.encoding) assert_equal("たこ焼き仮面", html.title) end { "xhtml" => ENCODING_XHTML_FILE, "html" => ENCODING_HTML_FILE }.each do |flavor, file| it "detects #{flavor} document encoding" do - doc_from_string_enc = Nokogiri::HTML(binread(file), nil, "Shift_JIS") + doc_from_string_enc = Nokogiri::HTML4(binread(file), nil, "Shift_JIS") ary_from_string_enc = doc_from_string_enc.xpath("//p/text()").map(&:text) - doc_from_string = Nokogiri::HTML(binread(file)) + doc_from_string = Nokogiri::HTML4(binread(file)) ary_from_string = doc_from_string.xpath("//p/text()").map(&:text) - doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, "Shift_JIS") + doc_from_file_enc = Nokogiri::HTML4(binopen(file), nil, "Shift_JIS") ary_from_file_enc = doc_from_file_enc.xpath("//p/text()").map(&:text) - doc_from_file = Nokogiri::HTML(binopen(file)) + doc_from_file = Nokogiri::HTML4(binopen(file)) ary_from_file = doc_from_file.xpath("//p/text()").map(&:text) title = "たこ焼き仮面" @@ -150,7 +150,7 @@ def binopen(file) { "read_memory" => RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input| it "#{flavor} should handle errors" do - doc = Nokogiri::HTML.parse(input) + doc = Nokogiri::HTML4.parse(input) assert_operator(doc.errors.length, :>, 0) end end diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb index ea3c1ca492a..6dffc9ffc38 100644 --- a/test/html4/test_document_fragment.rb +++ b/test/html4/test_document_fragment.rb @@ -6,48 +6,48 @@ module Nokogiri module HTML class TestDocumentFragment < Nokogiri::TestCase - describe Nokogiri::HTML::DocumentFragment do - let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) } + describe Nokogiri::HTML4::DocumentFragment do + let(:html) { Nokogiri::HTML4.parse(File.read(HTML_FILE), HTML_FILE) } def test_ascii_8bit_encoding s = +"hello" s.force_encoding(::Encoding::ASCII_8BIT) - assert_equal("hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html) + assert_equal("hello", Nokogiri::HTML4::DocumentFragment.parse(s).to_html) end def test_inspect_encoding fragment = "
こんにちは!
".encode("EUC-JP") - f = Nokogiri::HTML::DocumentFragment.parse(fragment) + f = Nokogiri::HTML4::DocumentFragment.parse(fragment) assert_equal("こんにちは!", f.content) end def test_html_parse_encoding fragment = "
こんにちは!
".encode("EUC-JP") - f = Nokogiri::HTML.fragment(fragment) + f = Nokogiri::HTML4.fragment(fragment) assert_equal("EUC-JP", f.document.encoding) assert_equal("こんにちは!", f.content) end def test_unlink_empty_document - frag = Nokogiri::HTML::DocumentFragment.parse("").unlink # must_not_raise + frag = Nokogiri::HTML4::DocumentFragment.parse("").unlink # must_not_raise assert_nil(frag.parent) end def test_colons_are_not_removed - doc = Nokogiri::HTML::DocumentFragment.parse("3:30pm") + doc = Nokogiri::HTML4::DocumentFragment.parse("3:30pm") assert_match(/3:30/, doc.to_s) end def test_parse_encoding fragment = "
hello world
" - f = Nokogiri::HTML::DocumentFragment.parse(fragment, "ISO-8859-1") + f = Nokogiri::HTML4::DocumentFragment.parse(fragment, "ISO-8859-1") assert_equal("ISO-8859-1", f.document.encoding) assert_equal("hello world", f.content) end def test_html_parse_with_encoding fragment = "
hello world
" - f = Nokogiri::HTML.fragment(fragment, "ISO-8859-1") + f = Nokogiri::HTML4.fragment(fragment, "ISO-8859-1") assert_equal("ISO-8859-1", f.document.encoding) assert_equal("hello world", f.content) end @@ -57,7 +57,7 @@ def test_parse_in_context end def test_inner_html= - fragment = Nokogiri::HTML.fragment("
") + fragment = Nokogiri::HTML4.fragment("
") fragment.inner_html = "hello" assert_equal("hello", fragment.inner_html) @@ -71,70 +71,70 @@ def test_ancestors_search
EOF - fragment = Nokogiri::HTML.fragment(html) + fragment = Nokogiri::HTML4.fragment(html) li = fragment.at("li") assert(li.matches?("li")) end def test_fun_encoding string = %(こんにちは) - html = Nokogiri::HTML::DocumentFragment.parse( + html = Nokogiri::HTML4::DocumentFragment.parse( string ).to_html(encoding: "UTF-8") assert_equal(string, html) end def test_new - assert(Nokogiri::HTML::DocumentFragment.new(html)) + assert(Nokogiri::HTML4::DocumentFragment.new(html)) end def test_body_fragment_should_contain_body - fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") + fragment = Nokogiri::HTML4::DocumentFragment.parse("
foo
") assert_match(/^/, fragment.to_s) end def test_nonbody_fragment_should_not_contain_body - fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") + fragment = Nokogiri::HTML4::DocumentFragment.parse("
foo
") assert_match(/^
/, fragment.to_s) end def test_fragment_should_have_document - fragment = Nokogiri::HTML::DocumentFragment.new(html) + fragment = Nokogiri::HTML4::DocumentFragment.new(html) assert_equal(html, fragment.document) end def test_empty_fragment_should_be_searchable_by_css - fragment = Nokogiri::HTML.fragment("") + fragment = Nokogiri::HTML4.fragment("") assert_equal(0, fragment.css("a").size) end def test_empty_fragment_should_be_searchable - fragment = Nokogiri::HTML.fragment("") + fragment = Nokogiri::HTML4.fragment("") assert_equal(0, fragment.search("//a").size) end def test_name - fragment = Nokogiri::HTML::DocumentFragment.new(html) + fragment = Nokogiri::HTML4::DocumentFragment.new(html) assert_equal("#document-fragment", fragment.name) end def test_static_method - fragment = Nokogiri::HTML::DocumentFragment.parse("
a
") - assert_instance_of(Nokogiri::HTML::DocumentFragment, fragment) + fragment = Nokogiri::HTML4::DocumentFragment.parse("
a
") + assert_instance_of(Nokogiri::HTML4::DocumentFragment, fragment) end def test_many_fragments - 100.times { Nokogiri::HTML::DocumentFragment.new(html) } + 100.times { Nokogiri::HTML4::DocumentFragment.new(html) } end def test_html_fragment - fragment = Nokogiri::HTML.fragment("
a
") + fragment = Nokogiri::HTML4.fragment("
a
") assert_equal("
a
", fragment.to_s) end def test_html_fragment_has_outer_text doc = "a
b
c" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) if Nokogiri.uses_libxml?("<= 2.6.16") assert_equal("a
b

c

", fragment.to_s) else @@ -144,59 +144,59 @@ def test_html_fragment_has_outer_text def test_html_fragment_case_insensitivity doc = "
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_equal("
b
", fragment.to_s) end def test_html_fragment_with_leading_whitespace doc = "
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_match(%r%
b
*%, fragment.to_s) end def test_html_fragment_with_leading_whitespace_and_newline doc = " \n
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_match(%r% \n
b
*%, fragment.to_s) end def test_html_fragment_with_input_and_intermediate_whitespace doc = " span" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_equal(" span", fragment.to_s) end def test_html_fragment_with_leading_text_and_newline - fragment = HTML::Document.new.fragment("First line\nSecond line
Broken line") + fragment = Nokogiri::HTML4::Document.new.fragment("First line\nSecond line
Broken line") assert_equal("First line\nSecond line
Broken line", fragment.to_s) end def test_html_fragment_with_leading_whitespace_and_text_and_newline - fragment = HTML::Document.new.fragment(" First line\nSecond line
Broken line") + fragment = Nokogiri::HTML4::Document.new.fragment(" First line\nSecond line
Broken line") assert_equal(" First line\nSecond line
Broken line", fragment.to_s) end def test_html_fragment_with_leading_entity failed = ""test
test"" - fragment = Nokogiri::HTML::DocumentFragment.parse(failed) + fragment = Nokogiri::HTML4::DocumentFragment.parse(failed) assert_equal('"test
test"', fragment.to_html) end def test_to_s doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_equal("foo
bar", fragment.to_s) end def test_to_html doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_equal("foo
bar", fragment.to_html) end def test_to_xhtml doc = "foo
bar

" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.7.0") assert_equal("foo
bar

", fragment.to_xhtml) else @@ -208,19 +208,19 @@ def test_to_xhtml def test_to_xml doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) + fragment = Nokogiri::HTML4::Document.new.fragment(doc) assert_equal("foo
bar", fragment.to_xml) end def test_fragment_script_tag_with_cdata - doc = HTML::Document.new + doc = Nokogiri::HTML4::Document.new fragment = doc.fragment("") assert_equal("", fragment.to_s) end def test_fragment_with_comment - doc = HTML::Document.new + doc = Nokogiri::HTML4::Document.new fragment = doc.fragment("

hello

") assert_equal("

hello

", fragment.to_s) @@ -230,41 +230,41 @@ def test_element_children_counts if Nokogiri.uses_libxml?("<= 2.9.1") skip("#elements doesn't work in 2.9.1, see 1793a5a for history") end - doc = Nokogiri::HTML::DocumentFragment.parse("
\n ") + doc = Nokogiri::HTML4::DocumentFragment.parse("
\n ") assert_equal(1, doc.element_children.count) end def test_malformed_fragment_is_corrected - fragment = HTML::DocumentFragment.parse("
") + fragment = Nokogiri::HTML4::DocumentFragment.parse("
") assert_equal("
", fragment.to_s) end def test_unclosed_script_tag # see GH#315 - fragment = HTML::DocumentFragment.parse("foo ", fragment.to_html) end def test_error_propagation_on_fragment_parse - frag = Nokogiri::HTML::DocumentFragment.parse("oh, hello there.") + frag = Nokogiri::HTML4::DocumentFragment.parse("oh, hello there.") assert(frag.errors.any? { |err| err.to_s.include?("Tag hello invalid") }, "errors should be copied to the fragment") end def test_error_propagation_on_fragment_parse_in_node_context - doc = Nokogiri::HTML::Document.parse("
") + doc = Nokogiri::HTML4::Document.parse("
") context_node = doc.at_css("div") - frag = Nokogiri::HTML::DocumentFragment.new(doc, "oh, hello there.", context_node) + frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) assert(frag.errors.any? do |err| err.to_s.include?("Tag hello invalid") end, "errors should be on the context node's document") end def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors - doc = Nokogiri::HTML::Document.parse("
") + doc = Nokogiri::HTML4::Document.parse("
") assert(doc.errors.any? { |err| err.to_s.include?("jimmy") }, "assert on setup") context_node = doc.at_css("div") - frag = Nokogiri::HTML::DocumentFragment.new(doc, "oh, hello there.", context_node) + frag = Nokogiri::HTML4::DocumentFragment.new(doc, "oh, hello there.", context_node) assert(frag.errors.any? do |err| err.to_s.include?("Tag hello invalid") end, "errors should be on the context node's document") @@ -275,7 +275,7 @@ def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_ def test_capturing_nonparse_errors_during_fragment_clone # see https://github.com/sparklemotion/nokogiri/issues/1196 for background - original = Nokogiri::HTML.fragment("
") + original = Nokogiri::HTML4.fragment("
") original_errors = original.errors.dup copy = original.dup @@ -284,8 +284,8 @@ def test_capturing_nonparse_errors_during_fragment_clone def test_capturing_nonparse_errors_during_node_copy_between_fragments # Errors should be emitted while parsing only, and should not change when moving nodes. - frag1 = Nokogiri::HTML.fragment("one") - frag2 = Nokogiri::HTML.fragment("two") + frag1 = Nokogiri::HTML4.fragment("one") + frag2 = Nokogiri::HTML4.fragment("two") node1 = frag1.at_css("#unique") node2 = frag2.at_css("#unique") original_errors1 = frag1.errors.dup @@ -301,9 +301,9 @@ def test_capturing_nonparse_errors_during_node_copy_between_fragments def test_dup_should_create_an_html_document_fragment # https://github.com/sparklemotion/nokogiri/issues/1846 - original = Nokogiri::HTML::DocumentFragment.parse("

hello

") + original = Nokogiri::HTML4::DocumentFragment.parse("

hello

") duplicate = original.dup - assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate) + assert_instance_of(Nokogiri::HTML4::DocumentFragment, duplicate) end describe "parse options" do @@ -449,7 +449,7 @@ def test_dup_should_create_an_html_document_fragment describe "subclassing" do let(:klass) do - Class.new(Nokogiri::HTML::DocumentFragment) do + Class.new(Nokogiri::HTML4::DocumentFragment) do attr_accessor :initialized_with, :initialized_count def initialize(*args) @@ -496,7 +496,7 @@ def initialize(*args) it "passes the fragment" do fragment = klass.parse("
a
") - assert_equal(Nokogiri::HTML::DocumentFragment.parse("
a
").to_s, fragment.to_s) + assert_equal(Nokogiri::HTML4::DocumentFragment.parse("
a
").to_s, fragment.to_s) end end end diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb index 6f17f592afb..1df0379e042 100644 --- a/test/html4/test_element_description.rb +++ b/test/html4/test_element_description.rb @@ -72,7 +72,7 @@ def test_default_sub_element end def test_null_default_sub_element - doc = Nokogiri::HTML("foo") + doc = Nokogiri::HTML4("foo") doc.root.description.default_sub_element end diff --git a/test/html4/test_node.rb b/test/html4/test_node.rb index 06537ec690d..56aa283de1f 100644 --- a/test/html4/test_node.rb +++ b/test/html4/test_node.rb @@ -9,7 +9,7 @@ module HTML class TestNode < Nokogiri::TestCase def setup super - @html = Nokogiri::HTML(<<-eohtml) + @html = Nokogiri::HTML4(<<-eohtml) @@ -40,12 +40,12 @@ def test_get_attribute # are treated as as undeclared and have to be accessed via prefix:tagname def test_ns_attribute html = '' - doc = Nokogiri::HTML(html) + doc = Nokogiri::HTML4(html) assert_equal("baz", (doc % "i")["foo:bar"]) end def test_css_path_round_trip - doc = Nokogiri::HTML(File.read(HTML_FILE)) + doc = Nokogiri::HTML4(File.read(HTML_FILE)) ["#header", "small", "div[2]", "div.post", "body"].each do |css_sel| ele = doc.at(css_sel) assert_equal(ele, doc.at(ele.css_path), ele.css_path) @@ -53,7 +53,7 @@ def test_css_path_round_trip end def test_path_round_trip - doc = Nokogiri::HTML(File.read(HTML_FILE)) + doc = Nokogiri::HTML4(File.read(HTML_FILE)) ["#header", "small", "div[2]", "div.post", "body"].each do |css_sel| ele = doc.at(css_sel) assert_equal(ele, doc.at(ele.path), ele.path) @@ -62,7 +62,7 @@ def test_path_round_trip def test_append_with_document assert_raises(ArgumentError) do - @html.root << Nokogiri::HTML::Document.new + @html.root << Nokogiri::HTML4::Document.new end end @@ -161,7 +161,7 @@ def test_fragment end def test_fragment_serialization - fragment = Nokogiri::HTML.fragment("
foo
") + fragment = Nokogiri::HTML4.fragment("
foo
") assert_equal("
foo
", fragment.serialize.chomp) assert_equal("
foo
", fragment.to_xml.chomp) assert_equal("
foo
", fragment.inner_html) @@ -178,7 +178,7 @@ def test_to_html_does_not_contain_entities foo bar

EOH - nokogiri = Nokogiri::HTML.parse(html) + nokogiri = Nokogiri::HTML4.parse(html) if RUBY_PLATFORM.include?("java") # NKF linebreak modes are not supported as of jruby 1.2 @@ -193,7 +193,7 @@ def test_to_html_does_not_contain_entities def test_GH_1042 file = File.join(ASSETS_DIR, "GH_1042.html") - html = Nokogiri::HTML(File.read(file)) + html = Nokogiri::HTML4(File.read(file)) table = html.xpath("//table")[1] trs = table.xpath("tr").drop(1) diff --git a/test/html4/test_node_encoding.rb b/test/html4/test_node_encoding.rb index 28855f3a734..3aae047e8b2 100644 --- a/test/html4/test_node_encoding.rb +++ b/test/html4/test_node_encoding.rb @@ -8,7 +8,7 @@ module HTML class TestNodeEncoding < Nokogiri::TestCase def setup super - @html = Nokogiri::HTML(File.open(NICH_FILE, "rb")) + @html = Nokogiri::HTML4(File.open(NICH_FILE, "rb")) end def test_get_attribute @@ -28,12 +28,12 @@ def test_serialize_encoding_html assert_equal(@html.encoding.downcase, @html.serialize.encoding.name.downcase) - @doc = Nokogiri::HTML(@html.serialize) + @doc = Nokogiri::HTML4(@html.serialize) assert_equal(@html.serialize, @doc.serialize) end def test_default_encoding - doc = Nokogiri::HTML(nil) + doc = Nokogiri::HTML4(nil) assert_nil(doc.encoding) assert_equal("UTF-8", doc.serialize.encoding.name) end @@ -59,7 +59,7 @@ def test_path end def test_inner_html - doc = Nokogiri::HTML(File.open(SHIFT_JIS_HTML, "rb")) + doc = Nokogiri::HTML4(File.open(SHIFT_JIS_HTML, "rb")) hello = "こんにちは" @@ -76,7 +76,7 @@ def test_inner_html end def test_encoding_GH_1113 - doc = Nokogiri::HTML::Document.new + doc = Nokogiri::HTML4::Document.new hex = "

🍀

" decimal = "

🍀

" encoded = "

🍀

" diff --git a/test/html5/test_monkey_patch.rb b/test/html5/test_monkey_patch.rb index 0483cdf8cd1..1ad88f0789f 100644 --- a/test/html5/test_monkey_patch.rb +++ b/test/html5/test_monkey_patch.rb @@ -11,7 +11,7 @@ def test_to_xml end def test_html4_fragment - frag = Nokogiri::HTML.fragment("") - assert(frag.is_a?(Nokogiri::HTML::DocumentFragment)) + frag = Nokogiri::HTML4.fragment("") + assert(frag.is_a?(Nokogiri::HTML4::DocumentFragment)) end end if Nokogiri.uses_gumbo? diff --git a/test/html4/test_html_module.rb b/test/test_html.rb similarity index 50% rename from test/html4/test_html_module.rb rename to test/test_html.rb index 1af5c6727cb..504410acb37 100644 --- a/test/html4/test_html_module.rb +++ b/test/test_html.rb @@ -3,7 +3,7 @@ require "helper" module Nokogiri - class TestCase + class TestHtml < Nokogiri::TestCase describe Nokogiri::HTML do it "is the same as Nokogiri::HTML4" do assert_same(Nokogiri::HTML, Nokogiri::HTML4) @@ -14,6 +14,16 @@ class TestCase it "is the same as Nokogiri.HTML4()" do assert_equal(Nokogiri.method(:HTML), Nokogiri.method(:HTML4)) end + + it "returns a Nokogiri::HTML4::Document" do + assert_instance_of(Nokogiri::HTML4::Document, Nokogiri::HTML::Document.parse("")) + end + end + + describe Nokogiri::HTML::Document do + it "is the same as Nokogiri::HTML4::Document" do + assert_same(Nokogiri::HTML4::Document, Nokogiri::HTML::Document) + end end end end diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb index 34ca85886e3..50d4306e4d9 100644 --- a/test/test_memory_leak.rb +++ b/test/test_memory_leak.rb @@ -114,8 +114,8 @@ def test_sax_parser_context Nokogiri::XML::SAX::ParserContext.new(io) io.rewind - Nokogiri::HTML::SAX::ParserContext.new(@str) - Nokogiri::HTML::SAX::ParserContext.new(io) + Nokogiri::HTML4::SAX::ParserContext.new(@str) + Nokogiri::HTML4::SAX::ParserContext.new(io) io.rewind end end @@ -136,7 +136,7 @@ def test_jumping_sax_handler loop do catch(:foo) do - Nokogiri::HTML::SAX::Parser.new(doc).parse(@str) + Nokogiri::HTML4::SAX::Parser.new(doc).parse(@str) end end end @@ -194,7 +194,7 @@ def test_leaking_namespace_node_strings_with_prefix def test_leaking_dtd_nodes_after_internal_subset_removal # see https://github.com/sparklemotion/nokogiri/issues/1784 100_000.times do |i| - doc = Nokogiri::HTML::Document.new + doc = Nokogiri::HTML4::Document.new doc.internal_subset.remove puts MemInfo.rss if i % 1000 == 0 end diff --git a/test/test_nokogiri.rb b/test/test_nokogiri.rb index 077b5b2f399..1acd1a34955 100644 --- a/test/test_nokogiri.rb +++ b/test/test_nokogiri.rb @@ -12,7 +12,7 @@ def test_libxml_iconv def test_parse_with_io doc = Nokogiri.parse(StringIO.new("")) - assert_instance_of(Nokogiri::HTML::Document, doc) + assert_instance_of(Nokogiri::HTML4::Document, doc) end def test_xml? @@ -40,13 +40,15 @@ def test_nokogiri_method_with_html end def test_nokogiri_method_with_block - doc = Nokogiri { b("bold tag") } - assert_equal("bold tag", doc.to_html.chomp) + root = Nokogiri { b("bold tag") } + assert_instance_of(Nokogiri::HTML4::Document, root.document) + assert_equal("bold tag", root.to_html.chomp) end def test_make_with_html - doc = Nokogiri.make("bold tag") - assert_equal("bold tag", doc.to_html.chomp) + root = Nokogiri.make("bold tag") + assert_instance_of(Nokogiri::HTML4::Document, root.document) + assert_equal("bold tag", root.to_html.chomp) end def test_make_with_block diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index 69bd4c5a253..0c855ba67d4 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -151,7 +151,7 @@ def test_parse_with_unparented_text_context_node end def test_parse_with_unparented_html_text_context_node - doc = HTML::Document.new + doc = Nokogiri::HTML4::Document.new elem = XML::Text.new("div", doc) x = elem.parse("
") # should not raise an exception assert_equal("div", x.first.name) @@ -165,7 +165,7 @@ def test_parse_with_unparented_fragment_text_context_node end def test_parse_with_unparented_html_fragment_text_context_node - doc = HTML::DocumentFragment.parse("
foo
") + doc = Nokogiri::HTML4::DocumentFragment.parse("
foo
") elem = doc.at_css("span") x = elem.parse("") # should not raise an exception assert_equal("span", x.first.name) @@ -196,8 +196,8 @@ def test_dup_shallow_copy def test_dup_to_another_document skip_unless_libxml2("Node.dup with new_parent arg is only implemented on CRuby") - doc1 = HTML::Document.parse("

hello

") - doc2 = HTML::Document.parse("
") + doc1 = Nokogiri::HTML4::Document.parse("

hello

") + doc2 = Nokogiri::HTML4::Document.parse("
") div = doc1.at_css("div") duplicate_div = div.dup(1, doc2) @@ -1127,7 +1127,7 @@ def test_namespace_without_an_href_on_html_node # describe how we handle microsoft word's HTML formatting. # this test is descriptive, not prescriptive. # - html = Nokogiri::HTML.parse(<<~XML) + html = Nokogiri::HTML4.parse(<<~XML)
foo
XML node = html.at("div").children.first @@ -1291,7 +1291,7 @@ def test_text_node_robustness_gh1426 # side note: this was fixed in libxml-ruby 2.9.0 by https://github.com/xml4r/libxml-ruby/pull/119 message = "

BOOM!

" 10_000.times do - node = Nokogiri::HTML::DocumentFragment.parse(message).at_css("h2") + node = Nokogiri::HTML4::DocumentFragment.parse(message).at_css("h2") node.add_previous_sibling(Nokogiri::XML::Text.new("before", node.document)) node.add_next_sibling(Nokogiri::XML::Text.new("after", node.document)) end diff --git a/test/xml/test_node_reparenting.rb b/test/xml/test_node_reparenting.rb index 08fb2ecabe6..78824b9805a 100644 --- a/test/xml/test_node_reparenting.rb +++ b/test/xml/test_node_reparenting.rb @@ -603,7 +603,7 @@ def coerce(data) end it "should remove the child node after the operation" do - fragment = Nokogiri::HTML::DocumentFragment.parse("ab") + fragment = Nokogiri::HTML4::DocumentFragment.parse("ab") node = fragment.children.last node.add_previous_sibling(node.children) assert_empty node.children, "should have no childrens" diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 8fae2ea39c5..0de81ba3513 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -116,7 +116,7 @@ def test_node_search_with_multiple_queries def test_css_search_with_ambiguous_integer_or_string_attributes # https://github.com/sparklemotion/nokogiri/issues/711 html = "
" - doc = Nokogiri::HTML(html) + doc = Nokogiri::HTML4(html) refute_nil(doc.at_css("img[width='200']")) refute_nil(doc.at_css("img[width=200]")) end @@ -315,7 +315,7 @@ def awesome!; end def test_code_that_invokes_OP_RESET_inside_libxml2 doc = "hi" xpath = 'id("foo")//foo' - nokogiri = Nokogiri::HTML.parse(doc) + nokogiri = Nokogiri::HTML4.parse(doc) assert(nokogiri.xpath(xpath)) end @@ -500,7 +500,7 @@ def add(context, rhs) describe "nokogiri-builtin:css-class xpath function" do before do - @doc = Nokogiri::HTML::Document.parse("") + @doc = Nokogiri::HTML4::Document.parse("") end it "accepts exactly two arguments" do From ebde7dae770aaefa667ee02ac57a2c0bfed1164c Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 8 May 2022 11:59:40 -0400 Subject: [PATCH 2/2] fix: make sure HTML5::Document{,Fragment} subclass properly Loofah and other downstream libraries rely on this behavior. This is long-term prep for a day when HTML5 may become the default on supported platforms. --- ext/nokogiri/gumbo.c | 10 +-- lib/nokogiri/html5/document.rb | 2 +- test/html5/test_api.rb | 118 +++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 5 deletions(-) diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c index f7fa18e97cc..5acc486f0a2 100644 --- a/ext/nokogiri/gumbo.c +++ b/ext/nokogiri/gumbo.c @@ -23,7 +23,7 @@ // // Processing starts by calling gumbo_parse_with_options. The resulting document tree // is then walked, a parallel libxml2 tree is constructed, and the final document is -// then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU +// then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU // requirements as Ruby objects are only built when necessary. // @@ -297,6 +297,7 @@ typedef struct { GumboOutput *output; VALUE input; VALUE url_or_frag; + VALUE klass; xmlDocPtr doc; } ParseArgs; @@ -321,7 +322,7 @@ static VALUE parse_continue(VALUE parse_args); * @!visibility protected */ static VALUE -parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) +parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass) { GumboOptions options = kGumboDefaultOptions; options.max_attributes = NUM2INT(max_attributes); @@ -333,6 +334,7 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors .output = output, .input = input, .url_or_frag = url, + .klass = klass, .doc = NULL, }; @@ -357,7 +359,7 @@ parse_continue(VALUE parse_args) } args->doc = doc; // Make sure doc gets cleaned up if an error is thrown. build_tree(doc, (xmlNodePtr)doc, output->document); - VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc); + VALUE rdoc = noko_xml_document_wrap(args->klass, doc); args->doc = NULL; // The Ruby runtime now owns doc so don't delete it. add_errors(output, rdoc, args->input, args->url_or_frag); return rdoc; @@ -577,7 +579,7 @@ noko_init_gumbo() parent = rb_intern_const("parent"); // Define Nokogumbo module with parse and fragment methods. - rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5); + rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6); rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6); } diff --git a/lib/nokogiri/html5/document.rb b/lib/nokogiri/html5/document.rb index cc0961c8641..d3b69431dee 100644 --- a/lib/nokogiri/html5/document.rb +++ b/lib/nokogiri/html5/document.rb @@ -63,7 +63,7 @@ def do_parse(string_or_io, url, encoding, options) max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH - doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth) + doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth, self) doc.encoding = "UTF-8" doc end diff --git a/test/html5/test_api.rb b/test/html5/test_api.rb index af10dc11db9..e338ae20934 100644 --- a/test/html5/test_api.rb +++ b/test/html5/test_api.rb @@ -182,4 +182,122 @@ def test_html_eh assert_predicate(doc, :html?) refute_predicate(doc, :xml?) end + + describe Nokogiri::HTML5::Document do + describe "subclassing" do + let(:klass) do + Class.new(Nokogiri::HTML5::Document) do + attr_accessor :initialized_with, :initialized_count + + def initialize(*args) + super + @initialized_with = args + @initialized_count ||= 0 + @initialized_count += 1 + end + end + end + + describe ".new" do + it "returns an instance of the expected class" do + doc = klass.new + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.new + assert_equal(1, doc.initialized_count) + end + + it "passes arguments to #initialize" do + doc = klass.new("http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN") + assert_equal( + ["http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN"], + doc.initialized_with + ) + end + end + + it "#dup returns the expected class" do + doc = klass.new.dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + let(:html) { Nokogiri::HTML5.parse(File.read(HTML_FILE)) } + + it "returns an instance of the expected class" do + doc = klass.parse(File.read(HTML_FILE)) + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.parse(File.read(HTML_FILE)) + assert_equal(1, doc.initialized_count) + end + + it "parses the doc" do + doc = klass.parse(File.read(HTML_FILE)) + assert_equal(html.root.to_s, doc.root.to_s) + end + end + end + end + + describe Nokogiri::HTML5::DocumentFragment do + describe "subclassing" do + let(:klass) do + Class.new(Nokogiri::HTML5::DocumentFragment) do + attr_accessor :initialized_with, :initialized_count + + def initialize(*args) + super + @initialized_with = args + @initialized_count ||= 0 + @initialized_count += 1 + end + end + end + let(:html) { Nokogiri::HTML5.parse(File.read(HTML_FILE), HTML_FILE) } + + describe ".new" do + it "returns an instance of the right class" do + fragment = klass.new(html, "
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.new(html, "
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes args to #initialize" do + fragment = klass.new(html, "
a
") + assert_equal([html, "
a
"], fragment.initialized_with) + end + end + + it "#dup returns the expected class" do + doc = klass.new(html, "
a
").dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + it "returns an instance of the right class" do + fragment = klass.parse("
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.parse("
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes the fragment" do + fragment = klass.parse("
a
") + assert_equal(Nokogiri::HTML5::DocumentFragment.parse("
a
").to_s, fragment.to_s) + end + end + end + end end if Nokogiri.uses_gumbo?