From 4600a1c7c4d653e41e3be1b4ecabe22cdcc15e59 Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sun, 8 May 2022 11:23:37 -0400
Subject: [PATCH 1/2] test: be explicit about HTML4 and not just HTML

This is long-term prep for a day when HTML5 may become the default on
supported platforms.
---
 test/html4/sax/test_parser.rb                 |   8 +-
 test/html4/sax/test_parser_text.rb            |   2 +-
 test/html4/sax/test_push_parser.rb            |   2 +-
 test/html4/test_attributes.rb                 |   2 +-
 .../html4/test_attributes_properly_escaped.rb |   6 +-
 test/html4/test_builder.rb                    |  28 +--
 test/html4/test_comments.rb                   |  10 +-
 test/html4/test_document.rb                   | 220 ++++++++----------
 test/html4/test_document_encoding.rb          |  30 +--
 test/html4/test_document_fragment.rb          | 104 ++++-----
 test/html4/test_element_description.rb        |   2 +-
 test/html4/test_node.rb                       |  16 +-
 test/html4/test_node_encoding.rb              |  10 +-
 test/html5/test_monkey_patch.rb               |   4 +-
 .../test_html_module.rb => test_html.rb}      |  12 +-
 test/test_memory_leak.rb                      |   8 +-
 test/test_nokogiri.rb                         |  12 +-
 test/xml/test_node.rb                         |  12 +-
 test/xml/test_node_reparenting.rb             |   2 +-
 test/xml/test_xpath.rb                        |   6 +-
 20 files changed, 238 insertions(+), 258 deletions(-)
 rename test/{html4/test_html_module.rb => test_html.rb} (50%)

diff --git a/test/html4/sax/test_parser.rb b/test/html4/sax/test_parser.rb
index 4dc6ac61fd0..2d1e6451ef3 100644
--- a/test/html4/sax/test_parser.rb
+++ b/test/html4/sax/test_parser.rb
@@ -9,7 +9,7 @@ module SAX
       class TestParser < Nokogiri::SAX::TestCase
         def setup
           super
-          @parser = HTML::SAX::Parser.new(Doc.new)
+          @parser = Nokogiri::HTML4::SAX::Parser.new(Doc.new)
         end
 
         def test_parse_empty_document
@@ -163,9 +163,9 @@ def test_empty_processing_instruction
         end
 
         it "handles invalid types gracefully" do
-          assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) }
-          assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) }
-          assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) }
+          assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse(0xcafecafe) }
+          assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse_memory(0xcafecafe) }
+          assert_raises(TypeError) { Nokogiri::HTML4::SAX::Parser.new.parse_io(0xcafecafe) }
         end
       end
     end
diff --git a/test/html4/sax/test_parser_text.rb b/test/html4/sax/test_parser_text.rb
index 0564000b824..2866e893f0d 100644
--- a/test/html4/sax/test_parser_text.rb
+++ b/test/html4/sax/test_parser_text.rb
@@ -10,7 +10,7 @@ class TestParserText < Nokogiri::SAX::TestCase
         def setup
           super
           @doc    = DocWithOrderedItems.new
-          @parser = HTML::SAX::Parser.new(@doc)
+          @parser = Nokogiri::HTML4::SAX::Parser.new(@doc)
         end
 
         def test_texts_order
diff --git a/test/html4/sax/test_push_parser.rb b/test/html4/sax/test_push_parser.rb
index 53e62b31ce4..f11018f613e 100644
--- a/test/html4/sax/test_push_parser.rb
+++ b/test/html4/sax/test_push_parser.rb
@@ -9,7 +9,7 @@ module SAX
       class TestPushParser < Nokogiri::SAX::TestCase
         def setup
           super
-          @parser = HTML::SAX::PushParser.new(Doc.new)
+          @parser = Nokogiri::HTML4::SAX::PushParser.new(Doc.new)
         end
 
         def test_end_document_called
diff --git a/test/html4/test_attributes.rb b/test/html4/test_attributes.rb
index d79aaf246d2..0c531db87de 100644
--- a/test/html4/test_attributes.rb
+++ b/test/html4/test_attributes.rb
@@ -55,7 +55,7 @@ class TestAttr < Nokogiri::TestCase
 
           html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}
 
-          reparsed = HTML.fragment(HTML.fragment(html).to_html)
+          reparsed = Nokogiri::HTML4.fragment(Nokogiri::HTML4.fragment(html).to_html)
           attributes = reparsed.at_css(config[:tag]).attribute_nodes
 
           assert_equal [config[:attr]], attributes.collect(&:name)
diff --git a/test/html4/test_attributes_properly_escaped.rb b/test/html4/test_attributes_properly_escaped.rb
index 43fdc20f25f..16d0cb237bb 100755
--- a/test/html4/test_attributes_properly_escaped.rb
+++ b/test/html4/test_attributes_properly_escaped.rb
@@ -9,7 +9,7 @@ def test_attribute_macros_are_escaped
         skip_unless_libxml2_patch("0001-Remove-script-macro-support.patch") if Nokogiri.uses_libxml?
 
         html = "<p><i for=\"&{<test>}\"></i></p>"
-        document = Nokogiri::HTML::Document.new
+        document = Nokogiri::HTML4::Document.new
         nodes = document.parse(html)
 
         assert_equal("<p><i for=\"&amp;{&lt;test&gt;}\"></i></p>", nodes[0].to_s)
@@ -19,7 +19,7 @@ def test_libxml_escapes_server_side_includes
         skip_unless_libxml2_patch("0002-Update-entities-to-remove-handling-of-ssi.patch") if Nokogiri.uses_libxml?
 
         original_html = %(<p><a href='<!--"><test>-->'></a></p>)
-        document = Nokogiri::HTML::Document.new
+        document = Nokogiri::HTML4::Document.new
         html = document.parse(original_html).to_s
 
         assert_match(/!--%22&gt;&lt;test&gt;/, html)
@@ -29,7 +29,7 @@ def test_libxml_escapes_server_side_includes_without_nested_quotes
         skip_unless_libxml2_patch("0002-Update-entities-to-remove-handling-of-ssi.patch") if Nokogiri.uses_libxml?
 
         original_html = %(<p><i for="<!--<test>-->"></i></p>)
-        document = Nokogiri::HTML::Document.new
+        document = Nokogiri::HTML4::Document.new
         html = document.parse(original_html).to_s
 
         assert_match(/&lt;!--&lt;test&gt;/, html)
diff --git a/test/html4/test_builder.rb b/test/html4/test_builder.rb
index 8ee20e61a6b..6188c1ebf79 100644
--- a/test/html4/test_builder.rb
+++ b/test/html4/test_builder.rb
@@ -8,11 +8,11 @@ class TestBuilder < Nokogiri::TestCase
       def test_top_level_function_builds
         foo = nil
         Nokogiri() { |xml| foo = xml }
-        assert_instance_of(Nokogiri::HTML::Builder, foo)
+        assert_instance_of(Nokogiri::HTML4::Builder, foo)
       end
 
       def test_builder_with_explicit_tags
-        html_doc = Nokogiri::HTML::Builder.new do
+        html_doc = Nokogiri::HTML4::Builder.new do
           div.slide(class: "another_class") do
             node = Nokogiri::XML::Node.new("id", doc)
             node.content = "hello"
@@ -24,7 +24,7 @@ def test_builder_with_explicit_tags
       end
 
       def test_hash_as_attributes_for_attribute_method
-        html = Nokogiri::HTML::Builder.new do ||
+        html = Nokogiri::HTML4::Builder.new do ||
           div.slide(class: "another_class") do
             span("Slide 1")
           end
@@ -33,7 +33,7 @@ def test_hash_as_attributes_for_attribute_method
       end
 
       def test_hash_as_attributes
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           div(id: "awesome") do
             h1("america")
           end
@@ -54,7 +54,7 @@ def test_href_with_attributes
       end
 
       def test_tag_nesting
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           body do
             span.left("")
             span.middle do
@@ -68,7 +68,7 @@ def test_tag_nesting
       end
 
       def test_has_ampersand
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           div.rad.thing! do
             text("<awe&some>")
             b("hello & world")
@@ -81,7 +81,7 @@ def test_has_ampersand
       end
 
       def test_multi_tags
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           div.rad.thing! do
             text("<awesome>")
             b("hello")
@@ -94,7 +94,7 @@ def test_multi_tags
       end
 
       def test_attributes_plus_block
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           div.rad.thing! do
             text("<awesome>")
           end
@@ -104,7 +104,7 @@ def test_attributes_plus_block
       end
 
       def test_builder_adds_attributes
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           div.rad.thing!("tender div")
         end
         assert_equal('<div class="rad" id="thing">tender div</div>',
@@ -112,14 +112,14 @@ def test_builder_adds_attributes
       end
 
       def test_bold_tag
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           b("bold tag")
         end
         assert_equal("<b>bold tag</b>", builder.doc.root.to_html.chomp)
       end
 
       def test_html_then_body_tag
-        builder = Nokogiri::HTML::Builder.new do
+        builder = Nokogiri::HTML4::Builder.new do
           html do
             body do
               b("bold tag")
@@ -137,12 +137,12 @@ def foo
           end
         end
 
-        builder = Nokogiri::HTML::Builder.new { text(foo) }
+        builder = Nokogiri::HTML4::Builder.new { text(foo) }
         assert_includes(builder.to_html, "foo!")
       end
 
       def test_builder_with_param
-        doc = Nokogiri::HTML::Builder.new do |html|
+        doc = Nokogiri::HTML4::Builder.new do |html|
           html.body do
             html.p("hello world")
           end
@@ -154,7 +154,7 @@ def test_builder_with_param
 
       def test_builder_with_id
         text = "hello world"
-        doc = Nokogiri::HTML::Builder.new do |html|
+        doc = Nokogiri::HTML4::Builder.new do |html|
           html.body do
             html.id_(text)
           end
diff --git a/test/html4/test_comments.rb b/test/html4/test_comments.rb
index 96af26a8662..b29c36ce689 100644
--- a/test/html4/test_comments.rb
+++ b/test/html4/test_comments.rb
@@ -15,7 +15,7 @@ class TestComment < Nokogiri::TestCase
       # <!--> or <!--->). The parser behaves as if the comment is
       # closed correctly.
       describe "abrupt closing of empty comment" do
-        let(:doc) { Nokogiri::HTML(html) }
+        let(:doc) { Nokogiri::HTML4(html) }
         let(:subject) { doc.at_css("div#under-test") }
         let(:other_div) { doc.at_css("div#also-here") }
 
@@ -101,7 +101,7 @@ class TestComment < Nokogiri::TestCase
       # stream.
       describe "eof in comment" do
         let(:html) { "<html><body><div id=under-test><!--start of unterminated comment" }
-        let(:doc) { Nokogiri::HTML(html) }
+        let(:doc) { Nokogiri::HTML4(html) }
         let(:subject) { doc.at_css("div#under-test") }
 
         if Nokogiri.uses_libxml?
@@ -129,7 +129,7 @@ class TestComment < Nokogiri::TestCase
       # code point sequence.
       describe "incorrectly closed comment" do
         let(:html) { "<html><body><div id=under-test><!--foo--!><div id=do-i-exist></div><!--bar--></div></body></html>" }
-        let(:doc) { Nokogiri::HTML(html) }
+        let(:doc) { Nokogiri::HTML4(html) }
         let(:subject) { doc.at_css("div#under-test") }
         let(:inner_div) { doc.at_css("div#do-i-exist") }
 
@@ -169,7 +169,7 @@ class TestComment < Nokogiri::TestCase
       describe "incorrectly opened comment" do
         let(:html) { "<html><body><div id=under-test><! comment <div id=do-i-exist>inner content</div>-->hello</div></body></html>" }
 
-        let(:doc) { Nokogiri::HTML(html) }
+        let(:doc) { Nokogiri::HTML4(html) }
         let(:body) { doc.at_css("body") }
         let(:subject) { doc.at_css("div#under-test") }
 
@@ -225,7 +225,7 @@ class TestComment < Nokogiri::TestCase
       # everything that follows will be treated as markup.
       describe "nested comment" do
         let(:html) { "<html><body><div id=under-test><!-- outer <!-- inner --><div id=do-i-exist></div>--></div></body></html>" }
-        let(:doc) { Nokogiri::HTML(html) }
+        let(:doc) { Nokogiri::HTML4(html) }
         let(:subject) { doc.at_css("div#under-test") }
         let(:inner_div) { doc.at_css("div#do-i-exist") }
 
diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb
index ce7d56f1514..b1c39a7fcee 100644
--- a/test/html4/test_document.rb
+++ b/test/html4/test_document.rb
@@ -3,10 +3,10 @@
 require "helper"
 
 module Nokogiri
-  module HTML
+  module HTML4
     class TestDocument < Nokogiri::TestCase
-      describe Nokogiri::HTML::Document do
-        let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE)) }
+      describe Nokogiri::HTML4::Document do
+        let(:html) { Nokogiri::HTML4.parse(File.read(HTML_FILE)) }
 
         def test_nil_css
           # Behavior is undefined but shouldn't break
@@ -15,7 +15,7 @@ def test_nil_css
         end
 
         def test_does_not_fail_with_illformatted_html
-          doc = Nokogiri::HTML((+'"</html>";').force_encoding(Encoding::BINARY))
+          doc = Nokogiri::HTML4((+'"</html>";').force_encoding(Encoding::BINARY))
           refute_nil(doc)
         end
 
@@ -34,7 +34,7 @@ def test_fragment
 
         def test_document_takes_config_block
           options = nil
-          Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
+          Nokogiri::HTML4(File.read(HTML_FILE), HTML_FILE) do |cfg|
             options = cfg
             options.nonet.nowarning.dtdattr
           end
@@ -45,7 +45,7 @@ def test_document_takes_config_block
 
         def test_parse_takes_config_block
           options = nil
-          Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
+          Nokogiri::HTML4.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
             options = cfg
             options.nonet.nowarning.dtdattr
           end
@@ -54,71 +54,39 @@ def test_parse_takes_config_block
           assert_predicate(options, :dtdattr?)
         end
 
-        def test_subclass
-          klass = Class.new(Nokogiri::HTML::Document)
-          doc = klass.new
-          assert_instance_of(klass, doc)
-        end
-
-        def test_subclass_initialize
-          klass = Class.new(Nokogiri::HTML::Document) do
-            attr_accessor :initialized_with
-
-            def initialize(*args)
-              super
-              @initialized_with = args
-            end
-          end
-          doc = klass.new("uri", "external_id", 1)
-          assert_equal(["uri", "external_id", 1], doc.initialized_with)
-        end
-
-        def test_subclass_dup
-          klass = Class.new(Nokogiri::HTML::Document)
-          doc = klass.new.dup
-          assert_instance_of(klass, doc)
-        end
-
-        def test_subclass_parse
-          klass = Class.new(Nokogiri::HTML::Document)
-          doc = klass.parse(File.read(HTML_FILE))
-          assert_equal(html.to_s, doc.to_s)
-          assert_instance_of(klass, doc)
-        end
-
         def test_document_parse_method
-          html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
+          html = Nokogiri::HTML4::Document.parse(File.read(HTML_FILE))
           assert_equal(html.to_s, html.to_s)
         end
 
         def test_document_parse_method_with_url
-          doc = Nokogiri::HTML("<html></html>", "http://foobar.example.com/", "UTF-8")
+          doc = Nokogiri::HTML4("<html></html>", "http://foobar.example.com/", "UTF-8")
           refute_empty(doc.to_s, "Document should not be empty")
           assert_equal("http://foobar.example.com/", doc.url)
         end
 
         ###
-        # Nokogiri::HTML returns an empty Document when given a blank string GH#11
+        # Nokogiri::HTML4 returns an empty Document when given a blank string GH#11
         def test_empty_string_returns_empty_doc
-          doc = Nokogiri::HTML("")
-          assert_instance_of(Nokogiri::HTML::Document, doc)
+          doc = Nokogiri::HTML4("")
+          assert_instance_of(Nokogiri::HTML4::Document, doc)
           assert_nil(doc.root)
         end
 
         def test_to_xhtml_with_indent
           skip if Nokogiri.uses_libxml?("~> 2.6.0")
-          doc = Nokogiri::HTML("<html><body><a>foo</a></body></html>")
-          doc = Nokogiri::HTML(doc.to_xhtml(indent: 2))
+          doc = Nokogiri::HTML4("<html><body><a>foo</a></body></html>")
+          doc = Nokogiri::HTML4(doc.to_xhtml(indent: 2))
           assert_indent(2, doc)
         end
 
         def test_write_to_xhtml_with_indent
           skip if Nokogiri.uses_libxml?("~> 2.6.0")
           io = StringIO.new
-          doc = Nokogiri::HTML("<html><body><a>foo</a></body></html>")
+          doc = Nokogiri::HTML4("<html><body><a>foo</a></body></html>")
           doc.write_xhtml_to(io, indent: 5)
           io.rewind
-          doc = Nokogiri::HTML(io.read)
+          doc = Nokogiri::HTML4(io.read)
           assert_indent(5, doc)
         end
 
@@ -139,7 +107,7 @@ def test_meta_encoding
         end
 
         def test_meta_encoding_is_strict_about_http_equiv
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
@@ -148,12 +116,12 @@ def test_meta_encoding_is_strict_about_http_equiv
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           assert_nil(doc.meta_encoding)
         end
 
         def test_meta_encoding_handles_malformed_content_charset
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta http-equiv="Content-type" content="text/html; utf-8" />
@@ -162,12 +130,12 @@ def test_meta_encoding_handles_malformed_content_charset
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           assert_nil(doc.meta_encoding)
         end
 
         def test_meta_encoding_checks_charset
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta charset="UTF-8">
@@ -176,7 +144,7 @@ def test_meta_encoding_checks_charset
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           assert_equal("UTF-8", doc.meta_encoding)
         end
 
@@ -187,12 +155,12 @@ def test_meta_encoding=
 
         def test_title
           assert_equal("Tender Lovemaking  ", html.title)
-          doc = Nokogiri::HTML("<html><body>foo</body></html>")
+          doc = Nokogiri::HTML4("<html><body>foo</body></html>")
           assert_nil(doc.title)
         end
 
         def test_title=
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <title>old</title>
@@ -201,12 +169,12 @@ def test_title=
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           doc.title = "new"
           assert_equal(1, doc.css("title").size)
           assert_equal("new", doc.title)
 
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
@@ -215,7 +183,7 @@ def test_title=
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           doc.title = "new"
           assert_equal("new", doc.title)
           title = doc.at("/html/head/title")
@@ -223,13 +191,13 @@ def test_title=
           assert_equal("new", title.text)
           assert_equal(-1, doc.at("meta[@http-equiv]") <=> title)
 
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           doc.title = "new"
           assert_equal("new", doc.title)
           # <head> may or may not be added
@@ -238,26 +206,26 @@ def test_title=
           assert_equal("new", title.text)
           assert_equal(-1, title <=> doc.at("body"))
 
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <meta charset="UTF-8">
               <body>
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           doc.title = "new"
           assert_equal("new", doc.title)
           assert_equal(-1, doc.at("meta[@charset]") <=> doc.at("title"))
           assert_equal(-1, doc.at("title") <=> doc.at("body"))
 
-          doc = Nokogiri::HTML("<!DOCTYPE html><p>hello")
+          doc = Nokogiri::HTML4("<!DOCTYPE html><p>hello")
           doc.title = "new"
           assert_equal("new", doc.title)
           assert_instance_of(Nokogiri::XML::DTD, doc.children.first)
           assert_equal(-1, doc.at("title") <=> doc.at("p"))
 
-          doc = Nokogiri::HTML("")
+          doc = Nokogiri::HTML4("")
           doc.title = "new"
           assert_equal("new", doc.title)
           assert_equal("new", doc.at("/html/head/title/text()").to_s)
@@ -265,7 +233,7 @@ def test_title=
 
         def test_meta_encoding_without_head
           encoding = "EUC-JP"
-          html = Nokogiri::HTML("<html><body>foo</body></html>", nil, encoding)
+          html = Nokogiri::HTML4("<html><body>foo</body></html>", nil, encoding)
 
           assert_nil(html.meta_encoding)
 
@@ -280,7 +248,7 @@ def test_meta_encoding_without_head
 
         def test_html5_meta_encoding_without_head
           encoding = "EUC-JP"
-          html = Nokogiri::HTML("<!DOCTYPE html><html><body>foo</body></html>", nil, encoding)
+          html = Nokogiri::HTML4("<!DOCTYPE html><html><body>foo</body></html>", nil, encoding)
 
           assert_nil(html.meta_encoding)
 
@@ -294,7 +262,7 @@ def test_html5_meta_encoding_without_head
         end
 
         def test_meta_encoding_with_empty_content_type
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta http-equiv="Content-Type" content="">
@@ -303,10 +271,10 @@ def test_meta_encoding_with_empty_content_type
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           assert_nil(html.meta_encoding)
 
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <meta http-equiv="Content-Type">
@@ -315,30 +283,30 @@ def test_meta_encoding_with_empty_content_type
                 foo
               </body>
             </html>
-          EOHTML
+          HTML
           assert_nil(html.meta_encoding)
         end
 
         def test_root_node_parent_is_document
           parent = html.root.parent
           assert_equal(html, parent)
-          assert_instance_of(Nokogiri::HTML::Document, parent)
+          assert_instance_of(Nokogiri::HTML4::Document, parent)
         end
 
         def test_parse_handles_nil_gracefully
-          @doc = Nokogiri::HTML::Document.parse(nil)
-          assert_instance_of(Nokogiri::HTML::Document, @doc)
+          @doc = Nokogiri::HTML4::Document.parse(nil)
+          assert_instance_of(Nokogiri::HTML4::Document, @doc)
         end
 
         def test_parse_empty_document
-          doc = Nokogiri::HTML("\n")
+          doc = Nokogiri::HTML4("\n")
           assert_equal(0, doc.css("a").length)
           assert_equal(0, doc.xpath("//a").length)
           assert_equal(0, doc.search("//a").length)
         end
 
-        def test_HTML_function
-          html = Nokogiri::HTML(File.read(HTML_FILE))
+        def test_html_predicate
+          html = Nokogiri::HTML4(File.read(HTML_FILE))
           assert_predicate(html, :html?)
         end
 
@@ -354,7 +322,7 @@ def read(*args)
             end
           end
 
-          doc = Nokogiri::HTML.parse(klass.new)
+          doc = Nokogiri::HTML4.parse(klass.new)
           assert_equal("foo", doc.at_css("div").content)
         end
 
@@ -364,8 +332,8 @@ def test_parse_temp_file
           temp_html_file.close
           temp_html_file.open
           assert_equal(
-            Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length,
-            Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length
+            Nokogiri::HTML4.parse(File.read(HTML_FILE)).xpath("//div/a").length,
+            Nokogiri::HTML4.parse(temp_html_file).xpath("//div/a").length
           )
         end
 
@@ -378,23 +346,23 @@ def test_to_xhtml
         def test_to_xhtml_self_closing_tags
           # https://github.com/sparklemotion/nokogiri/issues/2324
           html = "<html><body><br><table><colgroup><col>"
-          doc = Nokogiri::HTML::Document.parse(html)
+          doc = Nokogiri::HTML4::Document.parse(html)
           xhtml = doc.to_xhtml
           assert_match(%r(<br ?/>), xhtml)
           assert_match(%r(<col ?/>), xhtml)
         end
 
         def test_no_xml_header
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
             </html>
-          EOHTML
+          HTML
           refute_empty(html.to_html, "html length is too short")
           refute_match(/^<\?xml/, html.to_html)
         end
 
         def test_document_has_error
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div awesome="asdf>
@@ -403,12 +371,12 @@ def test_document_has_error
                 <p>outside div tag</p>
               </body>
             </html>
-          EOHTML
+          HTML
           refute_empty(html.errors)
         end
 
         def test_relative_css
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div>
@@ -417,14 +385,14 @@ def test_relative_css
                 <p>outside div tag</p>
               </body>
             </html>
-          EOHTML
+          HTML
           set = html.search("div").search("p")
           assert_equal(1, set.length)
           assert_equal("inside div tag", set.first.inner_text)
         end
 
         def test_multi_css
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div>
@@ -433,14 +401,14 @@ def test_multi_css
                 </div>
               </body>
             </html>
-          EOHTML
+          HTML
           set = html.css("p, a")
           assert_equal(2, set.length)
           assert_equal(["a tag", "p tag"].sort, set.map(&:content).sort)
         end
 
         def test_inner_text
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div>
@@ -450,20 +418,20 @@ def test_inner_text
                 </div>
               </body>
             </html>
-          EOHTML
+          HTML
           node = html.xpath("//div").first
           assert_equal("Hello world!", node.inner_text.strip)
         end
 
         def test_doc_type
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
             <html xmlns="http://www.w3.org/1999/xhtml">
               <body>
                 <p>Rainbow Dash</p>
               </body>
             </html>
-          EOHTML
+          HTML
           assert_equal("html", html.internal_subset.name)
           assert_equal("-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id)
           assert_equal("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id)
@@ -474,7 +442,7 @@ def test_doc_type
         end
 
         def test_content_size
-          html = Nokogiri::HTML("<div>\n</div>")
+          html = Nokogiri::HTML4("<div>\n</div>")
           assert_equal(1, html.content.size)
           assert_equal(1, html.content.split("").size)
           assert_equal("\n", html.content)
@@ -536,7 +504,7 @@ def test_dup_document
           assert(dup = html.dup)
           refute_equal(dup, html)
           assert_predicate(html, :html?)
-          assert_instance_of(Nokogiri::HTML::Document, dup)
+          assert_instance_of(Nokogiri::HTML4::Document, dup)
           assert_predicate(dup, :html?, "duplicate should be html")
           assert_equal(html.to_s, dup.to_s)
         end
@@ -557,7 +525,7 @@ def test_dup
         # issue 1060
         def test_node_ownership_after_dup
           html = "<html><head></head><body><div>replace me</div></body></html>"
-          doc = Nokogiri::HTML::Document.parse(html)
+          doc = Nokogiri::HTML4::Document.parse(html)
           dup = doc.dup
           assert_same(dup, dup.at_css("div").document)
 
@@ -566,7 +534,7 @@ def test_node_ownership_after_dup
         end
 
         def test_inner_html
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div>
@@ -576,28 +544,28 @@ def test_inner_html
                 </div>
               </body>
             </html>
-          EOHTML
+          HTML
           node = html.xpath("//div").first
           assert_equal("<p>Helloworld!</p>", node.inner_html.gsub(/\s/, ""))
         end
 
         def test_round_trip
-          doc = Nokogiri::HTML(html.inner_html)
+          doc = Nokogiri::HTML4(html.inner_html)
           assert_equal(html.root.to_html, doc.root.to_html)
         end
 
         def test_fragment_contains_text_node
-          fragment = Nokogiri::HTML.fragment("fooo")
+          fragment = Nokogiri::HTML4.fragment("fooo")
           assert_equal(1, fragment.children.length)
           assert_equal("fooo", fragment.inner_text)
         end
 
         def test_fragment_includes_two_tags
-          assert_equal(2, Nokogiri::HTML.fragment("<br/><hr/>").children.length)
+          assert_equal(2, Nokogiri::HTML4.fragment("<br/><hr/>").children.length)
         end
 
         def test_relative_css_finder
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <div class="red">
@@ -612,7 +580,7 @@ def test_relative_css_finder
                 </div>
               </body>
             </html>
-          EOHTML
+          HTML
           red_divs = doc.css("div.red")
           assert_equal(1, red_divs.length)
           p_tags = red_divs.first.css("p")
@@ -621,7 +589,7 @@ def test_relative_css_finder
         end
 
         def test_find_classes
-          doc = Nokogiri::HTML(<<~EOHTML)
+          doc = Nokogiri::HTML4(<<~HTML)
             <html>
               <body>
                 <p class="red">RED</p>
@@ -630,7 +598,7 @@ def test_find_classes
                 <p class="green notred">GREEN</p>
               </body>
             </html>
-          EOHTML
+          HTML
           list = doc.css(".red")
           assert_equal(2, list.length)
           assert_equal(["RED", "RED"], list.map(&:text))
@@ -639,7 +607,7 @@ def test_find_classes
         def test_parse_can_take_io
           html = nil
           File.open(HTML_FILE, "rb") do |f|
-            html = Nokogiri::HTML(f)
+            html = Nokogiri::HTML4(f)
           end
           assert_predicate(html, :html?)
           assert_equal(HTML_FILE, html.url)
@@ -651,7 +619,7 @@ def html.path
             "/i/should/be/the/document/url"
           end
 
-          doc = Nokogiri::HTML.parse(html)
+          doc = Nokogiri::HTML4.parse(html)
 
           assert_equal("/i/should/be/the/document/url", doc.url)
         end
@@ -660,7 +628,7 @@ def html.path
         def test_parse_can_take_pathnames
           assert(File.size(HTML_FILE) > 4096) # file must be big enough to trip the read callback more than once
 
-          doc = Nokogiri::HTML.parse(Pathname.new(HTML_FILE))
+          doc = Nokogiri::HTML4.parse(Pathname.new(HTML_FILE))
 
           # an arbitrary assertion on the structure of the document
           assert_equal(166, doc.css("a").length)
@@ -679,13 +647,13 @@ def test_serialize
 
         def test_empty_document
           # empty document should return "" #699
-          assert_equal("", Nokogiri::HTML.parse(nil).text)
-          assert_equal("", Nokogiri::HTML.parse("").text)
+          assert_equal("", Nokogiri::HTML4.parse(nil).text)
+          assert_equal("", Nokogiri::HTML4.parse("").text)
         end
 
         def test_capturing_nonparse_errors_during_document_clone
           # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
-          original = Nokogiri::HTML.parse("<div id='unique'></div><div id='unique'></div>")
+          original = Nokogiri::HTML4.parse("<div id='unique'></div><div id='unique'></div>")
           original_errors = original.errors.dup
 
           copy = original.dup
@@ -694,8 +662,8 @@ def test_capturing_nonparse_errors_during_document_clone
 
         def test_capturing_nonparse_errors_during_node_copy_between_docs
           # Errors should be emitted while parsing only, and should not change when moving nodes.
-          doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
-          doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
+          doc1 = Nokogiri::HTML4("<html><body><diva id='unique'>one</diva></body></html>")
+          doc2 = Nokogiri::HTML4("<html><body><dive id='unique'>two</dive></body></html>")
           node1 = doc1.at_css("#unique")
           node2 = doc2.at_css("#unique")
           original_errors1 = doc1.errors.dup
@@ -721,7 +689,7 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
           # having `ID unique-issue-1262 already defined` emitted to
           # stderr when running the test suite.
           #
-          doc = Nokogiri::HTML::Document.new
+          doc = Nokogiri::HTML4::Document.new
           Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262")
           Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262")
           assert_equal(0, doc.errors.length)
@@ -734,41 +702,41 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
           # don't otherwise have any test coverage for removing DTDs.
           #
           100.times do |_i|
-            Nokogiri::HTML::Document.new.internal_subset.remove
+            Nokogiri::HTML4::Document.new.internal_subset.remove
           end
         end
 
         it "skips encoding for script tags" do
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <script>var isGreater = 4 > 5;</script>
               </head>
               <body></body>
             </html>
-          EOHTML
+          HTML
           node = html.xpath("//script").first
           assert_equal("var isGreater = 4 > 5;", node.inner_html)
         end
 
         it "skips encoding for style tags" do
-          html = Nokogiri::HTML(<<~EOHTML)
+          html = Nokogiri::HTML4(<<~HTML)
             <html>
               <head>
                 <style>tr > div { display:block; }</style>
               </head>
               <body></body>
             </html>
-          EOHTML
+          HTML
           node = html.xpath("//style").first
           assert_equal("tr > div { display:block; }", node.inner_html)
         end
 
         it "does not fail when converting to_html using explicit encoding" do
-          html_fragment = <<~EOHTML
+          html_fragment = <<~HTML
             <img width="16" height="16" src="images/icon.gif" border="0" alt="Inactive hide details for &quot;User&quot; ---19/05/2015 12:55:29---Provvediamo subito nell&#8217;integrare">
-          EOHTML
-          doc = Nokogiri::HTML(html_fragment, nil, "ISO-8859-1")
+          HTML
+          doc = Nokogiri::HTML4(html_fragment, nil, "ISO-8859-1")
           html = doc.to_html
           assert html.index("src=\"images/icon.gif\"")
           assert_equal "ISO-8859-1", html.encoding.name
@@ -831,7 +799,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
 
               it "raises exception on parse error" do
                 exception = assert_raises(Nokogiri::SyntaxError) do
-                  Nokogiri::HTML.parse(input, nil, nil, parse_options)
+                  Nokogiri::HTML4.parse(input, nil, nil, parse_options)
                 end
                 assert_match(/Parser without recover option encountered error or warning/, exception.to_s)
               end
@@ -839,7 +807,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
 
             describe "default options" do
               it "does not raise exception on parse error" do
-                doc = Nokogiri::HTML.parse(input)
+                doc = Nokogiri::HTML4.parse(input)
                 assert_operator(doc.errors.length, :>, 0)
               end
             end
@@ -853,7 +821,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
 
               it "raises exception on parse error" do
                 exception = assert_raises(Nokogiri::SyntaxError) do
-                  Nokogiri::HTML.parse(input, nil, "UTF-8", parse_options)
+                  Nokogiri::HTML4.parse(input, nil, "UTF-8", parse_options)
                 end
                 assert_match(/Parser without recover option encountered error or warning/, exception.to_s)
               end
@@ -861,7 +829,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
 
             describe "default options" do
               it "does not raise exception on parse error" do
-                doc = Nokogiri::HTML.parse(input, nil, "UTF-8")
+                doc = Nokogiri::HTML4.parse(input, nil, "UTF-8")
                 assert_operator(doc.errors.length, :>, 0)
               end
             end
@@ -870,7 +838,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
 
         describe "subclassing" do
           let(:klass) do
-            Class.new(Nokogiri::HTML::Document) do
+            Class.new(Nokogiri::HTML4::Document) do
               attr_accessor :initialized_with, :initialized_count
 
               def initialize(*args)
diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb
index 6115301764b..e96eb59dc63 100644
--- a/test/html4/test_document_encoding.rb
+++ b/test/html4/test_document_encoding.rb
@@ -4,10 +4,10 @@
 require "helper"
 
 class TestNokogiriHtmlDocument < Nokogiri::TestCase
-  describe "Nokogiri::HTML::Document" do
+  describe "Nokogiri::HTML4::Document" do
     describe "Encoding" do
       def test_encoding
-        doc = Nokogiri::HTML(File.open(SHIFT_JIS_HTML, "rb"))
+        doc = Nokogiri::HTML4(File.open(SHIFT_JIS_HTML, "rb"))
 
         hello = "こんにちは"
 
@@ -21,7 +21,7 @@ def test_encoding
       end
 
       def test_encoding_without_charset
-        doc = Nokogiri::HTML(File.open(SHIFT_JIS_NO_CHARSET, "r:Shift_JIS:Shift_JIS").read)
+        doc = Nokogiri::HTML4(File.open(SHIFT_JIS_NO_CHARSET, "r:Shift_JIS:Shift_JIS").read)
 
         hello = "こんにちは"
 
@@ -42,7 +42,7 @@ def test_default_to_encoding_from_string
           </body>
           </html>
         eohtml
-        doc = Nokogiri::HTML(bad_charset)
+        doc = Nokogiri::HTML4(bad_charset)
         assert_equal(bad_charset.encoding.name, doc.encoding)
 
         doc = Nokogiri.parse(bad_charset)
@@ -58,7 +58,7 @@ def test_encoding_non_utf8
             <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
             <title xml:lang="ja">#{orig}</title></html>
           eohtml
-          text = Nokogiri::HTML.parse(html).at("title").inner_text
+          text = Nokogiri::HTML4.parse(html).at("title").inner_text
           assert_equal(
             orig.encode(enc).force_encoding(bin),
             text.encode(enc).force_encoding(bin)
@@ -78,14 +78,14 @@ def test_encoding_with_a_bad_name
           </body>
           </html>
         eohtml
-        doc = Nokogiri::HTML(bad_charset, nil, "askldjfhalsdfjhlkasdfjh")
+        doc = Nokogiri::HTML4(bad_charset, nil, "askldjfhalsdfjhlkasdfjh")
         assert_equal(["http://tenderlovemaking.com/"],
           doc.css("a").map { |a| a["href"] })
       end
 
       def test_empty_doc_encoding
         encoding = "US-ASCII"
-        assert_equal(encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding)
+        assert_equal(encoding, Nokogiri::HTML4.parse(nil, nil, encoding).encoding)
       end
 
       describe "Detection" do
@@ -98,31 +98,31 @@ def binopen(file)
         end
 
         it "handles both memory and IO" do
-          from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
-          from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
+          from_stream = Nokogiri::HTML4(binopen(NOENCODING_FILE))
+          from_string = Nokogiri::HTML4(binread(NOENCODING_FILE))
 
           assert_equal(from_string.to_s.size, from_stream.to_s.size)
           assert_operator(from_string.to_s.size, :>, 0)
         end
 
         it "uses meta charset encoding when present" do
-          html = Nokogiri::HTML(binopen(METACHARSET_FILE))
+          html = Nokogiri::HTML4(binopen(METACHARSET_FILE))
           assert_equal("iso-2022-jp", html.encoding)
           assert_equal("たこ焼き仮面", html.title)
         end
 
         { "xhtml" => ENCODING_XHTML_FILE, "html" => ENCODING_HTML_FILE }.each do |flavor, file|
           it "detects #{flavor} document encoding" do
-            doc_from_string_enc = Nokogiri::HTML(binread(file), nil, "Shift_JIS")
+            doc_from_string_enc = Nokogiri::HTML4(binread(file), nil, "Shift_JIS")
             ary_from_string_enc = doc_from_string_enc.xpath("//p/text()").map(&:text)
 
-            doc_from_string = Nokogiri::HTML(binread(file))
+            doc_from_string = Nokogiri::HTML4(binread(file))
             ary_from_string = doc_from_string.xpath("//p/text()").map(&:text)
 
-            doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, "Shift_JIS")
+            doc_from_file_enc = Nokogiri::HTML4(binopen(file), nil, "Shift_JIS")
             ary_from_file_enc = doc_from_file_enc.xpath("//p/text()").map(&:text)
 
-            doc_from_file = Nokogiri::HTML(binopen(file))
+            doc_from_file = Nokogiri::HTML4(binopen(file))
             ary_from_file = doc_from_file.xpath("//p/text()").map(&:text)
 
             title = "たこ焼き仮面"
@@ -150,7 +150,7 @@ def binopen(file)
 
           { "read_memory" => RAW, "read_io" => StringIO.new(RAW) }.each do |flavor, input|
             it "#{flavor} should handle errors" do
-              doc = Nokogiri::HTML.parse(input)
+              doc = Nokogiri::HTML4.parse(input)
               assert_operator(doc.errors.length, :>, 0)
             end
           end
diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb
index ea3c1ca492a..6dffc9ffc38 100644
--- a/test/html4/test_document_fragment.rb
+++ b/test/html4/test_document_fragment.rb
@@ -6,48 +6,48 @@
 module Nokogiri
   module HTML
     class TestDocumentFragment < Nokogiri::TestCase
-      describe Nokogiri::HTML::DocumentFragment do
-        let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) }
+      describe Nokogiri::HTML4::DocumentFragment do
+        let(:html) { Nokogiri::HTML4.parse(File.read(HTML_FILE), HTML_FILE) }
 
         def test_ascii_8bit_encoding
           s = +"hello"
           s.force_encoding(::Encoding::ASCII_8BIT)
-          assert_equal("hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html)
+          assert_equal("hello", Nokogiri::HTML4::DocumentFragment.parse(s).to_html)
         end
 
         def test_inspect_encoding
           fragment = "<div>こんにちは！</div>".encode("EUC-JP")
-          f = Nokogiri::HTML::DocumentFragment.parse(fragment)
+          f = Nokogiri::HTML4::DocumentFragment.parse(fragment)
           assert_equal("こんにちは！", f.content)
         end
 
         def test_html_parse_encoding
           fragment = "<div>こんにちは！</div>".encode("EUC-JP")
-          f = Nokogiri::HTML.fragment(fragment)
+          f = Nokogiri::HTML4.fragment(fragment)
           assert_equal("EUC-JP", f.document.encoding)
           assert_equal("こんにちは！", f.content)
         end
 
         def test_unlink_empty_document
-          frag = Nokogiri::HTML::DocumentFragment.parse("").unlink # must_not_raise
+          frag = Nokogiri::HTML4::DocumentFragment.parse("").unlink # must_not_raise
           assert_nil(frag.parent)
         end
 
         def test_colons_are_not_removed
-          doc = Nokogiri::HTML::DocumentFragment.parse("<span>3:30pm</span>")
+          doc = Nokogiri::HTML4::DocumentFragment.parse("<span>3:30pm</span>")
           assert_match(/3:30/, doc.to_s)
         end
 
         def test_parse_encoding
           fragment = "<div>hello world</div>"
-          f = Nokogiri::HTML::DocumentFragment.parse(fragment, "ISO-8859-1")
+          f = Nokogiri::HTML4::DocumentFragment.parse(fragment, "ISO-8859-1")
           assert_equal("ISO-8859-1", f.document.encoding)
           assert_equal("hello world", f.content)
         end
 
         def test_html_parse_with_encoding
           fragment = "<div>hello world</div>"
-          f = Nokogiri::HTML.fragment(fragment, "ISO-8859-1")
+          f = Nokogiri::HTML4.fragment(fragment, "ISO-8859-1")
           assert_equal("ISO-8859-1", f.document.encoding)
           assert_equal("hello world", f.content)
         end
@@ -57,7 +57,7 @@ def test_parse_in_context
         end
 
         def test_inner_html=
-          fragment = Nokogiri::HTML.fragment("<hr />")
+          fragment = Nokogiri::HTML4.fragment("<hr />")
 
           fragment.inner_html = "hello"
           assert_equal("hello", fragment.inner_html)
@@ -71,70 +71,70 @@ def test_ancestors_search
               </ul>
             </div>
           EOF
-          fragment = Nokogiri::HTML.fragment(html)
+          fragment = Nokogiri::HTML4.fragment(html)
           li = fragment.at("li")
           assert(li.matches?("li"))
         end
 
         def test_fun_encoding
           string = %(<body>こんにちは</body>)
-          html = Nokogiri::HTML::DocumentFragment.parse(
+          html = Nokogiri::HTML4::DocumentFragment.parse(
             string
           ).to_html(encoding: "UTF-8")
           assert_equal(string, html)
         end
 
         def test_new
-          assert(Nokogiri::HTML::DocumentFragment.new(html))
+          assert(Nokogiri::HTML4::DocumentFragment.new(html))
         end
 
         def test_body_fragment_should_contain_body
-          fragment = Nokogiri::HTML::DocumentFragment.parse("  <body><div>foo</div></body>")
+          fragment = Nokogiri::HTML4::DocumentFragment.parse("  <body><div>foo</div></body>")
           assert_match(/^<body>/, fragment.to_s)
         end
 
         def test_nonbody_fragment_should_not_contain_body
-          fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
+          fragment = Nokogiri::HTML4::DocumentFragment.parse("<div>foo</div>")
           assert_match(/^<div>/, fragment.to_s)
         end
 
         def test_fragment_should_have_document
-          fragment = Nokogiri::HTML::DocumentFragment.new(html)
+          fragment = Nokogiri::HTML4::DocumentFragment.new(html)
           assert_equal(html, fragment.document)
         end
 
         def test_empty_fragment_should_be_searchable_by_css
-          fragment = Nokogiri::HTML.fragment("")
+          fragment = Nokogiri::HTML4.fragment("")
           assert_equal(0, fragment.css("a").size)
         end
 
         def test_empty_fragment_should_be_searchable
-          fragment = Nokogiri::HTML.fragment("")
+          fragment = Nokogiri::HTML4.fragment("")
           assert_equal(0, fragment.search("//a").size)
         end
 
         def test_name
-          fragment = Nokogiri::HTML::DocumentFragment.new(html)
+          fragment = Nokogiri::HTML4::DocumentFragment.new(html)
           assert_equal("#document-fragment", fragment.name)
         end
 
         def test_static_method
-          fragment = Nokogiri::HTML::DocumentFragment.parse("<div>a</div>")
-          assert_instance_of(Nokogiri::HTML::DocumentFragment, fragment)
+          fragment = Nokogiri::HTML4::DocumentFragment.parse("<div>a</div>")
+          assert_instance_of(Nokogiri::HTML4::DocumentFragment, fragment)
         end
 
         def test_many_fragments
-          100.times { Nokogiri::HTML::DocumentFragment.new(html) }
+          100.times { Nokogiri::HTML4::DocumentFragment.new(html) }
         end
 
         def test_html_fragment
-          fragment = Nokogiri::HTML.fragment("<div>a</div>")
+          fragment = Nokogiri::HTML4.fragment("<div>a</div>")
           assert_equal("<div>a</div>", fragment.to_s)
         end
 
         def test_html_fragment_has_outer_text
           doc = "a<div>b</div>c"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           if Nokogiri.uses_libxml?("<= 2.6.16")
             assert_equal("a<div>b</div><p>c</p>", fragment.to_s)
           else
@@ -144,59 +144,59 @@ def test_html_fragment_has_outer_text
 
         def test_html_fragment_case_insensitivity
           doc = "<Div>b</Div>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_equal("<div>b</div>", fragment.to_s)
         end
 
         def test_html_fragment_with_leading_whitespace
           doc = "     <div>b</div>  "
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_match(%r%     <div>b</div> *%, fragment.to_s)
         end
 
         def test_html_fragment_with_leading_whitespace_and_newline
           doc = "     \n<div>b</div>  "
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_match(%r%     \n<div>b</div> *%, fragment.to_s)
         end
 
         def test_html_fragment_with_input_and_intermediate_whitespace
           doc = "<label>Label</label><input type=\"text\"> <span>span</span>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_equal("<label>Label</label><input type=\"text\"> <span>span</span>", fragment.to_s)
         end
 
         def test_html_fragment_with_leading_text_and_newline
-          fragment = HTML::Document.new.fragment("First line\nSecond line<br>Broken line")
+          fragment = Nokogiri::HTML4::Document.new.fragment("First line\nSecond line<br>Broken line")
           assert_equal("First line\nSecond line<br>Broken line", fragment.to_s)
         end
 
         def test_html_fragment_with_leading_whitespace_and_text_and_newline
-          fragment = HTML::Document.new.fragment("  First line\nSecond line<br>Broken line")
+          fragment = Nokogiri::HTML4::Document.new.fragment("  First line\nSecond line<br>Broken line")
           assert_equal("  First line\nSecond line<br>Broken line", fragment.to_s)
         end
 
         def test_html_fragment_with_leading_entity
           failed = "&quot;test<br/>test&quot;"
-          fragment = Nokogiri::HTML::DocumentFragment.parse(failed)
+          fragment = Nokogiri::HTML4::DocumentFragment.parse(failed)
           assert_equal('"test<br>test"', fragment.to_html)
         end
 
         def test_to_s
           doc = "<span>foo<br></span><span>bar</span>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_equal("<span>foo<br></span><span>bar</span>", fragment.to_s)
         end
 
         def test_to_html
           doc = "<span>foo<br></span><span>bar</span>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_equal("<span>foo<br></span><span>bar</span>", fragment.to_html)
         end
 
         def test_to_xhtml
           doc = "<span>foo<br></span><span>bar</span><p></p>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.7.0")
             assert_equal("<span>foo<br /></span><span>bar</span><p></p>", fragment.to_xhtml)
           else
@@ -208,19 +208,19 @@ def test_to_xhtml
 
         def test_to_xml
           doc = "<span>foo<br></span><span>bar</span>"
-          fragment = Nokogiri::HTML::Document.new.fragment(doc)
+          fragment = Nokogiri::HTML4::Document.new.fragment(doc)
           assert_equal("<span>foo<br/></span><span>bar</span>", fragment.to_xml)
         end
 
         def test_fragment_script_tag_with_cdata
-          doc = HTML::Document.new
+          doc = Nokogiri::HTML4::Document.new
           fragment = doc.fragment("<script>var foo = 'bar';</script>")
           assert_equal("<script>var foo = 'bar';</script>",
             fragment.to_s)
         end
 
         def test_fragment_with_comment
-          doc = HTML::Document.new
+          doc = Nokogiri::HTML4::Document.new
           fragment = doc.fragment("<p>hello<!-- your ad here --></p>")
           assert_equal("<p>hello<!-- your ad here --></p>",
             fragment.to_s)
@@ -230,41 +230,41 @@ def test_element_children_counts
           if Nokogiri.uses_libxml?("<= 2.9.1")
             skip("#elements doesn't work in 2.9.1, see 1793a5a for history")
           end
-          doc = Nokogiri::HTML::DocumentFragment.parse("   <div>  </div>\n   ")
+          doc = Nokogiri::HTML4::DocumentFragment.parse("   <div>  </div>\n   ")
           assert_equal(1, doc.element_children.count)
         end
 
         def test_malformed_fragment_is_corrected
-          fragment = HTML::DocumentFragment.parse("<div </div>")
+          fragment = Nokogiri::HTML4::DocumentFragment.parse("<div </div>")
           assert_equal("<div></div>", fragment.to_s)
         end
 
         def test_unclosed_script_tag
           # see GH#315
-          fragment = HTML::DocumentFragment.parse("foo <script>bar")
+          fragment = Nokogiri::HTML4::DocumentFragment.parse("foo <script>bar")
           assert_equal("foo <script>bar</script>", fragment.to_html)
         end
 
         def test_error_propagation_on_fragment_parse
-          frag = Nokogiri::HTML::DocumentFragment.parse("<hello>oh, hello there.</hello>")
+          frag = Nokogiri::HTML4::DocumentFragment.parse("<hello>oh, hello there.</hello>")
           assert(frag.errors.any? { |err| err.to_s.include?("Tag hello invalid") }, "errors should be copied to the fragment")
         end
 
         def test_error_propagation_on_fragment_parse_in_node_context
-          doc = Nokogiri::HTML::Document.parse("<html><body><div></div></body></html>")
+          doc = Nokogiri::HTML4::Document.parse("<html><body><div></div></body></html>")
           context_node = doc.at_css("div")
-          frag = Nokogiri::HTML::DocumentFragment.new(doc, "<hello>oh, hello there.</hello>", context_node)
+          frag = Nokogiri::HTML4::DocumentFragment.new(doc, "<hello>oh, hello there.</hello>", context_node)
           assert(frag.errors.any? do |err|
                    err.to_s.include?("Tag hello invalid")
                  end, "errors should be on the context node's document")
         end
 
         def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
-          doc = Nokogiri::HTML::Document.parse("<html><body><div></div><jimmy></jimmy></body></html>")
+          doc = Nokogiri::HTML4::Document.parse("<html><body><div></div><jimmy></jimmy></body></html>")
           assert(doc.errors.any? { |err| err.to_s.include?("jimmy") }, "assert on setup")
 
           context_node = doc.at_css("div")
-          frag = Nokogiri::HTML::DocumentFragment.new(doc, "<hello>oh, hello there.</hello>", context_node)
+          frag = Nokogiri::HTML4::DocumentFragment.new(doc, "<hello>oh, hello there.</hello>", context_node)
           assert(frag.errors.any? do |err|
                    err.to_s.include?("Tag hello invalid")
                  end, "errors should be on the context node's document")
@@ -275,7 +275,7 @@ def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_
 
         def test_capturing_nonparse_errors_during_fragment_clone
           # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
-          original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
+          original = Nokogiri::HTML4.fragment("<div id='unique'></div><div id='unique'></div>")
           original_errors = original.errors.dup
 
           copy = original.dup
@@ -284,8 +284,8 @@ def test_capturing_nonparse_errors_during_fragment_clone
 
         def test_capturing_nonparse_errors_during_node_copy_between_fragments
           # Errors should be emitted while parsing only, and should not change when moving nodes.
-          frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
-          frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
+          frag1 = Nokogiri::HTML4.fragment("<diva id='unique'>one</diva>")
+          frag2 = Nokogiri::HTML4.fragment("<dive id='unique'>two</dive>")
           node1 = frag1.at_css("#unique")
           node2 = frag2.at_css("#unique")
           original_errors1 = frag1.errors.dup
@@ -301,9 +301,9 @@ def test_capturing_nonparse_errors_during_node_copy_between_fragments
 
         def test_dup_should_create_an_html_document_fragment
           # https://github.com/sparklemotion/nokogiri/issues/1846
-          original = Nokogiri::HTML::DocumentFragment.parse("<div><p>hello</p></div>")
+          original = Nokogiri::HTML4::DocumentFragment.parse("<div><p>hello</p></div>")
           duplicate = original.dup
-          assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate)
+          assert_instance_of(Nokogiri::HTML4::DocumentFragment, duplicate)
         end
 
         describe "parse options" do
@@ -449,7 +449,7 @@ def test_dup_should_create_an_html_document_fragment
 
         describe "subclassing" do
           let(:klass) do
-            Class.new(Nokogiri::HTML::DocumentFragment) do
+            Class.new(Nokogiri::HTML4::DocumentFragment) do
               attr_accessor :initialized_with, :initialized_count
 
               def initialize(*args)
@@ -496,7 +496,7 @@ def initialize(*args)
 
             it "passes the fragment" do
               fragment = klass.parse("<div>a</div>")
-              assert_equal(Nokogiri::HTML::DocumentFragment.parse("<div>a</div>").to_s, fragment.to_s)
+              assert_equal(Nokogiri::HTML4::DocumentFragment.parse("<div>a</div>").to_s, fragment.to_s)
             end
           end
         end
diff --git a/test/html4/test_element_description.rb b/test/html4/test_element_description.rb
index 6f17f592afb..1df0379e042 100644
--- a/test/html4/test_element_description.rb
+++ b/test/html4/test_element_description.rb
@@ -72,7 +72,7 @@ def test_default_sub_element
       end
 
       def test_null_default_sub_element
-        doc = Nokogiri::HTML("foo")
+        doc = Nokogiri::HTML4("foo")
         doc.root.description.default_sub_element
       end
 
diff --git a/test/html4/test_node.rb b/test/html4/test_node.rb
index 06537ec690d..56aa283de1f 100644
--- a/test/html4/test_node.rb
+++ b/test/html4/test_node.rb
@@ -9,7 +9,7 @@ module HTML
     class TestNode < Nokogiri::TestCase
       def setup
         super
-        @html = Nokogiri::HTML(<<-eohtml)
+        @html = Nokogiri::HTML4(<<-eohtml)
         <html>
           <head></head>
           <body>
@@ -40,12 +40,12 @@ def test_get_attribute
       # are treated as as undeclared and have to be accessed via prefix:tagname
       def test_ns_attribute
         html = '<i foo:bar="baz"></i>'
-        doc = Nokogiri::HTML(html)
+        doc = Nokogiri::HTML4(html)
         assert_equal("baz", (doc % "i")["foo:bar"])
       end
 
       def test_css_path_round_trip
-        doc = Nokogiri::HTML(File.read(HTML_FILE))
+        doc = Nokogiri::HTML4(File.read(HTML_FILE))
         ["#header", "small", "div[2]", "div.post", "body"].each do |css_sel|
           ele = doc.at(css_sel)
           assert_equal(ele, doc.at(ele.css_path), ele.css_path)
@@ -53,7 +53,7 @@ def test_css_path_round_trip
       end
 
       def test_path_round_trip
-        doc = Nokogiri::HTML(File.read(HTML_FILE))
+        doc = Nokogiri::HTML4(File.read(HTML_FILE))
         ["#header", "small", "div[2]", "div.post", "body"].each do |css_sel|
           ele = doc.at(css_sel)
           assert_equal(ele, doc.at(ele.path), ele.path)
@@ -62,7 +62,7 @@ def test_path_round_trip
 
       def test_append_with_document
         assert_raises(ArgumentError) do
-          @html.root << Nokogiri::HTML::Document.new
+          @html.root << Nokogiri::HTML4::Document.new
         end
       end
 
@@ -161,7 +161,7 @@ def test_fragment
       end
 
       def test_fragment_serialization
-        fragment = Nokogiri::HTML.fragment("<div>foo</div>")
+        fragment = Nokogiri::HTML4.fragment("<div>foo</div>")
         assert_equal("<div>foo</div>", fragment.serialize.chomp)
         assert_equal("<div>foo</div>", fragment.to_xml.chomp)
         assert_equal("<div>foo</div>", fragment.inner_html)
@@ -178,7 +178,7 @@ def test_to_html_does_not_contain_entities
         foo bar </p>
         </body></html>
         EOH
-        nokogiri = Nokogiri::HTML.parse(html)
+        nokogiri = Nokogiri::HTML4.parse(html)
 
         if RUBY_PLATFORM.include?("java")
           # NKF linebreak modes are not supported as of jruby 1.2
@@ -193,7 +193,7 @@ def test_to_html_does_not_contain_entities
 
       def test_GH_1042
         file = File.join(ASSETS_DIR, "GH_1042.html")
-        html = Nokogiri::HTML(File.read(file))
+        html = Nokogiri::HTML4(File.read(file))
         table = html.xpath("//table")[1]
         trs = table.xpath("tr").drop(1)
 
diff --git a/test/html4/test_node_encoding.rb b/test/html4/test_node_encoding.rb
index 28855f3a734..3aae047e8b2 100644
--- a/test/html4/test_node_encoding.rb
+++ b/test/html4/test_node_encoding.rb
@@ -8,7 +8,7 @@ module HTML
     class TestNodeEncoding < Nokogiri::TestCase
       def setup
         super
-        @html = Nokogiri::HTML(File.open(NICH_FILE, "rb"))
+        @html = Nokogiri::HTML4(File.open(NICH_FILE, "rb"))
       end
 
       def test_get_attribute
@@ -28,12 +28,12 @@ def test_serialize_encoding_html
         assert_equal(@html.encoding.downcase,
           @html.serialize.encoding.name.downcase)
 
-        @doc = Nokogiri::HTML(@html.serialize)
+        @doc = Nokogiri::HTML4(@html.serialize)
         assert_equal(@html.serialize, @doc.serialize)
       end
 
       def test_default_encoding
-        doc = Nokogiri::HTML(nil)
+        doc = Nokogiri::HTML4(nil)
         assert_nil(doc.encoding)
         assert_equal("UTF-8", doc.serialize.encoding.name)
       end
@@ -59,7 +59,7 @@ def test_path
       end
 
       def test_inner_html
-        doc = Nokogiri::HTML(File.open(SHIFT_JIS_HTML, "rb"))
+        doc = Nokogiri::HTML4(File.open(SHIFT_JIS_HTML, "rb"))
 
         hello = "こんにちは"
 
@@ -76,7 +76,7 @@ def test_inner_html
       end
 
       def test_encoding_GH_1113
-        doc = Nokogiri::HTML::Document.new
+        doc = Nokogiri::HTML4::Document.new
         hex = "<p>&#x1f340;</p>"
         decimal = "<p>&#127808;</p>"
         encoded = "<p>🍀</p>"
diff --git a/test/html5/test_monkey_patch.rb b/test/html5/test_monkey_patch.rb
index 0483cdf8cd1..1ad88f0789f 100644
--- a/test/html5/test_monkey_patch.rb
+++ b/test/html5/test_monkey_patch.rb
@@ -11,7 +11,7 @@ def test_to_xml
   end
 
   def test_html4_fragment
-    frag = Nokogiri::HTML.fragment("<span></span>")
-    assert(frag.is_a?(Nokogiri::HTML::DocumentFragment))
+    frag = Nokogiri::HTML4.fragment("<span></span>")
+    assert(frag.is_a?(Nokogiri::HTML4::DocumentFragment))
   end
 end if Nokogiri.uses_gumbo?
diff --git a/test/html4/test_html_module.rb b/test/test_html.rb
similarity index 50%
rename from test/html4/test_html_module.rb
rename to test/test_html.rb
index 1af5c6727cb..504410acb37 100644
--- a/test/html4/test_html_module.rb
+++ b/test/test_html.rb
@@ -3,7 +3,7 @@
 require "helper"
 
 module Nokogiri
-  class TestCase
+  class TestHtml < Nokogiri::TestCase
     describe Nokogiri::HTML do
       it "is the same as Nokogiri::HTML4" do
         assert_same(Nokogiri::HTML, Nokogiri::HTML4)
@@ -14,6 +14,16 @@ class TestCase
       it "is the same as Nokogiri.HTML4()" do
         assert_equal(Nokogiri.method(:HTML), Nokogiri.method(:HTML4))
       end
+
+      it "returns a Nokogiri::HTML4::Document" do
+        assert_instance_of(Nokogiri::HTML4::Document, Nokogiri::HTML::Document.parse("<html></html>"))
+      end
+    end
+
+    describe Nokogiri::HTML::Document do
+      it "is the same as Nokogiri::HTML4::Document" do
+        assert_same(Nokogiri::HTML4::Document, Nokogiri::HTML::Document)
+      end
     end
   end
 end
diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb
index 34ca85886e3..50d4306e4d9 100644
--- a/test/test_memory_leak.rb
+++ b/test/test_memory_leak.rb
@@ -114,8 +114,8 @@ def test_sax_parser_context
         Nokogiri::XML::SAX::ParserContext.new(io)
         io.rewind
 
-        Nokogiri::HTML::SAX::ParserContext.new(@str)
-        Nokogiri::HTML::SAX::ParserContext.new(io)
+        Nokogiri::HTML4::SAX::ParserContext.new(@str)
+        Nokogiri::HTML4::SAX::ParserContext.new(io)
         io.rewind
       end
     end
@@ -136,7 +136,7 @@ def test_jumping_sax_handler
 
       loop do
         catch(:foo) do
-          Nokogiri::HTML::SAX::Parser.new(doc).parse(@str)
+          Nokogiri::HTML4::SAX::Parser.new(doc).parse(@str)
         end
       end
     end
@@ -194,7 +194,7 @@ def test_leaking_namespace_node_strings_with_prefix
     def test_leaking_dtd_nodes_after_internal_subset_removal
       # see https://github.com/sparklemotion/nokogiri/issues/1784
       100_000.times do |i|
-        doc = Nokogiri::HTML::Document.new
+        doc = Nokogiri::HTML4::Document.new
         doc.internal_subset.remove
         puts MemInfo.rss if i % 1000 == 0
       end
diff --git a/test/test_nokogiri.rb b/test/test_nokogiri.rb
index 077b5b2f399..1acd1a34955 100644
--- a/test/test_nokogiri.rb
+++ b/test/test_nokogiri.rb
@@ -12,7 +12,7 @@ def test_libxml_iconv
 
       def test_parse_with_io
         doc = Nokogiri.parse(StringIO.new("<html><head><title></title><body></body></html>"))
-        assert_instance_of(Nokogiri::HTML::Document, doc)
+        assert_instance_of(Nokogiri::HTML4::Document, doc)
       end
 
       def test_xml?
@@ -40,13 +40,15 @@ def test_nokogiri_method_with_html
       end
 
       def test_nokogiri_method_with_block
-        doc = Nokogiri { b("bold tag") }
-        assert_equal("<b>bold tag</b>", doc.to_html.chomp)
+        root = Nokogiri { b("bold tag") }
+        assert_instance_of(Nokogiri::HTML4::Document, root.document)
+        assert_equal("<b>bold tag</b>", root.to_html.chomp)
       end
 
       def test_make_with_html
-        doc = Nokogiri.make("<b>bold tag</b>")
-        assert_equal("<b>bold tag</b>", doc.to_html.chomp)
+        root = Nokogiri.make("<b>bold tag</b>")
+        assert_instance_of(Nokogiri::HTML4::Document, root.document)
+        assert_equal("<b>bold tag</b>", root.to_html.chomp)
       end
 
       def test_make_with_block
diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb
index 69bd4c5a253..0c855ba67d4 100644
--- a/test/xml/test_node.rb
+++ b/test/xml/test_node.rb
@@ -151,7 +151,7 @@ def test_parse_with_unparented_text_context_node
         end
 
         def test_parse_with_unparented_html_text_context_node
-          doc = HTML::Document.new
+          doc = Nokogiri::HTML4::Document.new
           elem = XML::Text.new("div", doc)
           x = elem.parse("<div/>") # should not raise an exception
           assert_equal("div", x.first.name)
@@ -165,7 +165,7 @@ def test_parse_with_unparented_fragment_text_context_node
         end
 
         def test_parse_with_unparented_html_fragment_text_context_node
-          doc = HTML::DocumentFragment.parse("<div><span>foo</span></div>")
+          doc = Nokogiri::HTML4::DocumentFragment.parse("<div><span>foo</span></div>")
           elem = doc.at_css("span")
           x = elem.parse("<span/>") # should not raise an exception
           assert_equal("span", x.first.name)
@@ -196,8 +196,8 @@ def test_dup_shallow_copy
 
         def test_dup_to_another_document
           skip_unless_libxml2("Node.dup with new_parent arg is only implemented on CRuby")
-          doc1 = HTML::Document.parse("<root><div><p>hello</p></div></root>")
-          doc2 = HTML::Document.parse("<div></div>")
+          doc1 = Nokogiri::HTML4::Document.parse("<root><div><p>hello</p></div></root>")
+          doc2 = Nokogiri::HTML4::Document.parse("<div></div>")
 
           div = doc1.at_css("div")
           duplicate_div = div.dup(1, doc2)
@@ -1127,7 +1127,7 @@ def test_namespace_without_an_href_on_html_node
           #  describe how we handle microsoft word's HTML formatting.
           #  this test is descriptive, not prescriptive.
           #
-          html = Nokogiri::HTML.parse(<<~XML)
+          html = Nokogiri::HTML4.parse(<<~XML)
             <div><o:p>foo</o:p></div>
           XML
           node = html.at("div").children.first
@@ -1291,7 +1291,7 @@ def test_text_node_robustness_gh1426
           # side note: this was fixed in libxml-ruby 2.9.0 by https://github.com/xml4r/libxml-ruby/pull/119
           message = "<section><h2>BOOM!</h2></section>"
           10_000.times do
-            node = Nokogiri::HTML::DocumentFragment.parse(message).at_css("h2")
+            node = Nokogiri::HTML4::DocumentFragment.parse(message).at_css("h2")
             node.add_previous_sibling(Nokogiri::XML::Text.new("before", node.document))
             node.add_next_sibling(Nokogiri::XML::Text.new("after", node.document))
           end
diff --git a/test/xml/test_node_reparenting.rb b/test/xml/test_node_reparenting.rb
index 08fb2ecabe6..78824b9805a 100644
--- a/test/xml/test_node_reparenting.rb
+++ b/test/xml/test_node_reparenting.rb
@@ -603,7 +603,7 @@ def coerce(data)
           end
 
           it "should remove the child node after the operation" do
-            fragment = Nokogiri::HTML::DocumentFragment.parse("a<a>b</a>")
+            fragment = Nokogiri::HTML4::DocumentFragment.parse("a<a>b</a>")
             node = fragment.children.last
             node.add_previous_sibling(node.children)
             assert_empty node.children, "should have no childrens"
diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb
index 8fae2ea39c5..0de81ba3513 100644
--- a/test/xml/test_xpath.rb
+++ b/test/xml/test_xpath.rb
@@ -116,7 +116,7 @@ def test_node_search_with_multiple_queries
       def test_css_search_with_ambiguous_integer_or_string_attributes
         # https://github.com/sparklemotion/nokogiri/issues/711
         html = "<body><div><img width=200>"
-        doc = Nokogiri::HTML(html)
+        doc = Nokogiri::HTML4(html)
         refute_nil(doc.at_css("img[width='200']"))
         refute_nil(doc.at_css("img[width=200]"))
       end
@@ -315,7 +315,7 @@ def awesome!; end
       def test_code_that_invokes_OP_RESET_inside_libxml2
         doc = "<html><body id='foo'><foo>hi</foo></body></html>"
         xpath = 'id("foo")//foo'
-        nokogiri = Nokogiri::HTML.parse(doc)
+        nokogiri = Nokogiri::HTML4.parse(doc)
         assert(nokogiri.xpath(xpath))
       end
 
@@ -500,7 +500,7 @@ def add(context, rhs)
 
       describe "nokogiri-builtin:css-class xpath function" do
         before do
-          @doc = Nokogiri::HTML::Document.parse("<html></html>")
+          @doc = Nokogiri::HTML4::Document.parse("<html></html>")
         end
 
         it "accepts exactly two arguments" do

From ebde7dae770aaefa667ee02ac57a2c0bfed1164c Mon Sep 17 00:00:00 2001
From: Mike Dalessio <mike.dalessio@gmail.com>
Date: Sun, 8 May 2022 11:59:40 -0400
Subject: [PATCH 2/2] fix: make sure HTML5::Document{,Fragment} subclass
 properly

Loofah and other downstream libraries rely on this behavior. This is
long-term prep for a day when HTML5 may become the default on
supported platforms.
---
 ext/nokogiri/gumbo.c           |  10 +--
 lib/nokogiri/html5/document.rb |   2 +-
 test/html5/test_api.rb         | 118 +++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c
index f7fa18e97cc..5acc486f0a2 100644
--- a/ext/nokogiri/gumbo.c
+++ b/ext/nokogiri/gumbo.c
@@ -23,7 +23,7 @@
 //
 // Processing starts by calling gumbo_parse_with_options. The resulting document tree
 // is then walked, a parallel libxml2 tree is constructed, and the final document is
-// then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU
+// then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU
 // requirements as Ruby objects are only built when necessary.
 //
 
@@ -297,6 +297,7 @@ typedef struct {
   GumboOutput *output;
   VALUE input;
   VALUE url_or_frag;
+  VALUE klass;
   xmlDocPtr doc;
 } ParseArgs;
 
@@ -321,7 +322,7 @@ static VALUE parse_continue(VALUE parse_args);
  *  @!visibility protected
  */
 static VALUE
-parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
+parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth, VALUE klass)
 {
   GumboOptions options = kGumboDefaultOptions;
   options.max_attributes = NUM2INT(max_attributes);
@@ -333,6 +334,7 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors
     .output = output,
     .input = input,
     .url_or_frag = url,
+    .klass = klass,
     .doc = NULL,
   };
 
@@ -357,7 +359,7 @@ parse_continue(VALUE parse_args)
   }
   args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
   build_tree(doc, (xmlNodePtr)doc, output->document);
-  VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
+  VALUE rdoc = noko_xml_document_wrap(args->klass, doc);
   args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
   add_errors(output, rdoc, args->input, args->url_or_frag);
   return rdoc;
@@ -577,7 +579,7 @@ noko_init_gumbo()
   parent = rb_intern_const("parent");
 
   // Define Nokogumbo module with parse and fragment methods.
-  rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5);
+  rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 6);
   rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
 }
 
diff --git a/lib/nokogiri/html5/document.rb b/lib/nokogiri/html5/document.rb
index cc0961c8641..d3b69431dee 100644
--- a/lib/nokogiri/html5/document.rb
+++ b/lib/nokogiri/html5/document.rb
@@ -63,7 +63,7 @@ def do_parse(string_or_io, url, encoding, options)
           max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
           max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
           max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
-          doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
+          doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth, self)
           doc.encoding = "UTF-8"
           doc
         end
diff --git a/test/html5/test_api.rb b/test/html5/test_api.rb
index af10dc11db9..e338ae20934 100644
--- a/test/html5/test_api.rb
+++ b/test/html5/test_api.rb
@@ -182,4 +182,122 @@ def test_html_eh
     assert_predicate(doc, :html?)
     refute_predicate(doc, :xml?)
   end
+
+  describe Nokogiri::HTML5::Document do
+    describe "subclassing" do
+      let(:klass) do
+        Class.new(Nokogiri::HTML5::Document) do
+          attr_accessor :initialized_with, :initialized_count
+
+          # Captures the constructor arguments and counts #initialize invocations.
+          def initialize(*ctor_args)
+            super
+            @initialized_with = ctor_args
+            @initialized_count = (@initialized_count || 0) + 1
+          end
+        end
+      end
+
+      describe ".new" do
+        it "returns an instance of the expected class" do
+          # instance_of (not kind_of): the exact subclass must come back
+          assert_instance_of(klass, klass.new)
+        end
+
+        it "calls #initialize exactly once" do
+          # the anonymous subclass bumps initialized_count on every #initialize
+          assert_equal(1, klass.new.initialized_count)
+        end
+
+        it "passes arguments to #initialize" do
+          dtd_args = [
+            "http://www.w3.org/TR/REC-html40/loose.dtd",
+            "-//W3C//DTD HTML 4.0 Transitional//EN",
+          ]
+          assert_equal(dtd_args, klass.new(*dtd_args).initialized_with)
+        end
+      end
+
+      it "#dup returns the expected class" do
+        # copying must keep the subclass rather than fall back to the parent class
+        assert_instance_of(klass, klass.new.dup)
+      end
+
+      describe ".parse" do
+        let(:html) { Nokogiri::HTML5.parse(File.read(HTML_FILE)) }
+
+        it "returns an instance of the expected class" do
+          markup = File.read(HTML_FILE)
+          assert_instance_of(klass, klass.parse(markup))
+        end
+
+        it "calls #initialize exactly once" do
+          markup = File.read(HTML_FILE)
+          assert_equal(1, klass.parse(markup).initialized_count)
+        end
+
+        it "parses the doc" do
+          subclassed = klass.parse(File.read(HTML_FILE))
+          assert_equal(html.root.to_s, subclassed.root.to_s)
+        end
+      end
+    end
+  end
+
+  describe Nokogiri::HTML5::DocumentFragment do
+    describe "subclassing" do
+      let(:klass) do
+        Class.new(Nokogiri::HTML5::DocumentFragment) do
+          attr_accessor :initialized_with, :initialized_count
+
+          # Records the constructor arguments and how often #initialize has run.
+          def initialize(*args)
+            super
+            @initialized_with = args
+            @initialized_count = (@initialized_count || 0) + 1
+          end
+        end
+      end
+      let(:html) { Nokogiri::HTML5.parse(File.read(HTML_FILE), HTML_FILE) }
+
+      describe ".new" do
+        it "returns an instance of the right class" do
+          fragment = klass.new(html, "<div>a</div>")
+          assert_instance_of(klass, fragment)
+        end
+
+        it "calls #initialize exactly once" do
+          fragment = klass.new(html, "<div>a</div>")
+          assert_equal(1, fragment.initialized_count)
+        end
+
+        it "passes args to #initialize" do
+          fragment = klass.new(html, "<div>a</div>")
+          assert_equal([html, "<div>a</div>"], fragment.initialized_with)
+        end
+      end
+
+      it "#dup returns the expected class" do
+        fragment = klass.new(html, "<div>a</div>").dup
+        assert_instance_of(klass, fragment)
+      end
+
+      describe ".parse" do
+        it "returns an instance of the right class" do
+          fragment = klass.parse("<div>a</div>")
+          assert_instance_of(klass, fragment)
+        end
+
+        it "calls #initialize exactly once" do
+          fragment = klass.parse("<div>a</div>")
+          assert_equal(1, fragment.initialized_count)
+        end
+
+        it "parses the fragment" do
+          expected = Nokogiri::HTML5::DocumentFragment.parse("<div>a</div>").to_s
+          assert_equal(expected, klass.parse("<div>a</div>").to_s)
+        end
+      end
+    end
+  end
 end if Nokogiri.uses_gumbo?