Skip to content

Commit

Permalink
HTML5::DocumentFragment.parse and #initialize take kwargs
Browse files Browse the repository at this point in the history
Related to #3323

This commit was merged and expanded from #3335, thank you @infews!

Co-authored-by: Davis W. Frank <[email protected]>
  • Loading branch information
flavorjones and infews committed Dec 8, 2024
1 parent 41abed7 commit 98a878c
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 40 deletions.
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ We've resolved many long-standing bugs in the various schema classes, validation

* The undocumented and unused method `Nokogiri::CSS.parse` is now deprecated and will generate a warning. The AST returned by this method is private and subject to change and removal in future versions of Nokogiri. This method will be removed in a future version of Nokogiri.
* Passing an options hash to `CSS.xpath_for` is now deprecated and will generate a warning. Use keyword arguments instead. This will become an error in a future version of Nokogiri.
* Passing an options hash to `HTML5::DocumentFragment.parse` is now deprecated and will generate a warning. Use keyword arguments instead. This will become an error in a future version of Nokogiri.
* Passing libxml2 encoding IDs to `SAX::ParserContext` methods is now deprecated and will generate a warning. The use of `SAX::Parser::ENCODINGS` is also deprecated. Use `Encoding` objects or encoding names instead.


Expand Down
125 changes: 99 additions & 26 deletions lib/nokogiri/html5/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,61 @@ module HTML5
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
class << self
# :call-seq:
# parse(tags, **options)
# parse(tags, encoding = nil, **options)
# parse(input, **options) → HTML5::DocumentFragment
#
# Parse an HTML5 document fragment from +tags+, returning a Nodeset.
# Parse \HTML5 fragment input from a String or IO, and return a new HTML5::DocumentFragment. This
# method creates a new, empty HTML5::Document to contain the fragment.
#
# [Parameters]
# - +tags+ [String, IO] The HTML5 document fragment to parse.
# - +encoding+ [String] The name of the encoding to use when parsing the document fragment. (default +nil+)
# - +input+ (String | IO) The HTML5 document fragment to parse.
#
# Also see Nokogiri::HTML5 for a longer explanation of how encoding is handled by the parser.
# [Optional Keyword Arguments]
# - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
# used when processing the document. When not provided, the encoding will be determined
# based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
# encoding is handled by the parser.
#
# [Options]
# - +:context+ [String, Nokogiri::XML::Node] The context in which to parse the document fragment. (default +"body"+)
# - +:max_errors+ [Integer] The maximum number of parse errors to record. (default +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
# - +:max_tree_depth+ [Integer] The maximum depth of the parse tree. (default +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
# - +:max_attributes+ [Integer] The maximum number of attributes allowed on an element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
# - +:parse_noscript_content_as_text+ [Boolean] Whether to parse the content of +noscript+ elements as text. (default +false+)
# - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, to
# use as the context in which the document fragment is parsed. See below for more
# information. (default +"body"+)
#
# Also see Nokogiri::HTML5 for a longer explanation of the options.
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
#
# [Returns]
# - [Nokogiri::XML::NodeSet] A node set containing the root nodes of the parsed fragment.
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
#
def parse(tags, encoding = nil, positional_options_hash = nil, **options)
unless positional_options_hash.nil?
warn("Nokogiri::HTML5::DocumentFragment.parse: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
#
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
# elements as text. (default +false+)
#
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
#
# [Returns] Nokogiri::HTML5::DocumentFragment
#
# === Context \Node
#
# If a context node is specified using +context:+, then the parser will behave as if that
# Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
#
def parse(
input,
encoding_ = nil, positional_options_hash = nil,
encoding: encoding_, **options
)
unless positional_options_hash.nil? || positional_options_hash.empty?
options.merge!(positional_options_hash)
end

context = options.delete(:context)

document = HTML5::Document.new
document.encoding = "UTF-8"
tags = HTML5.read_and_encode(tags, encoding)
input = HTML5.read_and_encode(input, encoding)

new(document, tags, context, options)
new(document, input, context, options)
end
end

Expand All @@ -71,26 +90,80 @@ def parse(tags, encoding = nil, positional_options_hash = nil, **options)

# Get the parser's quirks mode value. See HTML5::QuirksMode.
#
# This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`).
# This method returns `nil` if the parser was not invoked (e.g.,
# `Nokogiri::HTML5::DocumentFragment.new(doc)`).
#
# Since v1.14.0
attr_reader :quirks_mode

# Create a document fragment.
def initialize(doc, tags = nil, context = nil, options = {}) # rubocop:disable Lint/MissingSuper
#
# :call-seq:
# new(document, input, **options) → HTML5::DocumentFragment
#
# Parse \HTML5 fragment input from a String, and return a new HTML5::DocumentFragment.
#
# 💡 It's recommended to use either HTML5::DocumentFragment.parse or HTML5::Node#fragment
# rather than call this method directly.
#
# [Required Parameters]
# - +document+ (HTML5::Document) The parent document to associate the returned fragment with.
#
# [Optional Parameters]
# - +input+ (String) The content to be parsed.
#
# [Optional Keyword Arguments]
# - +encoding:+ (String | Encoding) The encoding, or name of the encoding, that should be
# used when processing the document. When not provided, the encoding will be determined
# based on the document content. Also see Nokogiri::HTML5 for a longer explanation of how
# encoding is handled by the parser.
#
# - +context:+ (String | Nokogiri::XML::Node) The node, or the name of an HTML5 element, in
# which to parse the document fragment. (default +"body"+)
#
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
#
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
#
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
#
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
# elements as text. (default +false+)
#
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
#
# [Returns] HTML5::DocumentFragment
#
# === Context \Node
#
# If a context node is specified using +context:+, then the parser will behave as if that
# Node, or a hypothetical tag named as specified, is the parent of the fragment subtree.
#
def initialize(
doc, input = nil,
context_ = nil, positional_options_hash = nil,
context: context_,
**options
) # rubocop:disable Lint/MissingSuper
unless positional_options_hash.nil? || positional_options_hash.empty?
options.merge!(positional_options_hash)
end

@document = doc
@errors = []
return self unless tags
return self unless input

tags = Nokogiri::HTML5.read_and_encode(tags, nil)
input = Nokogiri::HTML5.read_and_encode(input, nil)

context = options.delete(:context) if options.key?(:context)

options[:max_attributes] ||= Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
options[:max_errors] ||= options.delete(:max_parse_errors) || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
options[:max_tree_depth] ||= Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH

Nokogiri::Gumbo.fragment(self, tags, context, **options)
Nokogiri::Gumbo.fragment(self, input, context, **options)
end

def serialize(options = {}, &block) # :nodoc:
Expand Down
31 changes: 18 additions & 13 deletions test/html5/test_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def test_fragment_encoding

assert_match(/おはようございます/, Nokogiri::HTML5.fragment(raw, Encoding::SHIFT_JIS).to_s)
assert_match(/おはようございます/, Nokogiri::HTML5::DocumentFragment.parse(raw, Encoding::SHIFT_JIS).to_s)

# with kwargs
assert_match(/おはようございます/, Nokogiri::HTML5.fragment(raw, encoding: Encoding::SHIFT_JIS).to_s)
assert_match(/おはようございます/, Nokogiri::HTML5::DocumentFragment.parse(raw, encoding: Encoding::SHIFT_JIS).to_s)
end

def test_fragment_serialization_encoding
Expand Down Expand Up @@ -432,15 +436,13 @@ def initialize(*args)

describe "to DocumentFragment.parse" do
it "as an options hash" do
assert_output(nil, /Passing options as an explicit hash is deprecated/) do
fragment = Nokogiri::HTML5::DocumentFragment.parse(
"<body><div>foo</div></body>",
nil,
{ context: "html" },
)
assert_match(/<body>/, fragment.to_s)
assert_match(/<head>/, fragment.to_s)
end
fragment = Nokogiri::HTML5::DocumentFragment.parse(
"<body><div>foo</div></body>",
nil,
{ context: "html" },
)
assert_match(/<body>/, fragment.to_s)
assert_match(/<head>/, fragment.to_s)
end

it "as keyword argument" do
Expand All @@ -462,9 +464,9 @@ def initialize(*args)
Class.new(Nokogiri::HTML5::DocumentFragment) do
attr_accessor :initialized_with, :initialized_count

def initialize(*args)
def initialize(*args, **kwargs)
super
@initialized_with = args
@initialized_with = [args, **kwargs]
@initialized_count ||= 0
@initialized_count += 1
end
Expand All @@ -484,8 +486,11 @@ def initialize(*args)
end

it "passes args to #initialize" do
fragment = klass.new(html, "<div>a</div>")
assert_equal([html, "<div>a</div>"], fragment.initialized_with)
fragment = klass.new(html, "<div>a</div>", max_errors: 1)
assert_equal(
[[html, "<div>a</div>"], { max_errors: 1 }],
fragment.initialized_with,
)
end
end

Expand Down

0 comments on commit 98a878c

Please sign in to comment.