diff --git a/CHANGELOG.md b/CHANGELOG.md index d4f8851587..961df20034 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,11 @@ This release ships separate precompiled GNU and Musl gems for all linux platform This release drops precompiled native platform gems for `x86-linux` and `x86-mingw32`. **These platforms are still supported.** Users on these platforms must install the "ruby platform" gem which requires a compiler toolchain. See [Installing the `ruby` platform gem](https://nokogiri.org/tutorials/installing_nokogiri.html#installing-the-ruby-platform-gem) in the installation docs. (#3369, #3081) +### Improved + +* CSS and XPath queries are faster now that `Node#xpath`, `Node#css`, and related functions are re-using the underlying xpath context object (which is expensive to initialize). We benchmarked a 2.8x improvement for a 6kb file, and a more modest 1.3x improvement for a 70kb file. (#3378) @flavorjones + + ## v1.17.2 / 2024-12-12 ### Fixed diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index 16939f3066..40599232e4 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -179,7 +179,7 @@ public class XmlXpathContext extends RubyObject final NokogiriXPathFunctionResolver fnResolver = NokogiriXPathFunctionResolver.create(handler); try { return tryGetNodeSet(context, expr, fnResolver); - } catch (TransformerException ex) { + } catch (TransformerException | RuntimeException ex) { throw XmlSyntaxError.createXMLXPathSyntaxError(context.runtime, (expr + ": " + ex.toString()), ex).toThrowable(); diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index 69b6993bdb..5e231b3991 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -124,6 +124,7 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs) * register_ns(prefix, uri) → Nokogiri::XML::XPathContext * * Register the namespace with +prefix+ and +uri+ for use in future queries. + * Passing a uri of +nil+ will unregister the namespace. * * [Returns] +self+ */ @@ -131,13 +132,25 @@ static VALUE rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri) { xmlXPathContextPtr c_context; + const xmlChar *ns_uri; TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context); - xmlXPathRegisterNs(c_context, - (const xmlChar *)StringValueCStr(prefix), - (const xmlChar *)StringValueCStr(uri) - ); + if (NIL_P(uri)) { + ns_uri = NULL; + } else { + ns_uri = (const xmlChar *)StringValueCStr(uri); + } + + xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri); + + VALUE registered_namespaces = rb_iv_get(rb_context, "@registered_namespaces"); + if (NIL_P(uri)) { + rb_hash_delete(registered_namespaces, prefix); + } else { + rb_hash_aset(registered_namespaces, prefix, Qtrue); + } + return rb_context; } @@ -146,6 +159,7 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri) * register_variable(name, value) → Nokogiri::XML::XPathContext * * Register the variable +name+ with +value+ for use in future queries. + * Passing a value of +nil+ will unregister the variable. * * [Returns] +self+ */ @@ -157,13 +171,20 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context); - xmlValue = xmlXPathNewCString(StringValueCStr(value)); + if (NIL_P(value)) { + xmlValue = NULL; + } else { + xmlValue = xmlXPathNewCString(StringValueCStr(value)); + } - xmlXPathRegisterVariable( - c_context, - (const xmlChar *)StringValueCStr(name), - xmlValue - ); + xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue); + + VALUE registered_variables = rb_iv_get(rb_context, "@registered_variables"); + if (NIL_P(value)) { + rb_hash_delete(registered_variables, name); + } else { + rb_hash_aset(registered_variables, name, Qtrue); + } return rb_context; } @@ -394,6 +415,7 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context) xmlSetStructuredErrorFunc(NULL, NULL); xmlSetGenericErrorFunc(NULL, NULL); + xmlXPathRegisterFuncLookup(c_context, NULL, NULL); if (xpath == NULL) { rb_exc_raise(rb_ary_entry(errors, 0)); @@ -447,9 +469,30 @@ rb_xml_xpath_context_new(VALUE klass, VALUE rb_node) &xml_xpath_context_type, c_context ); + + rb_iv_set(rb_context, "@registered_namespaces", rb_hash_new()); + rb_iv_set(rb_context, "@registered_variables", rb_hash_new()); + return rb_context; } + +/* :nodoc: */ +static VALUE +rb_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node) +{ + xmlNodePtr c_node; + xmlXPathContextPtr c_context; + + TypedData_Get_Struct(rb_context, xmlXPathContext, &xml_xpath_context_type, c_context); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + c_context->doc = c_node->doc; + c_context->node = c_node; + + return rb_node; +} + void noko_init_xml_xpath_context(void) { @@ -465,4 +508,5 @@ noko_init_xml_xpath_context(void) rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1); rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2); rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2); + rb_define_method(cNokogiriXmlXpathContext, "node=", rb_xml_xpath_context_set_node, 1); } diff --git a/lib/nokogiri/xml/searchable.rb b/lib/nokogiri/xml/searchable.rb index 75fafd77c0..11c0532639 100644 --- a/lib/nokogiri/xml/searchable.rb +++ b/lib/nokogiri/xml/searchable.rb @@ -261,18 +261,36 @@ def xpath_internal(node, paths, handler, ns, binds) end def xpath_impl(node, path, handler, ns, binds) - ctx = get_xpath_context(node) + get_xpath_context(node) do |context| + context.register_namespaces(ns) + context.register_variables(binds) - ctx.register_namespaces(ns) - ctx.register_variables(binds) + path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml? - path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml? - - ctx.evaluate(path, handler) + context.evaluate(path, handler) + end end - def get_xpath_context(node) - XPathContext.new(node) + if Nokogiri.uses_libxml? && ENV["NOKOGIRI_DEOPTIMIZE_XPATH"].nil? # env var is an escape hatch + # optimized path + def get_xpath_context(node) + context = Thread.current.thread_variable_get(:nokogiri_xpath_context) + if context + context.node = node + else + context = Thread.current.thread_variable_set(:nokogiri_xpath_context, XPathContext.new(node)) + end + + begin + yield context + ensure + context.reset + end + end + else + def get_xpath_context(node) + yield XPathContext.new(node) + end end end end diff --git a/lib/nokogiri/xml/xpath_context.rb b/lib/nokogiri/xml/xpath_context.rb index 033af04b02..2078f40098 100644 --- a/lib/nokogiri/xml/xpath_context.rb +++ b/lib/nokogiri/xml/xpath_context.rb @@ -22,6 +22,28 @@ def register_variables(binds) register_variable(key, value) end end + + if Nokogiri.uses_libxml? + def reset + return unless + + @registered_namespaces.each do |key, _| + register_ns(key, nil) + end + unless @registered_namespaces.empty? + warn "Nokogiri::XML::XPathContext#reset: unexpected registered namespaces: #{@registered_namespaces.keys}" + @registered_namespaces.clear + end + + @registered_variables.each do |key, _| + register_variable(key, nil) + end + unless @registered_variables.empty? + warn "Nokogiri::XML::XPathContext#reset: unexpected registered variables: #{@registered_variables.keys}" + @registered_variables.clear + end + end + end end end end diff --git a/test/xml/test_xpath.rb b/test/xml/test_xpath.rb index 7948cac051..790f0094d0 100644 --- a/test/xml/test_xpath.rb +++ b/test/xml/test_xpath.rb @@ -698,6 +698,86 @@ def collision(nodes) assert_equal(3, doc.xpath("//self::*:child").length) end end + + describe "re-using XPathContext objects within a thread" do + # see https://github.com/sparklemotion/nokogiri/issues/3266 + # this optimization is CRuby-only, but we run the tests on JRuby for consistency + let(:doc) { + Nokogiri::XML::Document.parse(<<~XML) + + default + ns1 + + XML + } + + it "clears registered namespaces" do + # make sure the thread-local XPathContext is initialized + doc.xpath("//xmlns:child") + + # do namespaces behave the way we expect? + node = doc.at_xpath("//ns:child", { "ns" => "http://nokogiri.org/ns1" }) + assert_equal("ns1", node.text) + + assert_raises(XPath::SyntaxError) { + doc.at_xpath("//ns:child") + } + + node = doc.at_xpath("//child") + assert_nil(node) + + # make sure the nokogiri and nokogiri-builting namespaces are re-registered + doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) + assert_equal(1, @handler.things.length) + + if Nokogiri.uses_libxml? + nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]") + assert_equal(1, nodes.length) + end + end + + it "clears registered handlers and functions" do + # make sure the thread-local XPathContext is initialized + doc.xpath("//xmlns:child") + + doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) + assert_equal(1, @handler.things.length) + + assert_raises(XPath::SyntaxError) { + doc.xpath("//xmlns:child[nokogiri:thing(.)]") + } + + doc.xpath("//xmlns:child[nokogiri:thing(.)]", @handler) + assert_equal(2, @handler.things.length) + + if Nokogiri.uses_libxml? + nodes = doc.xpath("//xmlns:child[nokogiri-builtin:local-name-is('child')]") + assert_equal(1, nodes.length) + end + end + + it "clears registered variables" do + # make sure the thread-local XPathContext is initialized + doc.xpath("//xmlns:child") + + nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes") + assert_equal(4, nodes.length) + + assert_raises(XPath::SyntaxError) { + @xml.xpath("//address[@domestic=$value]") + } + + nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Qwerty") + assert_empty(nodes) + + assert_raises(XPath::SyntaxError) { + @xml.xpath("//address[@domestic=$value]") + } + + nodes = @xml.xpath("//address[@domestic=$value]", nil, value: "Yes") + assert_equal(4, nodes.length) + end + end end end end