From 3f20dd912fd9a0126ec7e60fe639114ffbf15a2f Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 21 Dec 2024 14:53:39 -0500 Subject: [PATCH] ext: backport libxml2/gnome@bf5fcf6e for xmlXPathContext perf See extended discussion at #3378 Benchmark comparing this commit against v1.17.x ("main"): Comparison: large: main: 3910.6 i/s large: patched: 3759.6 i/s - same-ish: difference falls within error Comparison: small: patched: 242901.7 i/s small: main: 127486.0 i/s - 1.91x slower I think we could get greater performance gains by re-using XPathContext objects, but only at the cost of a significant amount of additional complexity, since in order to properly support recursive XPath evaluation, Nokogiri would have to push and pop "stack frames" containing: - internal state contextSize and proximityPosition - registered namespaces - registered variables - function lookup handler That feels like a lot of code for a small win. Comparatively, pulling in this upstream patch is still a 2x speedup for zero additional complexity. --- CHANGELOG.md | 2 +- ...te-static-hash-table-for-standard-fu.patch | 244 ++++++++++++++++++ 2 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 7effa5c5884..5378bb54d3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ This release drops precompiled native platform gems for `x86-linux` and `x86-min ### Improved -* [CRuby] CSS and XPath queries are faster now that `Node#xpath`, `Node#css`, and related functions are re-using the underlying xpath context object (which is expensive to initialize). We benchmarked a 2.8x improvement for a 6kb file, and a more modest 1.3x improvement for a 70kb file. (#3378) @flavorjones +* [CRuby] CSS and XPath queries are faster now that `Node#xpath`, `Node#css`, and related functions are using a faster XPathContext initialization process. We benchmarked a 1.9x improvement for a 6kb file. Big thanks to @nwellnhof for helping with this one. (#3378, superseded by #3389) @flavorjones ## v1.17.2 / 2024-12-12 diff --git a/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch b/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch new file mode 100644 index 00000000000..f84dc8e52ee --- /dev/null +++ b/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch @@ -0,0 +1,244 @@ +From d3e3526111097560cf7c002613e2cb1d469b59e0 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sat, 21 Dec 2024 16:03:46 +0100 +Subject: [PATCH] xpath: Use separate static hash table for standard functions + +This avoids registering standard functions when creating an XPath +context. + +Lookup of extension functions is a bit slower now, but ultimately, all +function lookups should be moved to the compilation phase. + +(cherry picked from commit bf5fcf6e646bb51a0f6a3655a1d64bea97274867) +--- + xpath.c | 170 ++++++++++++++++++++++++++++++++------------------------ + 1 file changed, 98 insertions(+), 72 deletions(-) + +diff --git a/xpath.c b/xpath.c +index 485d7747..21711653 100644 +--- a/xpath.c ++++ b/xpath.c +@@ -136,11 +136,48 @@ + + #if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) + +-/************************************************************************ +- * * +- * Floating point stuff * +- * * +- ************************************************************************/ ++static void ++xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs); ++ ++static const struct { ++ const char *name; ++ xmlXPathFunction func; ++} xmlXPathStandardFunctions[] = { ++ { "boolean", xmlXPathBooleanFunction }, ++ { "ceiling", xmlXPathCeilingFunction }, ++ { "count", xmlXPathCountFunction }, ++ { "concat", xmlXPathConcatFunction }, ++ { "contains", xmlXPathContainsFunction }, ++ { "id", xmlXPathIdFunction }, ++ { "false", xmlXPathFalseFunction }, ++ { "floor", xmlXPathFloorFunction }, ++ { "last", xmlXPathLastFunction }, ++ { "lang", xmlXPathLangFunction }, ++ { "local-name", xmlXPathLocalNameFunction }, ++ { "not", xmlXPathNotFunction }, ++ { "name", xmlXPathNameFunction }, ++ { "namespace-uri", xmlXPathNamespaceURIFunction }, ++ { "normalize-space", xmlXPathNormalizeFunction }, ++ { "number", xmlXPathNumberFunction }, ++ { "position", xmlXPathPositionFunction }, ++ { "round", xmlXPathRoundFunction }, ++ { "string", xmlXPathStringFunction }, ++ { "string-length", xmlXPathStringLengthFunction }, ++ { "starts-with", xmlXPathStartsWithFunction }, ++ { "substring", xmlXPathSubstringFunction }, ++ { "substring-before", xmlXPathSubstringBeforeFunction }, ++ { "substring-after", xmlXPathSubstringAfterFunction }, ++ { "sum", xmlXPathSumFunction }, ++ { "true", xmlXPathTrueFunction }, ++ { "translate", xmlXPathTranslateFunction } ++}; ++ ++#define NUM_STANDARD_FUNCTIONS \ ++ (sizeof(xmlXPathStandardFunctions) / sizeof(xmlXPathStandardFunctions[0])) ++ ++#define SF_HASH_SIZE 64 ++ ++static unsigned char xmlXPathSFHash[SF_HASH_SIZE]; + + double xmlXPathNAN = 0.0; + double xmlXPathPINF = 0.0; +@@ -156,6 +193,18 @@ xmlXPathInit(void) { + xmlInitParser(); + } + ++ATTRIBUTE_NO_SANITIZE_INTEGER ++static unsigned ++xmlXPathSFComputeHash(const xmlChar *name) { ++ unsigned hashValue = 5381; ++ const xmlChar *ptr; ++ ++ for (ptr = name; *ptr; ptr++) ++ hashValue = hashValue * 33 + *ptr; ++ ++ return(hashValue); ++} ++ + /** + * xmlInitXPathInternal: + * +@@ -164,6 +213,8 @@ xmlXPathInit(void) { + ATTRIBUTE_NO_SANITIZE("float-divide-by-zero") + void + xmlInitXPathInternal(void) { ++ size_t i; ++ + #if defined(NAN) && defined(INFINITY) + xmlXPathNAN = NAN; + xmlXPathPINF = INFINITY; +@@ -175,8 +226,34 @@ xmlInitXPathInternal(void) { + xmlXPathPINF = 1.0 / zero; + xmlXPathNINF = -xmlXPathPINF; + #endif ++ ++ /* ++ * Initialize hash table for standard functions ++ */ ++ ++ for (i = 0; i < SF_HASH_SIZE; i++) ++ xmlXPathSFHash[i] = UCHAR_MAX; ++ ++ for (i = 0; i < NUM_STANDARD_FUNCTIONS; i++) { ++ const char *name = xmlXPathStandardFunctions[i].name; ++ int bucketIndex = xmlXPathSFComputeHash(BAD_CAST name) % SF_HASH_SIZE; ++ ++ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { ++ bucketIndex += 1; ++ if (bucketIndex >= SF_HASH_SIZE) ++ bucketIndex = 0; ++ } ++ ++ xmlXPathSFHash[bucketIndex] = i; ++ } + } + ++/************************************************************************ ++ * * ++ * Floating point stuff * ++ * * ++ ************************************************************************/ ++ + /** + * xmlXPathIsNaN: + * @val: a double value +@@ -3979,18 +4056,6 @@ xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, + */ + xmlXPathFunction + xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { +- if (ctxt == NULL) +- return (NULL); +- +- if (ctxt->funcLookupFunc != NULL) { +- xmlXPathFunction ret; +- xmlXPathFuncLookupFunc f; +- +- f = ctxt->funcLookupFunc; +- ret = f(ctxt->funcLookupData, name, NULL); +- if (ret != NULL) +- return(ret); +- } + return(xmlXPathFunctionLookupNS(ctxt, name, NULL)); + } + +@@ -4015,6 +4080,22 @@ xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, + if (name == NULL) + return(NULL); + ++ if (ns_uri == NULL) { ++ int bucketIndex = xmlXPathSFComputeHash(name) % SF_HASH_SIZE; ++ ++ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { ++ int funcIndex = xmlXPathSFHash[bucketIndex]; ++ ++ if (strcmp(xmlXPathStandardFunctions[funcIndex].name, ++ (char *) name) == 0) ++ return(xmlXPathStandardFunctions[funcIndex].func); ++ ++ bucketIndex += 1; ++ if (bucketIndex >= SF_HASH_SIZE) ++ bucketIndex = 0; ++ } ++ } ++ + if (ctxt->funcLookupFunc != NULL) { + xmlXPathFuncLookupFunc f; + +@@ -13494,61 +13575,6 @@ xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) { + void + xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt) + { +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean", +- xmlXPathBooleanFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling", +- xmlXPathCeilingFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count", +- xmlXPathCountFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat", +- xmlXPathConcatFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains", +- xmlXPathContainsFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id", +- xmlXPathIdFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false", +- xmlXPathFalseFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor", +- xmlXPathFloorFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last", +- xmlXPathLastFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang", +- xmlXPathLangFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name", +- xmlXPathLocalNameFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not", +- xmlXPathNotFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name", +- xmlXPathNameFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri", +- xmlXPathNamespaceURIFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space", +- xmlXPathNormalizeFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number", +- xmlXPathNumberFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position", +- xmlXPathPositionFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round", +- xmlXPathRoundFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string", +- xmlXPathStringFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length", +- xmlXPathStringLengthFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with", +- xmlXPathStartsWithFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring", +- xmlXPathSubstringFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before", +- xmlXPathSubstringBeforeFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after", +- xmlXPathSubstringAfterFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum", +- xmlXPathSumFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true", +- xmlXPathTrueFunction); +- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate", +- xmlXPathTranslateFunction); +- + xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri", + (const xmlChar *)"http://www.w3.org/2002/08/xquery-functions", + xmlXPathEscapeUriFunction); +-- +2.47.1 +