-
-
Notifications
You must be signed in to change notification settings - Fork 905
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: use libxml2 patch for faster xpath context init (#3389)
**What problem is this PR intended to solve?** After chatting with @nwellnhof in #3378, he went ahead and made an performance-focused change upstream in libxml2/gnome@bf5fcf6e that I'm pulling in here to the vendored library. Benchmark comparing this PR against v1.17.x ("main"): Comparison: large: main: 3910.6 i/s large: patched: 3759.6 i/s - same-ish: difference falls within error Comparison: small: patched: 242901.7 i/s small: main: 127486.0 i/s - 1.91x slower We could get greater performance gains by re-using `XPathContext` objects, but only at the cost of a significant amount of additional complexity, since in order to properly support recursive XPath evaluation, Nokogiri would have to push and pop "stack frames" containing: - internal state contextSize and proximityPosition - registered namespaces - registered variables - function lookup handler That feels like a lot of code for a small win. Comparatively, pulling in this upstream patch is still a ~2x speedup with zero additional complexity. That's a win. **Have you included adequate test coverage?** N/A **Does this change affect the behavior of either the C or the Java implementations?** Performance improvement impacting CRuby only.
- Loading branch information
Showing
7 changed files
with
329 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
244 changes: 244 additions & 0 deletions
244
patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,244 @@ | ||
From d3e3526111097560cf7c002613e2cb1d469b59e0 Mon Sep 17 00:00:00 2001 | ||
From: Nick Wellnhofer <[email protected]> | ||
Date: Sat, 21 Dec 2024 16:03:46 +0100 | ||
Subject: [PATCH] xpath: Use separate static hash table for standard functions | ||
|
||
This avoids registering standard functions when creating an XPath | ||
context. | ||
|
||
Lookup of extension functions is a bit slower now, but ultimately, all | ||
function lookups should be moved to the compilation phase. | ||
|
||
(cherry picked from commit bf5fcf6e646bb51a0f6a3655a1d64bea97274867) | ||
--- | ||
xpath.c | 170 ++++++++++++++++++++++++++++++++------------------------ | ||
1 file changed, 98 insertions(+), 72 deletions(-) | ||
|
||
diff --git a/xpath.c b/xpath.c | ||
index 485d7747..21711653 100644 | ||
--- a/xpath.c | ||
+++ b/xpath.c | ||
@@ -136,11 +136,48 @@ | ||
|
||
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) | ||
|
||
-/************************************************************************ | ||
- * * | ||
- * Floating point stuff * | ||
- * * | ||
- ************************************************************************/ | ||
+static void | ||
+xmlXPathNameFunction(xmlXPathParserContextPtr ctxt, int nargs); | ||
+ | ||
+static const struct { | ||
+ const char *name; | ||
+ xmlXPathFunction func; | ||
+} xmlXPathStandardFunctions[] = { | ||
+ { "boolean", xmlXPathBooleanFunction }, | ||
+ { "ceiling", xmlXPathCeilingFunction }, | ||
+ { "count", xmlXPathCountFunction }, | ||
+ { "concat", xmlXPathConcatFunction }, | ||
+ { "contains", xmlXPathContainsFunction }, | ||
+ { "id", xmlXPathIdFunction }, | ||
+ { "false", xmlXPathFalseFunction }, | ||
+ { "floor", xmlXPathFloorFunction }, | ||
+ { "last", xmlXPathLastFunction }, | ||
+ { "lang", xmlXPathLangFunction }, | ||
+ { "local-name", xmlXPathLocalNameFunction }, | ||
+ { "not", xmlXPathNotFunction }, | ||
+ { "name", xmlXPathNameFunction }, | ||
+ { "namespace-uri", xmlXPathNamespaceURIFunction }, | ||
+ { "normalize-space", xmlXPathNormalizeFunction }, | ||
+ { "number", xmlXPathNumberFunction }, | ||
+ { "position", xmlXPathPositionFunction }, | ||
+ { "round", xmlXPathRoundFunction }, | ||
+ { "string", xmlXPathStringFunction }, | ||
+ { "string-length", xmlXPathStringLengthFunction }, | ||
+ { "starts-with", xmlXPathStartsWithFunction }, | ||
+ { "substring", xmlXPathSubstringFunction }, | ||
+ { "substring-before", xmlXPathSubstringBeforeFunction }, | ||
+ { "substring-after", xmlXPathSubstringAfterFunction }, | ||
+ { "sum", xmlXPathSumFunction }, | ||
+ { "true", xmlXPathTrueFunction }, | ||
+ { "translate", xmlXPathTranslateFunction } | ||
+}; | ||
+ | ||
+#define NUM_STANDARD_FUNCTIONS \ | ||
+ (sizeof(xmlXPathStandardFunctions) / sizeof(xmlXPathStandardFunctions[0])) | ||
+ | ||
+#define SF_HASH_SIZE 64 | ||
+ | ||
+static unsigned char xmlXPathSFHash[SF_HASH_SIZE]; | ||
|
||
double xmlXPathNAN = 0.0; | ||
double xmlXPathPINF = 0.0; | ||
@@ -156,6 +193,18 @@ xmlXPathInit(void) { | ||
xmlInitParser(); | ||
} | ||
|
||
+ATTRIBUTE_NO_SANITIZE_INTEGER | ||
+static unsigned | ||
+xmlXPathSFComputeHash(const xmlChar *name) { | ||
+ unsigned hashValue = 5381; | ||
+ const xmlChar *ptr; | ||
+ | ||
+ for (ptr = name; *ptr; ptr++) | ||
+ hashValue = hashValue * 33 + *ptr; | ||
+ | ||
+ return(hashValue); | ||
+} | ||
+ | ||
/** | ||
* xmlInitXPathInternal: | ||
* | ||
@@ -164,6 +213,8 @@ xmlXPathInit(void) { | ||
ATTRIBUTE_NO_SANITIZE("float-divide-by-zero") | ||
void | ||
xmlInitXPathInternal(void) { | ||
+ size_t i; | ||
+ | ||
#if defined(NAN) && defined(INFINITY) | ||
xmlXPathNAN = NAN; | ||
xmlXPathPINF = INFINITY; | ||
@@ -175,8 +226,34 @@ xmlInitXPathInternal(void) { | ||
xmlXPathPINF = 1.0 / zero; | ||
xmlXPathNINF = -xmlXPathPINF; | ||
#endif | ||
+ | ||
+ /* | ||
+ * Initialize hash table for standard functions | ||
+ */ | ||
+ | ||
+ for (i = 0; i < SF_HASH_SIZE; i++) | ||
+ xmlXPathSFHash[i] = UCHAR_MAX; | ||
+ | ||
+ for (i = 0; i < NUM_STANDARD_FUNCTIONS; i++) { | ||
+ const char *name = xmlXPathStandardFunctions[i].name; | ||
+ int bucketIndex = xmlXPathSFComputeHash(BAD_CAST name) % SF_HASH_SIZE; | ||
+ | ||
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { | ||
+ bucketIndex += 1; | ||
+ if (bucketIndex >= SF_HASH_SIZE) | ||
+ bucketIndex = 0; | ||
+ } | ||
+ | ||
+ xmlXPathSFHash[bucketIndex] = i; | ||
+ } | ||
} | ||
|
||
+/************************************************************************ | ||
+ * * | ||
+ * Floating point stuff * | ||
+ * * | ||
+ ************************************************************************/ | ||
+ | ||
/** | ||
* xmlXPathIsNaN: | ||
* @val: a double value | ||
@@ -3979,18 +4056,6 @@ xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, | ||
*/ | ||
xmlXPathFunction | ||
xmlXPathFunctionLookup(xmlXPathContextPtr ctxt, const xmlChar *name) { | ||
- if (ctxt == NULL) | ||
- return (NULL); | ||
- | ||
- if (ctxt->funcLookupFunc != NULL) { | ||
- xmlXPathFunction ret; | ||
- xmlXPathFuncLookupFunc f; | ||
- | ||
- f = ctxt->funcLookupFunc; | ||
- ret = f(ctxt->funcLookupData, name, NULL); | ||
- if (ret != NULL) | ||
- return(ret); | ||
- } | ||
return(xmlXPathFunctionLookupNS(ctxt, name, NULL)); | ||
} | ||
|
||
@@ -4015,6 +4080,22 @@ xmlXPathFunctionLookupNS(xmlXPathContextPtr ctxt, const xmlChar *name, | ||
if (name == NULL) | ||
return(NULL); | ||
|
||
+ if (ns_uri == NULL) { | ||
+ int bucketIndex = xmlXPathSFComputeHash(name) % SF_HASH_SIZE; | ||
+ | ||
+ while (xmlXPathSFHash[bucketIndex] != UCHAR_MAX) { | ||
+ int funcIndex = xmlXPathSFHash[bucketIndex]; | ||
+ | ||
+ if (strcmp(xmlXPathStandardFunctions[funcIndex].name, | ||
+ (char *) name) == 0) | ||
+ return(xmlXPathStandardFunctions[funcIndex].func); | ||
+ | ||
+ bucketIndex += 1; | ||
+ if (bucketIndex >= SF_HASH_SIZE) | ||
+ bucketIndex = 0; | ||
+ } | ||
+ } | ||
+ | ||
if (ctxt->funcLookupFunc != NULL) { | ||
xmlXPathFuncLookupFunc f; | ||
|
||
@@ -13494,61 +13575,6 @@ xmlXPathEscapeUriFunction(xmlXPathParserContextPtr ctxt, int nargs) { | ||
void | ||
xmlXPathRegisterAllFunctions(xmlXPathContextPtr ctxt) | ||
{ | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"boolean", | ||
- xmlXPathBooleanFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"ceiling", | ||
- xmlXPathCeilingFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"count", | ||
- xmlXPathCountFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"concat", | ||
- xmlXPathConcatFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"contains", | ||
- xmlXPathContainsFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"id", | ||
- xmlXPathIdFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"false", | ||
- xmlXPathFalseFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"floor", | ||
- xmlXPathFloorFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"last", | ||
- xmlXPathLastFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"lang", | ||
- xmlXPathLangFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"local-name", | ||
- xmlXPathLocalNameFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"not", | ||
- xmlXPathNotFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"name", | ||
- xmlXPathNameFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"namespace-uri", | ||
- xmlXPathNamespaceURIFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"normalize-space", | ||
- xmlXPathNormalizeFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"number", | ||
- xmlXPathNumberFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"position", | ||
- xmlXPathPositionFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"round", | ||
- xmlXPathRoundFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string", | ||
- xmlXPathStringFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"string-length", | ||
- xmlXPathStringLengthFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"starts-with", | ||
- xmlXPathStartsWithFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring", | ||
- xmlXPathSubstringFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-before", | ||
- xmlXPathSubstringBeforeFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"substring-after", | ||
- xmlXPathSubstringAfterFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"sum", | ||
- xmlXPathSumFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"true", | ||
- xmlXPathTrueFunction); | ||
- xmlXPathRegisterFunc(ctxt, (const xmlChar *)"translate", | ||
- xmlXPathTranslateFunction); | ||
- | ||
xmlXPathRegisterFuncNS(ctxt, (const xmlChar *)"escape-uri", | ||
(const xmlChar *)"http://www.w3.org/2002/08/xquery-functions", | ||
xmlXPathEscapeUriFunction); | ||
-- | ||
2.47.1 | ||
|
Oops, something went wrong.