diff --git a/README.md b/README.md
index c633b17..b0852bd 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ const xslt = new Xslt(options);
```
- `cData` (`boolean`, default `true`): resolves CDATA elements in the output. Content under CDATA is resolved as text. This overrides `escape` for CDATA content.
-- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` by the corresponding [XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm).
+- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` by the corresponding [HTML/XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm). Can be overridden by `disable-output-escaping`, which also does the opposite, unescaping `&lt;` and `&gt;` to `<` and `>`, respectively.
- `selfClosingTags` (`boolean`, default `true`): Self-closes tags that don't have inner elements, if `true`. For instance, `<test></test>` becomes `<test />`.
- `outputMethod` (`string`, default `xml`): Specifies the default output method. If `<xsl:output>` is declared in your XSLT file, this will be overridden.
- `parameters` (`array`, default `[]`): external parameters that you want to use.
diff --git a/TODO.md b/TODO.md
index 7be2247..f577ee5 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,11 +1,14 @@
XSLT-processor TODO
=====
-* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453;
+* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453. There's a good number of issues open about this problem:
+ * https://github.com/DesignLiquido/xslt-processor/issues/108
+ * https://github.com/DesignLiquido/xslt-processor/issues/109
+ * https://github.com/DesignLiquido/xslt-processor/issues/110
* XSLT validation, besides the version number;
* XSL:number
* `attribute-set`, `decimal-format`, etc. (check `src/xslt.ts`)
-* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute".
-* Implement `<xsl:import>` with correct template precedence.
+* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute". One idea is to rewrite the XPath logic entirely, since it is nearly impossible to debug it.
+* Implement `<xsl:import>` with correct template precedence.
Help is much appreciated. It seems to currently work for most of our purposes, but fixes and additions are always welcome!
diff --git a/package.json b/package.json
index d6966c9..4a87c1e 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,7 @@
"@rollup/plugin-typescript": "^11.1.1",
"@types/he": "^1.2.0",
"@types/jest": "^29.5.12",
+ "@types/node-fetch": "^2.6.11",
"@typescript-eslint/eslint-plugin": "^8.4.0",
"@typescript-eslint/parser": "^8.4.0",
"babel-jest": "^29.7.0",
diff --git a/src/dom/index.ts b/src/dom/index.ts
index bad61c5..c1a8220 100644
--- a/src/dom/index.ts
+++ b/src/dom/index.ts
@@ -3,4 +3,5 @@ export * from './xdocument';
export * from './xml-functions';
export * from './xml-output-options';
export * from './xml-parser';
+export * from './xbrowser-node';
export * from './xnode';
diff --git a/src/dom/xbrowser-node.ts b/src/dom/xbrowser-node.ts
new file mode 100644
index 0000000..baa9bea
--- /dev/null
+++ b/src/dom/xbrowser-node.ts
@@ -0,0 +1,10 @@
+import { XNode } from "./xnode";
+
+/**
+ * Special XNode class, that retains properties from browsers like
+ * IE, Opera, Safari, etc.
+ */
+export class XBrowserNode extends XNode {
+ innerText?: string;
+ textContent?: string;
+}
diff --git a/src/dom/xml-functions.ts b/src/dom/xml-functions.ts
index 88c6303..f0144b3 100644
--- a/src/dom/xml-functions.ts
+++ b/src/dom/xml-functions.ts
@@ -14,7 +14,7 @@ import { domGetAttributeValue } from './functions';
import { XNode } from './xnode';
import { XDocument } from './xdocument';
import { XmlOutputOptions } from './xml-output-options';
-
+import { XBrowserNode } from './xbrowser-node';
/**
* Returns the text value of a node; for nodes without children this
@@ -25,7 +25,7 @@ import { XmlOutputOptions } from './xml-output-options';
* @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization.
* @returns The XML value as a string.
*/
-export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: boolean = false): string {
+export function xmlValue(node: XNode, disallowBrowserSpecificOptimization: boolean = false): string {
if (!node) {
return '';
}
@@ -33,7 +33,7 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization:
let ret = '';
switch (node.nodeType) {
case DOM_DOCUMENT_TYPE_NODE:
- return ``
+ return ``;
case DOM_TEXT_NODE:
case DOM_CDATA_SECTION_NODE:
case DOM_ATTRIBUTE_NODE:
@@ -44,19 +44,22 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization:
if (!disallowBrowserSpecificOptimization) {
// Only returns something if node has either `innerText` or `textContent` (not an XNode).
// IE, Safari, Opera, and friends (`innerText`)
- const innerText = node.innerText;
- if (innerText != undefined) {
+ const browserNode = node as XBrowserNode;
+ const innerText = browserNode.innerText;
+ if (innerText !== undefined) {
return innerText;
}
// Firefox (`textContent`)
- const textContent = node.textContent;
- if (textContent != undefined) {
+ const textContent = browserNode.textContent;
+ if (textContent !== undefined) {
return textContent;
}
}
if (node.transformedChildNodes.length > 0) {
- const transformedTextNodes = node.transformedChildNodes.filter((n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE);
+ const transformedTextNodes = node.transformedChildNodes.filter(
+ (n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE
+ );
for (let i = 0; i < transformedTextNodes.length; ++i) {
ret += xmlValue(transformedTextNodes[i]);
}
@@ -71,8 +74,15 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization:
}
}
-// TODO: Give a better name to this.
-export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolean = false) {
+/**
+ * The older version to obtain an XML value from a node.
+ * For now, this form is only used to get text from attribute nodes,
+ * and it should be removed in future versions.
+ * @param node The attribute node.
+ * @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization.
+ * @returns The XML value as a string.
+ */
+export function xmlValueLegacyBehavior(node: XNode, disallowBrowserSpecificOptimization: boolean = false) {
if (!node) {
return '';
}
@@ -91,13 +101,14 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea
case DOM_ELEMENT_NODE:
if (!disallowBrowserSpecificOptimization) {
// IE, Safari, Opera, and friends
- const innerText = node.innerText;
- if (innerText != undefined) {
+ const browserNode = node as XBrowserNode;
+ const innerText = browserNode.innerText;
+ if (innerText !== undefined) {
return innerText;
}
// Firefox
- const textContent = node.textContent;
- if (textContent != undefined) {
+ const textContent = browserNode.textContent;
+ if (textContent !== undefined) {
return textContent;
}
}
@@ -121,17 +132,28 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea
* @returns The XML string.
* @see xmlTransformedText
*/
-export function xmlText(node: XNode, options: XmlOutputOptions = {
- cData: true,
- escape: true,
- selfClosingTags: true,
- outputMethod: 'xml'
-}) {
+export function xmlText(
+ node: XNode,
+ options: XmlOutputOptions = {
+ cData: true,
+ escape: true,
+ selfClosingTags: true,
+ outputMethod: 'xml'
+ }
+) {
const buffer: string[] = [];
xmlTextRecursive(node, buffer, options);
return buffer.join('');
}
+/**
+ * The recursive logic to transform a node into XML text.
+ * It can be considered legacy, since it does not work with transformed nodes, and
+ * probably will be removed in the future.
+ * @param {XNode} node The node.
+ * @param {string[]} buffer The buffer, that will represent the transformed XML text.
+ * @param {XmlOutputOptions} options XML output options.
+ */
function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) {
if (node.nodeType == DOM_TEXT_NODE) {
buffer.push(xmlEscapeText(node.nodeValue));
@@ -158,7 +180,10 @@ function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptio
}
if (node.childNodes.length === 0) {
- if (options.selfClosingTags || (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))) {
+ if (
+ options.selfClosingTags ||
+ (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))
+ ) {
buffer.push('/>');
} else {
buffer.push(`>${xmlFullNodeName(node)}>`);
@@ -197,15 +222,20 @@ export function xmlTransformedText(
return buffer.join('');
}
-function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOutputOptions) {
+/**
+ * The recursive logic to transform a node into XML text.
+ * @param {XNode} node The node.
+ * @param {string[]} buffer The buffer, that will represent the transformed XML text.
+ * @param {XmlOutputOptions} options XML output options.
+ */
+function xmlTransformedTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) {
if (node.visited) return;
const nodeType = node.transformedNodeType || node.nodeType;
const nodeValue = node.transformedNodeValue || node.nodeValue;
if (nodeType === DOM_TEXT_NODE) {
if (node.transformedNodeValue && node.transformedNodeValue.trim() !== '') {
- const finalText = node.escape && options.escape?
- xmlEscapeText(node.transformedNodeValue) :
- node.transformedNodeValue;
+ const finalText =
+ node.escape && options.escape ? xmlEscapeText(node.transformedNodeValue): xmlUnescapeText(node.transformedNodeValue);
buffer.push(finalText);
}
} else if (nodeType === DOM_CDATA_SECTION_NODE) {
@@ -246,9 +276,9 @@ function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOut
function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutputOptions) {
buffer.push(`<${xmlFullNodeName(node)}`);
- let attributes = node.transformedChildNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE);
+ let attributes = node.transformedChildNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE);
if (attributes.length === 0) {
- attributes = node.childNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE);
+ attributes = node.childNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE);
}
for (let i = 0; i < attributes.length; ++i) {
@@ -262,9 +292,9 @@ function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutpu
}
}
- let childNodes = node.transformedChildNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE);
+ let childNodes = node.transformedChildNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE);
if (childNodes.length === 0) {
- childNodes = node.childNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE);
+ childNodes = node.childNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE);
}
childNodes = childNodes.sort((a, b) => a.siblingPosition - b.siblingPosition);
@@ -317,7 +347,17 @@ function xmlFullNodeName(node: XNode): string {
}
/**
- * Escape XML special markup chracters: tag delimiter < > and entity
+ * Replaces HTML/XML entities with their literal characters.
+ * Currently implementing only tag delimiters.
+ * @param text The text to be transformed.
+ * @returns The unescaped text.
+ */
+export function xmlUnescapeText(text: string): string {
+ return `${text}`.replace(/&lt;/g, '<').replace(/&gt;/g, '>');
+}
+
+/**
+ * Escape XML special markup characters: tag delimiter <, >, and entity
* reference start delimiter &. The escaped string can be used in XML
* text portions (i.e. between tags).
* @param s The string to be escaped.
@@ -332,8 +372,8 @@ export function xmlEscapeText(s: string): string {
}
/**
- * Escape XML special markup characters: tag delimiter < > entity
- * reference start delimiter & and quotes ". The escaped string can be
+ * Escape XML special markup characters: tag delimiters < and >, entity
+ * reference start delimiter &, and double quotes ("). The escaped string can be
* used in double quoted XML attribute value portions (i.e. in
* attributes within start tags).
* @param s The string to be escaped.
diff --git a/src/xslt/xslt.ts b/src/xslt/xslt.ts
index 91768a9..6ee3d60 100644
--- a/src/xslt/xslt.ts
+++ b/src/xslt/xslt.ts
@@ -24,7 +24,7 @@ import {
xmlGetAttribute,
xmlTransformedText,
xmlValue,
- xmlValue2
+ xmlValueLegacyBehavior
} from '../dom';
import { ExprContext, XPath } from '../xpath';
@@ -366,7 +366,7 @@ export class Xslt {
const documentFragment = domCreateDocumentFragment(this.outputDocument);
await this.xsltChildNodes(context, template, documentFragment);
- const value = xmlValue2(documentFragment);
+ const value = xmlValueLegacyBehavior(documentFragment);
if (output && output.nodeType === DOM_DOCUMENT_FRAGMENT_NODE) {
domSetTransformedAttribute(output, name, value);
diff --git a/tests/xslt/xslt.test.tsx b/tests/xslt/xslt.test.tsx
index 1791292..a0ef37b 100644
--- a/tests/xslt/xslt.test.tsx
+++ b/tests/xslt/xslt.test.tsx
@@ -199,7 +199,16 @@ describe('xslt', () => {
});
describe('xsl:text', () => {
- it('disable-output-escaping', async () => {
+ // Apparently, this is not how `disable-output-escaping` works.
+ // From initial research, `<!DOCTYPE html>` explicitly mentioned in
+ // the XSLT gives an error like:
+ // `Unable to generate the XML document using the provided XML/XSL input.
+ // org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 70;
+ // A DOCTYPE is not allowed in content.`
+ // All the examples of `disable-output-escaping` usage point to
+ // the opposite: `&lt;!DOCTYPE html&gt;` will become `<!DOCTYPE html>`.
+ // This test is kept here for historical purposes.
+ it.skip('disable-output-escaping', async () => {
const xml = ``;
const xslt = `
@@ -216,6 +225,23 @@ describe('xslt', () => {
assert.equal(html, '');
});
+ it('disable-output-escaping, XML/HTML entities', async () => {
+ const xml = ``;
+ const xslt = `
+
+
+ <!DOCTYPE html>
+
+ `;
+
+ const xsltClass = new Xslt();
+ const xmlParser = new XmlParser();
+ const parsedXml = xmlParser.xmlParse(xml);
+ const parsedXslt = xmlParser.xmlParse(xslt);
+ const html = await xsltClass.xsltProcess(parsedXml, parsedXslt);
+ assert.equal(html, '');
+ });
+
it('CDATA as JavaScript', async () => {
const xml = `
diff --git a/yarn.lock b/yarn.lock
index e186f99..ed5ebdc 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1886,6 +1886,14 @@
resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"
integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==
+"@types/node-fetch@^2.6.11":
+ version "2.6.11"
+ resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24"
+ integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==
+ dependencies:
+ "@types/node" "*"
+ form-data "^4.0.0"
+
"@types/node@*":
version "22.5.4"
resolved "https://registry.yarnpkg.com/@types/node/-/node-22.5.4.tgz#83f7d1f65bc2ed223bdbf57c7884f1d5a4fa84e8"