diff --git a/README.md b/README.md index c633b17..b0852bd 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ const xslt = new Xslt(options); ``` - `cData` (`boolean`, default `true`): resolves CDATA elements in the output. Content under CDATA is resolved as text. This overrides `escape` for CDATA content. -- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` by the corresponding [XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm). +- `escape` (`boolean`, default `true`): replaces symbols like `<`, `>`, `&` and `"` by the corresponding [HTML/XML entities](https://www.tutorialspoint.com/xml/xml_character_entities.htm). Can be overridden by `disable-output-escaping`, which also does the opposite: it unescapes `&lt;` and `&gt;` into `<` and `>`, respectively. - `selfClosingTags` (`boolean`, default `true`): Self-closes tags that don't have inner elements, if `true`. For instance, `<test></test>` becomes `<test />`. - `outputMethod` (`string`, default `xml`): Specifies the default output method. If `<xsl:output>` is declared in your XSLT file, this will be overridden. - `parameters` (`array`, default `[]`): external parameters that you want to use. diff --git a/TODO.md b/TODO.md index 7be2247..f577ee5 100644 --- a/TODO.md +++ b/TODO.md @@ -1,11 +1,14 @@ XSLT-processor TODO ===== -* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453; +* Rethink match algorithm, as described in https://github.com/DesignLiquido/xslt-processor/pull/62#issuecomment-1636684453. There's a good number of issues open about this problem: + * https://github.com/DesignLiquido/xslt-processor/issues/108 + * https://github.com/DesignLiquido/xslt-processor/issues/109 + * https://github.com/DesignLiquido/xslt-processor/issues/110 * XSLT validation, besides the version number; * XSL:number * `attribute-set`, `decimal-format`, etc. 
(check `src/xslt.ts`) -* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute". -* Implement `<xsl:import>` with correct template precedence. +* `/html/body//ul/li|html/body//ol/li` has `/html/body//ul/li` evaluated by this XPath implementation as "absolute", and `/html/body//ol/li` as "relative". Both should be evaluated as "absolute". One idea is to rewrite the XPath logic entirely, since it is nearly impossible to debug it. +* Implement `<xsl:import>` with correct template precedence. Help is much appreciated. It seems to currently work for most of our purposes, but fixes and additions are always welcome! diff --git a/package.json b/package.json index d6966c9..4a87c1e 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "@rollup/plugin-typescript": "^11.1.1", "@types/he": "^1.2.0", "@types/jest": "^29.5.12", + "@types/node-fetch": "^2.6.11", "@typescript-eslint/eslint-plugin": "^8.4.0", "@typescript-eslint/parser": "^8.4.0", "babel-jest": "^29.7.0", diff --git a/src/dom/index.ts b/src/dom/index.ts index bad61c5..c1a8220 100644 --- a/src/dom/index.ts +++ b/src/dom/index.ts @@ -3,4 +3,5 @@ export * from './xdocument'; export * from './xml-functions'; export * from './xml-output-options'; export * from './xml-parser'; +export * from './xbrowser-node'; export * from './xnode'; diff --git a/src/dom/xbrowser-node.ts b/src/dom/xbrowser-node.ts new file mode 100644 index 0000000..baa9bea --- /dev/null +++ b/src/dom/xbrowser-node.ts @@ -0,0 +1,10 @@ +import { XNode } from "./xnode"; + +/** + * Special XNode class, that retains properties from browsers like + * IE, Opera, Safari, etc. 
+ */ +export class XBrowserNode extends XNode { + innerText?: string; + textContent?: string; +} diff --git a/src/dom/xml-functions.ts b/src/dom/xml-functions.ts index 88c6303..f0144b3 100644 --- a/src/dom/xml-functions.ts +++ b/src/dom/xml-functions.ts @@ -14,7 +14,7 @@ import { domGetAttributeValue } from './functions'; import { XNode } from './xnode'; import { XDocument } from './xdocument'; import { XmlOutputOptions } from './xml-output-options'; - +import { XBrowserNode } from './xbrowser-node'; /** * Returns the text value of a node; for nodes without children this @@ -25,7 +25,7 @@ import { XmlOutputOptions } from './xml-output-options'; * @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization. * @returns The XML value as a string. */ -export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: boolean = false): string { +export function xmlValue(node: XNode, disallowBrowserSpecificOptimization: boolean = false): string { if (!node) { return ''; } @@ -33,7 +33,7 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: let ret = ''; switch (node.nodeType) { case DOM_DOCUMENT_TYPE_NODE: - return `` + return ``; case DOM_TEXT_NODE: case DOM_CDATA_SECTION_NODE: case DOM_ATTRIBUTE_NODE: @@ -44,19 +44,22 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: if (!disallowBrowserSpecificOptimization) { // Only returns something if node has either `innerText` or `textContent` (not an XNode). 
// IE, Safari, Opera, and friends (`innerText`) - const innerText = node.innerText; - if (innerText != undefined) { + const browserNode = node as XBrowserNode; + const innerText = browserNode.innerText; + if (innerText !== undefined) { return innerText; } // Firefox (`textContent`) - const textContent = node.textContent; - if (textContent != undefined) { + const textContent = browserNode.textContent; + if (textContent !== undefined) { return textContent; } } if (node.transformedChildNodes.length > 0) { - const transformedTextNodes = node.transformedChildNodes.filter((n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE); + const transformedTextNodes = node.transformedChildNodes.filter( + (n: XNode) => n.nodeType !== DOM_ATTRIBUTE_NODE + ); for (let i = 0; i < transformedTextNodes.length; ++i) { ret += xmlValue(transformedTextNodes[i]); } @@ -71,8 +74,15 @@ export function xmlValue(node: XNode | any, disallowBrowserSpecificOptimization: } } -// TODO: Give a better name to this. -export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolean = false) { +/** + * The older version to obtain a XML value from a node. + * For now, this form is only used to get text from attribute nodes, + * and it should be removed in future versions. + * @param node The attribute node. + * @param disallowBrowserSpecificOptimization A boolean, to avoid browser optimization. + * @returns The XML value as a string. 
+ */ +export function xmlValueLegacyBehavior(node: XNode, disallowBrowserSpecificOptimization: boolean = false) { if (!node) { return ''; } @@ -91,13 +101,14 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea case DOM_ELEMENT_NODE: if (!disallowBrowserSpecificOptimization) { // IE, Safari, Opera, and friends - const innerText = node.innerText; - if (innerText != undefined) { + const browserNode = node as XBrowserNode; + const innerText = browserNode.innerText; + if (innerText !== undefined) { return innerText; } // Firefox - const textContent = node.textContent; - if (textContent != undefined) { + const textContent = browserNode.textContent; + if (textContent !== undefined) { return textContent; } } @@ -121,17 +132,28 @@ export function xmlValue2(node: any, disallowBrowserSpecificOptimization: boolea * @returns The XML string. * @see xmlTransformedText */ -export function xmlText(node: XNode, options: XmlOutputOptions = { - cData: true, - escape: true, - selfClosingTags: true, - outputMethod: 'xml' -}) { +export function xmlText( + node: XNode, + options: XmlOutputOptions = { + cData: true, + escape: true, + selfClosingTags: true, + outputMethod: 'xml' + } +) { const buffer: string[] = []; xmlTextRecursive(node, buffer, options); return buffer.join(''); } +/** + * The recursive logic to transform a node in XML text. + * It can be considered legacy, since it does not work with transformed nodes, and + * probably will be removed in the future. + * @param {XNode} node The node. + * @param {string[]} buffer The buffer, that will represent the transformed XML text. + * @param {XmlOutputOptions} options XML output options. 
+ */ function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) { if (node.nodeType == DOM_TEXT_NODE) { buffer.push(xmlEscapeText(node.nodeValue)); @@ -158,7 +180,10 @@ function xmlTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptio } if (node.childNodes.length === 0) { - if (options.selfClosingTags || (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName))) { + if ( + options.selfClosingTags || + (options.outputMethod === 'html' && ['hr', 'link'].includes(node.nodeName)) + ) { buffer.push('/>'); } else { buffer.push(`>`); @@ -197,15 +222,20 @@ export function xmlTransformedText( return buffer.join(''); } -function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOutputOptions) { +/** + * The recursive logic to transform a node in XML text. + * @param {XNode} node The node. + * @param {string[]} buffer The buffer, that will represent the transformed XML text. + * @param {XmlOutputOptions} options XML output options. + */ +function xmlTransformedTextRecursive(node: XNode, buffer: string[], options: XmlOutputOptions) { if (node.visited) return; const nodeType = node.transformedNodeType || node.nodeType; const nodeValue = node.transformedNodeValue || node.nodeValue; if (nodeType === DOM_TEXT_NODE) { if (node.transformedNodeValue && node.transformedNodeValue.trim() !== '') { - const finalText = node.escape && options.escape? - xmlEscapeText(node.transformedNodeValue) : - node.transformedNodeValue; + const finalText = + node.escape && options.escape ? 
xmlEscapeText(node.transformedNodeValue): xmlUnescapeText(node.transformedNodeValue); buffer.push(finalText); } } else if (nodeType === DOM_CDATA_SECTION_NODE) { @@ -246,9 +276,9 @@ function xmlTransformedTextRecursive(node: XNode, buffer: any[], options: XmlOut function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutputOptions) { buffer.push(`<${xmlFullNodeName(node)}`); - let attributes = node.transformedChildNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE); + let attributes = node.transformedChildNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE); if (attributes.length === 0) { - attributes = node.childNodes.filter(n => n.nodeType === DOM_ATTRIBUTE_NODE); + attributes = node.childNodes.filter((n) => n.nodeType === DOM_ATTRIBUTE_NODE); } for (let i = 0; i < attributes.length; ++i) { @@ -262,9 +292,9 @@ function xmlElementLogicTrivial(node: XNode, buffer: string[], options: XmlOutpu } } - let childNodes = node.transformedChildNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE); + let childNodes = node.transformedChildNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE); if (childNodes.length === 0) { - childNodes = node.childNodes.filter(n => n.nodeType !== DOM_ATTRIBUTE_NODE); + childNodes = node.childNodes.filter((n) => n.nodeType !== DOM_ATTRIBUTE_NODE); } childNodes = childNodes.sort((a, b) => a.siblingPosition - b.siblingPosition); @@ -317,7 +347,17 @@ function xmlFullNodeName(node: XNode): string { } /** - * Escape XML special markup chracters: tag delimiter < > and entity + * Replaces HTML/XML entities with their literal characters. + * Currently only the tag delimiter entities (`&lt;` and `&gt;`) are handled. + * @param text The text to be transformed. + * @returns The unescaped text. + */ +export function xmlUnescapeText(text: string): string { + return `${text}`.replace(/&lt;/g, '<').replace(/&gt;/g, '>'); +} + +/** + * Escape XML special markup characters: tag delimiter <, >, and entity + * reference start delimiter &. 
The escaped string can be used in XML * text portions (i.e. between tags). * @param s The string to be escaped. @@ -332,8 +372,8 @@ export function xmlEscapeText(s: string): string { } /** - * Escape XML special markup characters: tag delimiter < > entity - * reference start delimiter & and quotes ". The escaped string can be + * Escape XML special markup characters: tag delimiter, <, >, entity + * reference start delimiter &, and double quotes ("). The escaped string can be * used in double quoted XML attribute value portions (i.e. in * attributes within start tags). * @param s The string to be escaped. diff --git a/src/xslt/xslt.ts b/src/xslt/xslt.ts index 91768a9..6ee3d60 100644 --- a/src/xslt/xslt.ts +++ b/src/xslt/xslt.ts @@ -24,7 +24,7 @@ import { xmlGetAttribute, xmlTransformedText, xmlValue, - xmlValue2 + xmlValueLegacyBehavior } from '../dom'; import { ExprContext, XPath } from '../xpath'; @@ -366,7 +366,7 @@ export class Xslt { const documentFragment = domCreateDocumentFragment(this.outputDocument); await this.xsltChildNodes(context, template, documentFragment); - const value = xmlValue2(documentFragment); + const value = xmlValueLegacyBehavior(documentFragment); if (output && output.nodeType === DOM_DOCUMENT_FRAGMENT_NODE) { domSetTransformedAttribute(output, name, value); diff --git a/tests/xslt/xslt.test.tsx b/tests/xslt/xslt.test.tsx index 1791292..a0ef37b 100644 --- a/tests/xslt/xslt.test.tsx +++ b/tests/xslt/xslt.test.tsx @@ -199,7 +199,16 @@ describe('xslt', () => { }); describe('xsl:text', () => { - it('disable-output-escaping', async () => { + // Apparently, this is not how `disable-output-escaping` works. + // By an initial research, `` explicitly mentioned in + // the XSLT gives an error like: + // `Unable to generate the XML document using the provided XML/XSL input. 
+ // org.xml.sax.SAXParseException; lineNumber: 4; columnNumber: 70; + // A DOCTYPE is not allowed in content.` + // All the examples of `disable-output-escaping` usage will point out + // the opposite: `&lt;!DOCTYPE html&gt;` will become `<!DOCTYPE html>`. + // This test will be kept here for historical purposes. + it.skip('disable-output-escaping', async () => { const xml = ``; const xslt = ` @@ -216,6 +225,23 @@ describe('xslt', () => { assert.equal(html, ''); }); + it('disable-output-escaping, XML/HTML entities', async () => { + const xml = ``; + const xslt = ` + + + <!DOCTYPE html> + + `; + + const xsltClass = new Xslt(); + const xmlParser = new XmlParser(); + const parsedXml = xmlParser.xmlParse(xml); + const parsedXslt = xmlParser.xmlParse(xslt); + const html = await xsltClass.xsltProcess(parsedXml, parsedXslt); + assert.equal(html, ''); + }); + it('CDATA as JavaScript', async () => { const xml = ` diff --git a/yarn.lock b/yarn.lock index e186f99..ed5ebdc 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1886,6 +1886,14 @@ resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== +"@types/node-fetch@^2.6.11": + version "2.6.11" + resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24" + integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g== + dependencies: + "@types/node" "*" + form-data "^4.0.0" + "@types/node@*": version "22.5.4" resolved "https://registry.yarnpkg.com/@types/node/-/node-22.5.4.tgz#83f7d1f65bc2ed223bdbf57c7884f1d5a4fa84e8"