From 28fe96cbaf2178bd6285678815538a1f5a2c6e90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kaj=20Pity=C5=84ski?= Date: Thu, 31 Oct 2024 11:51:29 +0100 Subject: [PATCH] Enable XML mode of parsing documents --- package-lock.json | 121 ++++++++++++++++++----------------------- package.json | 2 +- src/Utils/xml/lexer.ts | 3 +- 3 files changed, 56 insertions(+), 70 deletions(-) diff --git a/package-lock.json b/package-lock.json index 8f45dfb..0d37f59 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,7 @@ "dependencies": { "extract-zip": "^1.7.0", "fs-extra": "^5.0.0", - "htmlparser2": "^7.2.0", + "htmlparser2": "^9.1.0", "lodash": "^4.17.21", "md5": "^2.3.0", "vscode-extension-telemetry-wrapper": "^0.14.0", @@ -1241,26 +1241,18 @@ } }, "node_modules/dom-serializer": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", - "integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", "dependencies": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.0", - "entities": "^2.0.0" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" }, "funding": { "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" } }, - "node_modules/dom-serializer/node_modules/entities": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", - "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/domelementtype": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", @@ -1273,11 +1265,11 @@ ] }, "node_modules/domhandler": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", - "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", "dependencies": { - "domelementtype": "^2.2.0" + "domelementtype": "^2.3.0" }, "engines": { "node": ">= 4" @@ -1287,13 +1279,13 @@ } }, "node_modules/domutils": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", - "integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", "dependencies": { - "dom-serializer": "^1.0.1", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0" + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" }, "funding": { "url": "https://github.com/fb55/domutils?sponsor=1" @@ -1331,9 +1323,9 @@ } }, "node_modules/entities": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-3.0.1.tgz", - "integrity": "sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==", + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", "engines": { "node": ">=0.12" }, @@ -1657,9 +1649,9 @@ } }, "node_modules/htmlparser2": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-7.2.0.tgz", - "integrity": "sha512-H7MImA4MS6cw7nbyURtLPO1Tms7C5H602LRETv95z1MxO/7CP7rDVROehUYeYBUYEON94NXXDEPmZuq+hX4sog==", + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", "funding": [ "https://github.com/fb55/htmlparser2?sponsor=1", { @@ -1668,10 +1660,10 @@ } ], "dependencies": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.2", - "domutils": "^2.8.0", - "entities": "^3.0.1" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" } }, "node_modules/http-proxy-agent": { @@ -4579,20 +4571,13 @@ "dev": true }, "dom-serializer": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", - "integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", "requires": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.0", - "entities": "^2.0.0" - }, - "dependencies": { - "entities": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", - "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==" - } + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" } }, "domelementtype": { @@ -4601,21 +4586,21 @@ "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" }, "domhandler": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", - "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", "requires": { - "domelementtype": "^2.2.0" + "domelementtype": "^2.3.0" } }, "domutils": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", - "integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", "requires": { - "dom-serializer": "^1.0.1", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0" + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" } }, "eastasianwidth": { @@ -4647,9 +4632,9 @@ } }, "entities": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-3.0.1.tgz", - "integrity": "sha512-WiyBqoomrwMdFG1e0kqvASYfnlb0lp8M5o5Fw2OFq1hNZxxcNk8Ik0Xm7LxzBhuidnZB/UtBqVCgUz3kBOP51Q==" + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==" }, "envinfo": { "version": "7.13.0", @@ -4879,14 +4864,14 @@ "dev": true }, "htmlparser2": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-7.2.0.tgz", - "integrity": "sha512-H7MImA4MS6cw7nbyURtLPO1Tms7C5H602LRETv95z1MxO/7CP7rDVROehUYeYBUYEON94NXXDEPmZuq+hX4sog==", + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", "requires": { - "domelementtype": "^2.0.1", - "domhandler": "^4.2.2", - "domutils": "^2.8.0", - "entities": "^3.0.1" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "entities": "^4.5.0" } }, "http-proxy-agent": { diff --git a/package.json b/package.json index 589acaa..ae07518 100644 --- a/package.json +++ b/package.json @@ -201,7 +201,7 @@ "dependencies": { "extract-zip": "^1.7.0", "fs-extra": "^5.0.0", - "htmlparser2": "^7.2.0", + "htmlparser2": "^9.1.0", "lodash": "^4.17.21", "md5": "^2.3.0", "vscode-extension-telemetry-wrapper": "^0.14.0", diff --git a/src/Utils/xml/lexer.ts b/src/Utils/xml/lexer.ts index 86e9eae..5be10fa 100644 --- a/src/Utils/xml/lexer.ts +++ b/src/Utils/xml/lexer.ts @@ -18,7 +18,8 @@ export function getNodesByTag(text: string, tag: string): Node[] { // const tokens: number[][] = Lexx(text); const document: Document = hp.parseDocument(text, { withEndIndices: true, - withStartIndices: true + withStartIndices: true, + xmlMode: true }); const ret: Node[] = []; dfs(document, (node) => isTag(node) && node.tagName === tag, ret);