From f546ae98a628c2b5b237c26f3b732d6dfea4e9be Mon Sep 17 00:00:00 2001 From: Manuel Ruck Date: Sun, 5 Nov 2023 18:10:20 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20=F0=9F=90=9B=20fix=20scapacra=20build?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manuel Ruck --- services/scrapers/scapacra/package.json | 5 +- services/scrapers/scapacra/src/DataPackage.ts | 64 +++---- .../src/evaluator/DocumentEvaluator.ts | 165 +++++++++--------- services/scrapers/scapacra/tsconfig.json | 54 +----- 4 files changed, 127 insertions(+), 161 deletions(-) diff --git a/services/scrapers/scapacra/package.json b/services/scrapers/scapacra/package.json index 91cc0b9db..6e1c94d84 100644 --- a/services/scrapers/scapacra/package.json +++ b/services/scrapers/scapacra/package.json @@ -15,11 +15,10 @@ "scripts": { "build": "tsc", "link": "cd dist && yarn link", - "dev": "nodemon .\\src\\Example.ts", + "dev": "nodemon ./src/Example.ts", "lint": "tslint --project tsconfig.json && yarn typecheck", "typecheck": "tsc --noEmit", - "test": "mocha -r ts-node/register test/**/*.test.ts", - "prepare": "yarn run build" + "test": "mocha -r ts-node/register test/**/*.test.ts" }, "bugs": { "url": "https://github.com/demokratie-live/scapacra/issues" diff --git a/services/scrapers/scapacra/src/DataPackage.ts b/services/scrapers/scapacra/src/DataPackage.ts index 1bbfc90ae..2fda8afa1 100644 --- a/services/scrapers/scapacra/src/DataPackage.ts +++ b/services/scrapers/scapacra/src/DataPackage.ts @@ -1,42 +1,42 @@ /** * A data bundle decripes all forms of information it's their metadata. - * : Datatype of the wrapped data. + * : Datatype of the wrapped data. */ -export class DataPackage { - /** - * Meta Data. - */ - public meta: M | null = null; +export class DataPackage { + /** + * Meta Data. + */ + public meta: M | null = null; - /** - * Raw data. - */ - public data: D | null = null; + /** + * Raw data. + */ + public data: D | null = null; - constructor(data: D | null, meta?: M | null){ - this.setData(data); - if(meta){ - this.setMeta(meta); - } + constructor(data: D | null, meta?: M | null) { + this.setData(data); + if (meta) { + this.setMeta(meta); } + } - public setData(data: D | null) { - this.data = data; - } - public setMeta(meta: M | null) { - this.meta = meta; - } + public setData(data: D | null) { + this.data = data; + } + public setMeta(meta: M | null) { + this.meta = meta; + } - public getData(): D | null { - return this.data; - } + public getData(): D | null { + return this.data; + } - public getMeta(): M | null { - return this.meta; - } + public getMeta(): M | null { + return this.meta; + } - public free(): void { - delete this.data; - delete this.meta; - } -} \ No newline at end of file + public free(): void { + this.data = null; + this.meta = null; + } +} diff --git a/services/scrapers/scapacra/src/evaluator/DocumentEvaluator.ts b/services/scrapers/scapacra/src/evaluator/DocumentEvaluator.ts index 136ba2b61..65db4819c 100644 --- a/services/scrapers/scapacra/src/evaluator/DocumentEvaluator.ts +++ b/services/scrapers/scapacra/src/evaluator/DocumentEvaluator.ts @@ -6,100 +6,105 @@ import readline = require('readline'); export = Documents_Evaluator; namespace Documents_Evaluator { + /** + * Evaluates a xPath-Expression to a xml document and returning the matching content as JSON. + */ + export class DocumentEvaluater { + private readableStream: NodeJS.ReadableStream; + + private xml2jsOptions = { + explicitRoot: false, + explicitArray: false, + mergeAttrs: true, + }; + /** * Evaluates a xPath-Expression to a xml document and returning the matching content as JSON. + * + * @param readableStream Stream of the xml document. */ - export class DocumentEvaluater { - private readableStream: NodeJS.ReadableStream; + constructor(readableStream: NodeJS.ReadableStream) { + this.readableStream = readableStream; + } + + protected xmlDOMErrorCallback(msg: string): void { + console.log(`[xmldom error]: ${msg}`); + } + protected xmlDOMFatalErrorCallback(msg: string): void { + console.log(`[xmldom error]: ${msg}`); + } + protected xmlDOMWarningCallback(msg: string): void { + console.log(`[xmldom warning]: ${msg}`); + } - private xml2jsOptions = { - explicitRoot: false, - explicitArray: false, - mergeAttrs: true - }; + public async evaluate(xPathExpression: string): Promise { + const xml = await this.removeXmlHeader(this.readableStream); + const parser = new DOMParser({ /** - * Evaluates a xPath-Expression to a xml document and returning the matching content as JSON. - * - * @param readableStream Stream of the xml document. + * locator is always need for error position info */ - constructor(readableStream: NodeJS.ReadableStream) { - this.readableStream = readableStream; - } - - protected xmlDOMErrorCallback(msg: String): void { - console.log(`[xmldom error]: ${msg}`); - } - protected xmlDOMFatalErrorCallback(msg: String): void { - console.log(`[xmldom error]: ${msg}`); - } - protected xmlDOMWarningCallback(msg: String): void { - console.log(`[xmldom warning]: ${msg}`); - } - - - public async evaluate(xPathExpression: string): Promise { - let xml = await this.removeXmlHeader(this.readableStream); + locator: {}, + /** + * you can override the errorHandler for xml parser + * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html + */ + errorHandler: { + warning: this.xmlDOMWarningCallback, + error: this.xmlDOMErrorCallback, + fatalError: this.xmlDOMFatalErrorCallback, + }, + }); + const doc = parser.parseFromString(xml); - let parser = new DOMParser({ - /** - * locator is always need for error position info - */ - locator: {}, - /** - * you can override the errorHandler for xml parser - * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html - */ - errorHandler: { - warning: this.xmlDOMWarningCallback, - error: this.xmlDOMErrorCallback, - fatalError: this.xmlDOMFatalErrorCallback - } - }); - let doc = parser.parseFromString(xml); + const nodes = xpath.select(xPathExpression, doc) as any[]; - let nodes = xpath.select(xPathExpression, doc); + let elements: any[] = []; + if (nodes) { + for (const node of nodes) { + const value = await this.getValueFromSelectedNode(node); + elements = elements.concat(value); + } - let elements: any[] = []; - for (const node of nodes) { - let value = await this.getValueFromSelectedNode(node); - elements = elements.concat(value); - } + return elements; + } + } - return elements; + protected getValueFromSelectedNode(node: xpath.SelectedValue): Promise { + return new Promise((resolve, reject) => { + if (node == null) { + return resolve(null); } - protected getValueFromSelectedNode(node: xpath.SelectedValue): Promise { - return new Promise((resolve, reject) => { - parseString(node, this.xml2jsOptions, (err: any, result: any) => { - if (err == null) { - resolve(result); - } else { - reject(err); - } - }); - }); - } + parseString(node, this.xml2jsOptions, (err: any, result: any) => { + if (err == null) { + resolve(result); + } else { + reject(err); + } + }); + }); + } - /** - * Removes the stylesheet definition and doctype declarion from the xml document to - * garentee a proper xPath evaluation. - */ - private async removeXmlHeader(stream: NodeJS.ReadableStream): Promise { - return new Promise((resolve) => { - var rl = readline.createInterface(stream); - let output: string; + /** + * Removes the stylesheet definition and doctype declarion from the xml document to + * garentee a proper xPath evaluation. + */ + private async removeXmlHeader(stream: NodeJS.ReadableStream): Promise { + return new Promise((resolve) => { + const rl = readline.createInterface(stream); + let output: string; - rl.on('line', (line) => { - let isDeclarationHeader = line.match(/^\<(\?|\!).*$/); + rl.on('line', (line) => { + const isDeclarationHeader = line.match(/^\<(\?|\!).*$/); - if (isDeclarationHeader == null) { - output += line + '\n'; - } - }).on('close', () => { - resolve(output); - }); - }); - } + if (isDeclarationHeader == null) { + output += line + '\n'; + } + }).on('close', () => { + resolve(output); + }); + }); } -} \ No newline at end of file + } +} diff --git a/services/scrapers/scapacra/tsconfig.json b/services/scrapers/scapacra/tsconfig.json index 3e5a5283c..309e6e937 100644 --- a/services/scrapers/scapacra/tsconfig.json +++ b/services/scrapers/scapacra/tsconfig.json @@ -1,49 +1,11 @@ { "compilerOptions": { - /* Basic Options */ - "target": "es6", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', or 'ESNEXT'. */ - "module": "commonjs", /* Specify module code generation: 'commonjs', 'amd', 'system', 'umd' or 'es2015'. */ - "watch": false, - // "lib": [], /* Specify library files to be included in the compilation: */ - // "allowJs": true, /* Allow javascript files to be compiled. */ - // "checkJs": true, /* Report errors in .js files. */ - // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ - "declaration": true, /* Generates corresponding '.d.ts' file. */ - // "sourceMap": true, /* Generates corresponding '.map' file. */ - // "outFile": "./", /* Concatenate and emit output to single file. */ - "outDir": "./dist", /* Redirect output structure to the directory. */ - // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ - // "removeComments": true, /* Do not emit comments to output. */ - // "noEmit": true, /* Do not emit outputs. */ - // "importHelpers": true, /* Import emit helpers from 'tslib'. */ - // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ - // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ - /* Strict Type-Checking Options */ - "strict": true /* Enable all strict type-checking options. */ - // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ - // "strictNullChecks": true, /* Enable strict null checks. */ - // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ - // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ - /* Additional Checks */ - // "noUnusedLocals": true, /* Report errors on unused locals. */ - // "noUnusedParameters": true, /* Report errors on unused parameters. */ - // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ - // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ - /* Module Resolution Options */ - // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ - // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ - // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ - // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ - // "typeRoots": [], /* List of folders to include type definitions from. */ - // "types": [], /* Type declaration files to be included in compilation. */ - // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ - /* Source Map Options */ - // "sourceRoot": "./", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ - // "mapRoot": "./", /* Specify the location where debugger should locate map files instead of generated locations. */ - // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ - // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ - /* Experimental Options */ - // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ - // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ + "target": "es6", + "module": "commonjs", + "declaration": true, + "outDir": "./dist", + + "strict": true, + "skipLibCheck": true } -} \ No newline at end of file +}