Skip to content

Commit

Permalink
feat: add support of eslint and typescript, plus more tests and excep…
Browse files Browse the repository at this point in the history
…tion handlings (#346)

* feat: add eslint, fix issue #321, add test PDF for #321: image exceptions are now only logged as warnings and no longer stop text and form parsing

* fix: issue #318, fixed together with #321

* fix: issue #343, resolve pkInfo at build time

* fix: issue #255 and #277, keep parsing if img data is not resolved

* maint: add ts dependency and config, remove root pdf2json.js, use ./bin/pdf2json.js instead

* maint: replace lib/p2jcom*.js with src/cli/p2jcli*.ts; the compiled CLI is moved to /bin/cli

* maint: add compiled bundle to bin/cli and dist

* maint: remove unnecessary build artifacts

* build: add prepare scripts

* test: add more page content verification in test

* test: add more cli tests after jest

* fix: auto create output dir in CLI to make more tests run without setting up directory structure

* fix: #262: correct the condition check, also add i262_4hXzVq.pdf to test/misc
  • Loading branch information
modesty authored May 24, 2024
1 parent 4c85d0b commit 3e60cde
Show file tree
Hide file tree
Showing 38 changed files with 2,243 additions and 698 deletions.
134 changes: 134 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
{
"root": true,
"extends": [
"eslint:recommended",
"plugin:@typescript-eslint/eslint-recommended",
"plugin:@typescript-eslint/recommended"],
"parser": "@typescript-eslint/parser",
"parserOptions": {
"ecmaVersion": 2022,
"sourceType": "module"
},
"ignorePatterns": ["/dist/", "/bin/", "/test/", "/node_modules/", "/base/", "/lib/pdfjs-code.js"],
"overrides": [{
"files": ["*.js", "*.ts"],
"rules": {
"no-mixed-spaces-and-tabs": ["error", "smart-tabs"],
"@typescript-eslint/naming-convention": [
"error",
{
"selector": "variable",
"format": ["camelCase", "PascalCase", "UPPER_CASE"],
"filter": {
"regex": "^_",
"match": false
}
},
{
"selector": "function",
"format": ["camelCase", "PascalCase"]
},
{
"selector": "parameter",
"format": ["camelCase", "PascalCase"],
"filter": {
"regex": "^_",
"match": false
}
},
{
"selector": "property",
"format": ["camelCase", "PascalCase", "UPPER_CASE"],
"filter": {
"regex": "(x-ide-git-auth|^_|^sys_*|app_id)",
"match": false
}
},
{
"selector": "method",
"format": ["camelCase", "PascalCase"],
"filter": {
"regex": "^_",
"match": false
}
},
{
"selector": "accessor",
"format": ["camelCase", "PascalCase"]
},
{
"selector": "enumMember",
"format": ["camelCase", "PascalCase", "UPPER_CASE"]
},
{
"selector": "typeLike",
"format": ["PascalCase"]
}
],
"@typescript-eslint/no-unused-vars": [
"error",
{
"vars": "all",
"args": "after-used",
"argsIgnorePattern": "(^_?|fs|uri|options|opts|source|signal|destination|Uri$|args)",
"ignoreRestSiblings": true,
"destructuredArrayIgnorePattern": "^_?"
}
],
"@typescript-eslint/no-extra-semi": "off",
"@typescript-eslint/no-var-requires": "off",
"arrow-body-style": ["error", "as-needed"],
"dot-notation": ["error"],
"eqeqeq": ["error", "always"],
"no-case-declarations": "error",
"no-duplicate-imports": ["error"],
"no-else-return": [
"error",
{
"allowElseIf": true
}
],
"no-eval": [
"error",
{
"allowIndirect": false
}
],
"no-iterator": ["error"],
"no-multi-assign": ["error"],
"no-new-func": ["error"],
"no-new-wrappers": ["error"],
"no-object-constructor": ["error"],
"no-param-reassign": "off",
"no-restricted-imports": ["error", "lodash", "moment"],
"no-throw-literal": "warn",
"no-useless-call": ["error"],
"object-curly-spacing": ["error", "always"],
"object-shorthand": [
"error",
"always",
{
"avoidExplicitReturnArrows": true
}
],
"prefer-arrow-callback": [
"error",
{
"allowNamedFunctions": true
}
],
"prefer-const": ["error"],
"prefer-destructuring": [
"error",
{
"object": true,
"array": false
}
],
"prefer-rest-params": ["error"],
"prefer-spread": ["error"],
"prefer-template": ["error"]
}
}
]
}
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ node_modules/
target/
.idea
.npmrc
vscode/
dist/
.vscode/
lib/pdfjs-code.js
src/**/*.js
/dist/
/bin/cli/
4 changes: 4 additions & 0 deletions .prettierrc.cjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Prettier configuration: take every option exported by
// prettier-config-standard and override only `trailingComma`.
const standardPreset = require('prettier-config-standard')

module.exports = Object.assign({}, standardPreset, { trailingComma: 'es5' })
6 changes: 3 additions & 3 deletions base/core/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ var Page = (function PageClosure() {
var promise = new Promise();
if (!this.resources) //empty page
promise.resolve();
else
else
this.resourcesPromise.then(function resourceSuccess() {
var objectLoader = new ObjectLoader(this.resources.map,
keys,
Expand Down Expand Up @@ -331,8 +331,8 @@ var PDFDocument = (function PDFDocumentClosure() {

PDFDocument.prototype = {
parse: function PDFDocument_parse(recoveryMode) {
this.setup(recoveryMode);
try {
this.setup(recoveryMode); // setup may throw
// checking if AcroForm is present
this.acroForm = this.catalog.catDict.get('AcroForm');
if (this.acroForm) {
Expand All @@ -345,7 +345,7 @@ var PDFDocument = (function PDFDocumentClosure() {
}
}
} catch (ex) {
info('Something wrong with AcroForm entry');
error(ex);
this.acroForm = null;
}
},
Expand Down
10 changes: 8 additions & 2 deletions base/core/image.js
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,14 @@ var PDFImage = (function PDFImageClosure() {
height: drawHeight,
data: new Uint8Array(drawWidth * drawHeight * 4)
};
var pixels = imgData.data;
this.fillRgbaBuffer(pixels, drawWidth, drawHeight);

var pixels = imgData.data;
try {
this.fillRgbaBuffer(pixels, drawWidth, drawHeight);
} catch (e) {
warn('Unable to decode image: ' + e); // keep moving on
}

return imgData;
},
getImageBytes: function PDFImage_getImageBytes(length) {
Expand Down
8 changes: 5 additions & 3 deletions base/core/stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,9 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {
this.str = str;
this.dict = str.dict;

params = params || new Dict();
if (!(params instanceof Dict)) {
params = new Dict();
}

this.encoding = params.get('K') || 0;
this.eoline = params.get('EndOfLine') || false;
Expand Down Expand Up @@ -1707,7 +1709,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {

if (a1 > codingLine[codingPos]) {
if (a1 > this.columns) {
info('row is wrong length');
info('row is wrong length: ' + a1 + ' > ' + this.columns);
this.err = true;
a1 = this.columns;
}
Expand All @@ -1727,7 +1729,7 @@ var CCITTFaxStream = (function CCITTFaxStreamClosure() {

if (a1 > codingLine[codingPos]) {
if (a1 > this.columns) {
info('row is wrong length');
info('row is wrong length: ' + a1 + ' > ' + this.columns);
this.err = true;
a1 = this.columns;
}
Expand Down
12 changes: 7 additions & 5 deletions base/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ PDFJS.postMessageTransfers = PDFJS.postMessageTransfers === undefined ?
* - password - For decrypting password-protected PDFs.
* - initialData - A typed array with the first portion or all of the pdf data.
* Used by the extension since some data is already loaded
* before the switch to range requests.
* before the switch to range requests.
*
* @param {object} pdfDataRangeTransport is optional. It is used if you want
* to manually serve range requests for data in the PDF. See viewer.js for
Expand Down Expand Up @@ -417,7 +417,7 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
internalRenderTask.operatorListChanged();
}
catch(err) {
complete(err);
complete(err);
}
},
function pageDisplayReadPromiseError(reason) {
Expand Down Expand Up @@ -1005,9 +1005,11 @@ var PDFObjects = (function PDFObjectsClosure() {

// If there isn't an object yet or the object isn't resolved, then the
// data isn't ready yet!
if (!obj || !obj.resolved)
error('Requesting object that isn\'t resolved yet ' + objId);

if (!obj || !obj.resolved) { // fix issue #255, keep moving on, test PDF is i255_emojis.pdf
// error('Requesting object that isn\'t resolved yet ' + objId)
warn('Requesting object that isn\'t resolved yet ' + objId);
return null;
}
return obj.data;
},

Expand Down
2 changes: 1 addition & 1 deletion bin/pdf2json.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env node

import PDFCLI from "../lib/p2jcmd.js";
import PDFCLI from "./cli/pdfparser_cli.js";
new PDFCLI().start();
28 changes: 14 additions & 14 deletions lib/parserstream.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { Transform, Readable } from "stream";
import fs from "fs";
import { Buffer } from "buffer";

export class ParserStream extends Transform {
static createContentStream(jsonObj) {
const rStream = new Readable({objectMode: true});
const rStream = new Readable({ objectMode: true });
rStream.push(jsonObj);
rStream.push(null);
return rStream;
Expand All @@ -18,8 +19,8 @@ export class ParserStream extends Transform {

#pdfParser = null;
#chunks = [];
#parsedData = {Pages:[]};
#_flush_callback = null;
#parsedData = { Pages:[] };
#_flush_callback = null;

constructor(pdfParser, options) {
super(options);
Expand All @@ -32,14 +33,14 @@ export class ParserStream extends Transform {
// this.#_flush_callback();
// this.emit('end', null);
// });
this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]});
this.#pdfParser.on("readable", meta => this.#parsedData = { ...meta, Pages:[] });
this.#pdfParser.on("data", page => {
if (!page) {
this.push(this.#parsedData);
this.#_flush_callback();
}
else
this.#parsedData.Pages.push(page);
if (!page) {
this.push(this.#parsedData);
this.#_flush_callback();
} else {
this.#parsedData.Pages.push(page);
}
});
}

Expand All @@ -57,17 +58,16 @@ export class ParserStream extends Transform {
_destroy() {
super.removeAllListeners();
this.#pdfParser = null;
this.#chunks = [];
this.#chunks = [];
}
}

}

export class StringifyStream extends Transform {
constructor(options) {
super(options);

this._readableState.objectMode = false;
this._writableState.objectMode = true;
this._writableState.objectMode = true;
}

_transform(obj, encoding, callback){
Expand Down
Loading

0 comments on commit 3e60cde

Please sign in to comment.