Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lower Scroll-to-Zoom delay #2

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"autoprefixer": "^10.4.14",
"babel-loader": "^9.1.2",
"caniuse-lite": "^1.0.30001489",
"canvas": "^2.11.2",
"core-js": "^3.30.2",
"cross-env": "^7.0.3",
"es-module-shims": "1.4.7",
Expand Down Expand Up @@ -41,7 +40,6 @@
"postcss": "^8.4.23",
"postcss-dir-pseudo-class": "^7.0.2",
"prettier": "^2.8.8",
"puppeteer": "^20.5.0",
"rimraf": "^3.0.2",
"streamqueue": "^1.1.2",
"stylelint": "^15.6.2",
Expand All @@ -59,7 +57,6 @@
"yargs": "^17.7.2"
},
"scripts": {
"postinstall": "cross-env PUPPETEER_PRODUCT=firefox node node_modules/puppeteer/install.js"
},
"repository": {
"type": "git",
Expand Down
1 change: 1 addition & 0 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ class Annotation {
hasOwnCanvas: false,
noRotate: !!(this.flags & AnnotationFlag.NOROTATE),
noHTML: isLocked && isContentLocked,
isZotero: (dict.get("NM") || '').startsWith('Zotero-') || dict.get("Zotero:Key"),
};

if (params.collectFields) {
Expand Down
112 changes: 112 additions & 0 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ import { StructTreePage } from "./struct_tree.js";
import { writeObject } from "./writer.js";
import { XFAFactory } from "./xfa/factory.js";
import { XRef } from "./xref.js";
import { getParagraphs } from './text/structure.js';
import { OutlineAnalyzer, PageAnalyzer } from './text/analyzer.js';

const DEFAULT_USER_UNIT = 1.0;
const LETTER_SIZE_MEDIABOX = [0, 0, 612, 792];
Expand Down Expand Up @@ -460,6 +462,22 @@ class Page {
intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
intentPrint = !!(intent & RenderingIntentFlag.PRINT);

const allowedSubtypes = [
'Link',
'Widget',
'Line',
'Circle',
'PolyLine',
'Polygon',
'Caret',
'Squiggly',
'StrikeOut',
'Stamp'
];

annotations = annotations.filter(x => allowedSubtypes.includes(x.data.subtype)
|| ['Square', 'Ink', 'FreeText'].includes(x.data.subtype) && !x.data.isZotero);

// Collect the operator list promises for the annotations. Each promise
// is resolved with the complete operator list for a single annotation.
const opListPromises = [];
Expand Down Expand Up @@ -554,6 +572,47 @@ class Page {
});
}

async getStructuredText({ handler, task, data }) {
let items = [];
let sink = {};
sink.enqueue = function (a, b) {
items.push(...a.items);
};

try {
await this.extractTextContent({
handler,
task,
sink,
includeMarkedContent: data.includeMarkedContent,
combineTextItems: data.combineTextItems,
});
} catch (e) {
console.log(e);
throw e;
}

let fingerprints = new Set();
let chars = [];
for (let item of items) {
if (!item.chars) {
continue;
}
for (let char of item.chars) {
// Some PDF files have their text layer characters repeated many times, therefore remove them
let fingerprint = char.c + char.rect.join('');
if (!fingerprints.has(fingerprint)) {
fingerprints.add(fingerprint);
char.index = chars.length;
chars.push(char);

}
}
}
let paragraphs = getParagraphs(chars);
return { paragraphs };
}

async getStructTree() {
const structTreeRoot = await this.pdfManager.ensureCatalog(
"structTreeRoot"
Expand Down Expand Up @@ -780,6 +839,7 @@ class PDFDocument {
this.xref = new XRef(stream, pdfManager);
this._pagePromises = new Map();
this._version = null;
this._structuredTexts = [];

const idCounters = {
font: 0,
Expand Down Expand Up @@ -1517,6 +1577,58 @@ class PDFDocument {
}
}

async getPageData({ handler, task, data }) {
let { pageIndex } = data;
let structuredTextProvider = async (pageIndex) => {
if (this._structuredTexts[pageIndex]) {
return this._structuredTexts[pageIndex];
}
let page = await this.getPage(pageIndex);
let structuredText;
try {
structuredText = await page.getStructuredText({ handler, task, data });
this._structuredTexts[pageIndex] = structuredText;
} catch (e) {
console.log(e);
}
return structuredText;
};

let structuredText = await structuredTextProvider(pageIndex);
let page = await this.getPage(pageIndex);

let pageAnalyzer = new PageAnalyzer(pageIndex, this, structuredTextProvider);
let overlays = await pageAnalyzer.getOverlays();
let pageLabel = await pageAnalyzer.getPageLabel();
let pageData = {
structuredText,
overlays,
viewBox: page.view,
pageLabel
};
return pageData;
}

async getOutline2({ handler, task, data = {} }) {
let { extract } = data;
let structuredTextProvider = async (pageIndex) => {
if (this._structuredTexts[pageIndex]) {
return this._structuredTexts[pageIndex];
}
let page = await this.getPage(pageIndex);
let structuredText;
try {
structuredText = await page.getStructuredText({ handler, task, data });
this._structuredTexts[pageIndex] = structuredText;
} catch (e) {
console.log(e);
}
return structuredText;
};
let outlineAnalyzer = new OutlineAnalyzer(this, structuredTextProvider);
return outlineAnalyzer.getOutline(extract);
}

async checkLastPage(recoveryMode = false) {
const { catalog, pdfManager } = this;

Expand Down
120 changes: 120 additions & 0 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -2284,6 +2284,7 @@ class PartialEvaluator {
transform: null,
fontName: null,
hasEOL: false,
chars: [],
};

// Use a circular buffer (length === 2) to save the last chars in the
Expand Down Expand Up @@ -2518,6 +2519,7 @@ class PartialEvaluator {
transform: textChunk.transform,
fontName: textChunk.fontName,
hasEOL: textChunk.hasEOL,
chars: textChunk.chars,
};
}

Expand Down Expand Up @@ -2843,6 +2845,9 @@ class PartialEvaluator {
scaledDim = 0;
}

let prevWidth = textChunk.width;
let m = Util.transform(textState.ctm, textState.textMatrix);

if (!font.vertical) {
scaledDim *= textState.textHScale;
textState.translateTextMatrix(scaledDim, 0);
Expand All @@ -2869,6 +2874,120 @@ class PartialEvaluator {
}
textChunk.str.push(glyphUnicode);

/**
 * Snaps an angle to the nearest of the four axis-aligned rotations.
 *
 * @param {number} degrees - Angle in degrees, expected in the range [0, 360).
 * @returns {number} One of 0, 90, 180 or 270. On an exact tie the smaller
 *   angle wins (45 -> 0, 135 -> 90, 225 -> 180, 315 -> 270).
 */
function closestStandardAngle(degrees) {
  // 360 stands in for 0 so that angles just below a full turn (e.g. 350)
  // snap to 0 rather than 270; it is folded back to 0 on return.
  const candidates = [0, 90, 180, 270, 360];
  let closestAngle = 0;
  let minDifference = Infinity;

  for (const candidate of candidates) {
    const difference = Math.abs(degrees - candidate);
    // Strict comparison keeps the earlier (smaller) candidate on ties.
    if (difference < minDifference) {
      minDifference = difference;
      closestAngle = candidate;
    }
  }

  return closestAngle % 360;
}

/**
 * Derives a rotation from the first two components of a transform matrix.
 *
 * The angle is `atan2(matrix[1], matrix[0])`, shifted into the non-negative
 * range, converted to whole degrees, wrapped into [0, 360) and finally
 * passed through `closestStandardAngle`.
 *
 * @param {Array<number>} matrix - Transform whose elements 0 and 1 are read.
 * @returns {number} The value returned by `closestStandardAngle`.
 */
function matrixToDegrees(matrix) {
  const fullTurn = 2 * Math.PI;
  const rawAngle = Math.atan2(matrix[1], matrix[0]);
  const positiveAngle = rawAngle < 0 ? rawAngle + fullTurn : rawAngle;

  // Rounding can produce exactly 360 for angles a hair below a full turn,
  // hence the modulo.
  const wholeDegrees = Math.round(positiveAngle * (180 / Math.PI)) % 360;
  const wrapped = wholeDegrees < 0 ? wholeDegrees + 360 : wholeDegrees;

  return closestStandardAngle(wrapped);
}

let rotation = matrixToDegrees(m);

let ascent = font.ascent;
let descent = font.descent;
if (descent > 0) {
descent = -descent;
}
if (ascent && descent) {
if (ascent > 1) {
ascent = 0.75;
}
if (descent < -0.5) {
descent = -0.25;
}
}
else {
ascent = 0.75;
descent = -0.25;
}

if (font.capHeight && font.capHeight < ascent) {
ascent = font.capHeight;
}

let charWidth = textChunk.width - prevWidth;
let rect = [0, textState.fontSize * descent, charWidth, textState.fontSize * ascent]

if (
font.isType3Font &&
textState.fontSize <= 1 &&
!isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX)
) {
const glyphHeight = font.bbox[3] - font.bbox[1];
if (glyphHeight > 0) {
rect[1] = font.bbox[1] * textState.fontMatrix[3];
rect[3] = font.bbox[3] * textState.fontMatrix[3];
}
}

rect = Util.getAxialAlignedBoundingBox(rect, m);

let baselineRect = Util.getAxialAlignedBoundingBox([0, 0, 0, 0], m);
let baseline = 0;
if (rotation === 0 || rotation === 180) {
baseline = baselineRect[1];
}
else if (rotation === 90 || rotation === 270) {
baseline = baselineRect[0];
}

let p1 = [0, 0];
let p2 = [0, 1];

let [x1, y1] = Util.applyTransform(p1, getCurrentTextTransform());
let [x2, y2] = Util.applyTransform(p2, getCurrentTextTransform());
let fontSize = Math.hypot(x1 - x2, y1 - y2);

let diagonal = rotation % 90 !== 0;

if (
glyph.unicode !== ' '
&& fontSize !== 0
// Sometimes char can map to null and break strings
&& glyph.unicode.charCodeAt(0)
) {
textChunk.chars.push({
// Decomposed ligatures, normalized Arabic characters
c: glyphUnicode,
// Normalizes Arabic characters and other characters whose length remains 1, but preserves
// ligatures and, more importantly, avoids 'e\u00be' being converted into 'e \u0301',
// which is quite common in Spanish author names and, because of the space, prevents
// author name recognition
u: glyphUnicode.length === 1 ? glyphUnicode : glyph.unicode,
rect,
fontSize,
fontName: textState.fontName,
bold: textState.font.bold,
italic: textState.font.italic,
baseline,
rotation,
diagonal,
});
}

if (charSpacing) {
if (!font.vertical) {
textState.translateTextMatrix(
Expand Down Expand Up @@ -2949,6 +3068,7 @@ class PartialEvaluator {
textContent.items.push(runBidiTransform(textContentItem));
textContentItem.initialized = false;
textContentItem.str.length = 0;
textContentItem.chars = [];
}

function enqueueChunk(batch = false) {
Expand Down
Loading