diff --git a/package.json b/package.json
index 27508fe19258a..b6f335c7b7e35 100644
--- a/package.json
+++ b/package.json
@@ -10,7 +10,6 @@
     "autoprefixer": "^10.4.14",
     "babel-loader": "^9.1.2",
     "caniuse-lite": "^1.0.30001489",
-    "canvas": "^2.11.2",
     "core-js": "^3.30.2",
     "cross-env": "^7.0.3",
     "es-module-shims": "1.4.7",
@@ -41,7 +40,6 @@
     "postcss": "^8.4.23",
     "postcss-dir-pseudo-class": "^7.0.2",
     "prettier": "^2.8.8",
-    "puppeteer": "^20.5.0",
     "rimraf": "^3.0.2",
     "streamqueue": "^1.1.2",
     "stylelint": "^15.6.2",
@@ -59,7 +57,6 @@
     "yargs": "^17.7.2"
   },
   "scripts": {
-    "postinstall": "cross-env PUPPETEER_PRODUCT=firefox node node_modules/puppeteer/install.js"
   },
   "repository": {
     "type": "git",
diff --git a/src/core/annotation.js b/src/core/annotation.js
index 7937479d452aa..5beb266704594 100644
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -495,6 +495,7 @@ class Annotation {
       hasOwnCanvas: false,
       noRotate: !!(this.flags & AnnotationFlag.NOROTATE),
       noHTML: isLocked && isContentLocked,
+      isZotero: (dict.get("NM") || '').startsWith('Zotero-') || dict.get("Zotero:Key"),
     };
 
     if (params.collectFields) {
diff --git a/src/core/document.js b/src/core/document.js
index 93f31b7950b97..51475cc0f258c 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -58,6 +58,8 @@ import { StructTreePage } from "./struct_tree.js";
 import { writeObject } from "./writer.js";
 import { XFAFactory } from "./xfa/factory.js";
 import { XRef } from "./xref.js";
+import { getParagraphs } from './text/structure.js';
+import { OutlineAnalyzer, PageAnalyzer } from './text/analyzer.js';
 
 const DEFAULT_USER_UNIT = 1.0;
 const LETTER_SIZE_MEDIABOX = [0, 0, 612, 792];
@@ -460,6 +462,22 @@ class Page {
         intentDisplay = !!(intent & RenderingIntentFlag.DISPLAY),
         intentPrint = !!(intent & RenderingIntentFlag.PRINT);
 
+      const allowedSubtypes = [
+        'Link',
+        'Widget',
+        'Line',
+        'Circle',
+        'PolyLine',
+        'Polygon',
+        'Caret',
+        'Squiggly',
+        'StrikeOut',
+        'Stamp'
+      ];
+
+      annotations = annotations.filter(x => allowedSubtypes.includes(x.data.subtype)
+        || ['Square', 'Ink', 'FreeText'].includes(x.data.subtype) && !x.data.isZotero);
+
       // Collect the operator list promises for the annotations. Each promise
       // is resolved with the complete operator list for a single annotation.
       const opListPromises = [];
@@ -554,6 +572,47 @@ class Page {
     });
   }
 
+  /**
+   * Extracts the page text (forwarding `handler`, `task` and the two
+   * `data` extraction options to `extractTextContent`), drops duplicated
+   * chars, and resolves to `{ paragraphs }` built by `getParagraphs`.
+   */
+  async getStructuredText({ handler, task, data }) {
+    const items = [];
+    // Minimal sink: only the `items` of every enqueued chunk are collected
+    const sink = { enqueue(chunk) { items.push(...chunk.items); } };
+
+    try {
+      await this.extractTextContent({
+        handler,
+        task,
+        sink,
+        includeMarkedContent: data.includeMarkedContent,
+        combineTextItems: data.combineTextItems,
+      });
+    } catch (e) {
+      console.log(e);
+      throw e;
+    }
+
+    // Some PDF files repeat their text layer characters many times; keep only
+    // the first char per (text, rect) pair. Rect values are joined with a
+    // separator so that different rects don't collapse into the same string.
+    const fingerprints = new Set();
+    const chars = [];
+    for (const item of items) {
+      for (const char of item.chars || []) {
+        const fingerprint = `${char.c}|${char.rect.join(',')}`;
+        if (!fingerprints.has(fingerprint)) {
+          fingerprints.add(fingerprint);
+          char.index = chars.length;
+          chars.push(char);
+        }
+      }
+    }
+    return { paragraphs: getParagraphs(chars) };
+  }
+
   async getStructTree() {
     const structTreeRoot = await this.pdfManager.ensureCatalog(
       "structTreeRoot"
@@ -780,6 +839,7 @@ class PDFDocument {
     this.xref = new XRef(stream, pdfManager);
     this._pagePromises = new Map();
     this._version = null;
+    this._structuredTexts = [];
 
     const idCounters = {
       font: 0,
@@ -1517,6 +1577,58 @@ class PDFDocument {
     }
   }
 
+  /**
+   * Returns an async `pageIndex => structuredText` getter that serves
+   * results from `_structuredTexts` and fills it on a miss. An extraction
+   * error is logged and makes the getter resolve to `undefined`.
+   */
+  _createStructuredTextProvider({ handler, task, data }) {
+    return async pageIndex => {
+      if (this._structuredTexts[pageIndex]) {
+        return this._structuredTexts[pageIndex];
+      }
+      let page = await this.getPage(pageIndex);
+      let structuredText;
+      try {
+        structuredText = await page.getStructuredText({ handler, task, data });
+        this._structuredTexts[pageIndex] = structuredText;
+      } catch (e) {
+        console.log(e);
+      }
+      return structuredText;
+    };
+  }
+
+  /**
+   * Resolves to `{ structuredText, overlays, viewBox, pageLabel }` for
+   * the page at `data.pageIndex`.
+   */
+  async getPageData({ handler, task, data }) {
+    let { pageIndex } = data;
+    let structuredTextProvider = this._createStructuredTextProvider({ handler, task, data });
+    let structuredText = await structuredTextProvider(pageIndex);
+    let page = await this.getPage(pageIndex);
+
+    let pageAnalyzer = new PageAnalyzer(pageIndex, this, structuredTextProvider);
+    let overlays = await pageAnalyzer.getOverlays();
+    let pageLabel = await pageAnalyzer.getPageLabel();
+    return {
+      structuredText,
+      overlays,
+      viewBox: page.view,
+      pageLabel
+    };
+  }
+
+  /**
+   * Resolves to the outline produced by `OutlineAnalyzer.getOutline`;
+   * `data.extract` is passed through to it.
+   */
+  async getOutline2({ handler, task, data = {} }) {
+    let structuredTextProvider = this._createStructuredTextProvider({ handler, task, data });
+    return new OutlineAnalyzer(this, structuredTextProvider).getOutline(data.extract);
+  }
+
   async checkLastPage(recoveryMode = false) {
     const { catalog, pdfManager } = this;
 
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index a03fa7010286a..88115370e67ab 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -2284,6 +2284,7 @@ class PartialEvaluator {
       transform: null,
       fontName: null,
       hasEOL: false,
+      chars: [],
     };
 
     // Use a circular buffer (length === 2) to save the last chars in the
@@ -2518,6 +2519,7 @@ class PartialEvaluator {
         transform: textChunk.transform,
         fontName: textChunk.fontName,
         hasEOL: textChunk.hasEOL,
+        chars: textChunk.chars,
       };
     }
 
@@ -2843,6 +2845,9 @@ class PartialEvaluator {
           scaledDim = 0;
         }
 
+        let prevWidth = textChunk.width;
+        let m = Util.transform(textState.ctm, textState.textMatrix);
+
         if (!font.vertical) {
           scaledDim *= textState.textHScale;
           textState.translateTextMatrix(scaledDim, 0);
@@ -2869,6 +2874,120 @@ class PartialEvaluator {
         }
         textChunk.str.push(glyphUnicode);
 
+        // Snaps an angle in degrees (expected within [0, 360]) to the nearest
+        // of 0, 90, 180, 270. 360 is included as a candidate and reported as 0
+        // so that angles just below a full turn, e.g. 350, snap to 0, not 270.
+        function closestStandardAngle(degrees) {
+          const candidates = [0, 90, 180, 270, 360];
+          let closestAngle = candidates[0];
+          let minDifference = Math.abs(degrees - closestAngle);
+          for (const candidate of candidates.slice(1)) {
+            const difference = Math.abs(degrees - candidate);
+            if (difference < minDifference) {
+              [minDifference, closestAngle] = [difference, candidate];
+            }
+          }
+          return closestAngle % 360;
+        }
+
+        // Rotation angle of a transform matrix, computed from matrix[0] and
+        // matrix[1] and snapped by closestStandardAngle().
+        function matrixToDegrees(matrix) {
+          const [a, b] = matrix;
+          const angle = Math.atan2(b, a);
+          // Shift atan2's negative results so the angle is non-negative
+          const radians = angle < 0 ? angle + (2 * Math.PI) : angle;
+          const rounded = Math.round(radians * (180 / Math.PI)) % 360;
+          // Guard kept for safety; `rounded` is not expected to be negative
+          // after the shift above
+          const degrees = rounded < 0 ? rounded + 360 : rounded;
+          return closestStandardAngle(degrees);
+        }
+
+        let rotation = matrixToDegrees(m);
+
+        let ascent = font.ascent;
+        let descent = font.descent;
+        if (descent > 0) {
+          descent = -descent;
+        }
+        if (ascent && descent) {
+          if (ascent > 1) {
+            ascent = 0.75;
+          }
+          if (descent < -0.5) {
+            descent = -0.25;
+          }
+        }
+        else {
+          ascent = 0.75;
+          descent = -0.25;
+        }
+
+        if (font.capHeight && font.capHeight < ascent) {
+          ascent = font.capHeight;
+        }
+
+        let charWidth = textChunk.width - prevWidth;
+        let rect = [0, textState.fontSize * descent, charWidth, textState.fontSize * ascent]
+
+        if (
+          font.isType3Font &&
+          textState.fontSize <= 1 &&
+          !isArrayEqual(textState.fontMatrix, FONT_IDENTITY_MATRIX)
+        ) {
+          const glyphHeight = font.bbox[3] - font.bbox[1];
+          if (glyphHeight > 0) {
+            rect[1] = font.bbox[1] * textState.fontMatrix[3];
+            rect[3] = font.bbox[3] * textState.fontMatrix[3];
+          }
+        }
+
+        rect = Util.getAxialAlignedBoundingBox(rect, m);
+
+        let baselineRect = Util.getAxialAlignedBoundingBox([0, 0, 0, 0], m);
+        let baseline = 0;
+        if (rotation === 0 || rotation === 180) {
+          baseline = baselineRect[1];
+        }
+        else if (rotation === 90 || rotation === 270) {
+          baseline = baselineRect[0];
+        }
+
+        let p1 = [0, 0];
+        let p2 = [0, 1];
+
+        let [x1, y1] = Util.applyTransform(p1, getCurrentTextTransform());
+        let [x2, y2] = Util.applyTransform(p2, getCurrentTextTransform());
+        let fontSize = Math.hypot(x1 - x2, y1 - y2);
+
+        let diagonal = rotation % 90 !== 0;
+
+        if (
+          glyph.unicode !== ' '
+          && fontSize !== 0
+          // Sometimes char can map to null and break strings
+          && glyph.unicode.charCodeAt(0)
+        ) {
+          textChunk.chars.push({
+            // Decomposed ligatures, normalized Arabic characters
+            c: glyphUnicode,
+            // Normalizes Arabic characters and other characters whose length remains 1, but
+            // preserves ligatures and, more importantly, avoids 'e\u00be' being converted into
+            // 'e \u0301', which is quite common in Spanish author names and, because of the
+            // space, prevents author name recognition
+            u: glyphUnicode.length === 1 ? glyphUnicode : glyph.unicode,
+            rect,
+            fontSize,
+            fontName: textState.fontName,
+            bold: textState.font.bold,
+            italic: textState.font.italic,
+            baseline,
+            rotation,
+            diagonal,
+          });
+        }
+
         if (charSpacing) {
           if (!font.vertical) {
             textState.translateTextMatrix(
@@ -2949,6 +3068,7 @@ class PartialEvaluator {
       textContent.items.push(runBidiTransform(textContentItem));
       textContentItem.initialized = false;
       textContentItem.str.length = 0;
+      textContentItem.chars = [];
     }
 
     function enqueueChunk(batch = false) {
diff --git a/src/core/text/analyzer.js b/src/core/text/analyzer.js
new file mode 100644
index 0000000000000..a23488446c719
--- /dev/null
+++ b/src/core/text/analyzer.js
@@ -0,0 +1,492 @@
+let isNum = c => c >= '0' && c <= '9'; // true when `c` compares within '0'..'9' (string comparison)
+
+/**
+ * Parses the run of adjacent digit chars that contains `chars[idx]`.
+ * A neighbor joins the run when it is a digit, its horizontal gap to the
+ * run is smaller than the width of the char right of the gap, and the
+ * `rect[1]` values of both chars differ by less than 2.
+ * Returns the run parsed in base 10 (NaN when it does not start with a digit).
+ */
+function getSurroundedNumber(chars, idx) {
+  // `left` and `right` are consecutive chars, in that order
+  let adjoins = (left, right) =>
+    Math.abs(right.rect[0] - left.rect[2]) < right.rect[2] - right.rect[0]
+    && Math.abs(left.rect[1] - right.rect[1]) < 2;
+
+  while (idx > 0 && isNum(chars[idx - 1].c) && adjoins(chars[idx - 1], chars[idx])) {
+    idx--;
+  }
+  let str = chars[idx].c;
+  while (idx < chars.length - 1 && isNum(chars[idx + 1].c) && adjoins(chars[idx], chars[idx + 1])) {
+    str += chars[++idx].c;
+  }
+  return Number.parseInt(str, 10);
+}
+
+// Returns the number (see getSurroundedNumber) around the first digit
+// char whose rect center is less than 10 away from `x` and less than 5
+// away from `y`, or null when there is no such char.
+function getSurroundedNumberAtPos(chars, x, y) {
+  let idx = chars.findIndex(({ c, rect }) => {
+    let center = getRectCenter(rect);
+    return isNum(c) && Math.abs(x - center.x) < 10 && Math.abs(y - center.y) < 5;
+  });
+  return idx === -1 ? null : getSurroundedNumber(chars, idx);
+}
+
+/** Center point `{ x, y }` of a `[x1, y1, x2, y2]` rect. */
+function getRectCenter(rect) {
+  let [x1, y1, x2, y2] = rect;
+  let center = { x: x1 + (x2 - x1) / 2, y: y1 + (y2 - y1) / 2 };
+  return center;
+}
+
+function filterNums(chars, pageHeight) { // digit chars with rect[3] below pageHeight / 5 or rect[1] above pageHeight * 4 / 5
+  return chars.filter(({ c, rect }) => c >= '0' && c <= '9' && (rect[3] < pageHeight / 5 || rect[1] > pageHeight * 4 / 5));
+}
+
+// Flattens structured text into a single array of char objects, visiting
+// paragraphs, then lines, then words, in their stored order.
+export function flattenChars(structuredText) {
+  let flat = [];
+  for (let { lines } of structuredText.paragraphs) {
+    for (let { words } of lines) {
+      for (let { chars } of words) {
+        flat.push(...chars);
+      }
+    }
+  }
+  return flat;
+}
+
+/**
+ * Rect that spans from `charFrom` to `charTo` along one axis and takes
+ * the other axis from `line.rect`. For a `line.vertical` line the chars
+ * bound the y-range; otherwise they bound the x-range.
+ */
+function getLineSelectionRect(line, charFrom, charTo) {
+  let [lineX1, lineY1, lineX2, lineY2] = line.rect;
+  let from = charFrom.rect;
+  let to = charTo.rect;
+  if (line.vertical) {
+    let y1 = Math.min(from[1], to[1]);
+    let y2 = Math.max(from[3], to[3]);
+    return [lineX1, y1, lineX2, y2];
+  }
+  let x1 = Math.min(from[0], to[0]);
+  let x2 = Math.max(from[2], to[2]);
+  return [x1, lineY1, x2, lineY2];
+}
+
+// Builds one selection rect per line for the chars numbered `charStart`
+// through `charEnd` (inclusive), counted in flattenChars() order; values
+// are rounded to 3 decimals. An unreached `charEnd` selects to the end.
+function getRangeRects(structuredText, charStart, charEnd) {
+  let round = value => parseFloat(value.toFixed(3));
+  let rects = [];
+  let started = false;
+  let index = 0;
+  outer: for (let { lines } of structuredText.paragraphs) {
+    for (let line of lines) {
+      let first = null;
+      let last = null;
+      for (let { chars } of line.words) {
+        for (let char of chars) {
+          if (index === charStart || started && !first) {
+            first = char;
+            started = true;
+          }
+          last = started ? char : last;
+          if (started && index === charEnd) {
+            rects.push(getLineSelectionRect(line, first, last));
+            break outer;
+          }
+          index++;
+        }
+      }
+      if (started && first && last) {
+        rects.push(getLineSelectionRect(line, first, last));
+      }
+    }
+  }
+  return rects.map(rect => rect.map(round));
+}
+
+/**
+ * Finds URLs and DOIs in the page text. Chars are split into sequences at
+ * word spaces, font size/name changes and line wraps that don't look like
+ * URL continuations; each sequence is matched against both patterns.
+ *
+ * @returns {Array} `{ from, to, url }` links (DOI links also carry `text`);
+ *   `from`/`to` are the inclusive flattenChars() indexes of the link chars.
+ */
+function extractLinks(structuredText) {
+  let chars = flattenChars(structuredText);
+  let spaceBefore = new Set();
+  for (let paragraph of structuredText.paragraphs) {
+    for (let line of paragraph.lines) {
+      for (let word of line.words) {
+        if (word.spaceAfter) {
+          spaceBefore.add(word.to + 1);
+        }
+      }
+    }
+  }
+
+  let sequences = [];
+  let sequence = { from: 0, to: 0 };
+  let urlBreakChars = ['&', '.', '#', '?', '/'];
+  for (let i = 0; i < chars.length; i++) {
+    let char = chars[i];
+    let charBefore = chars[i - 1];
+    if (spaceBefore.has(i)
+      || charBefore && (
+        char.fontSize !== charBefore.fontSize
+        || char.fontName !== charBefore.fontName
+        || charBefore.rect[0] > char.rect[0] && (
+          charBefore.rect[1] - char.rect[3] > (char.rect[3] - char.rect[1]) / 2
+          || !(urlBreakChars.includes(charBefore.c) || urlBreakChars.includes(char.c))
+        )
+      )
+    ) {
+      sequences.push(sequence);
+      sequence = { from: i, to: i };
+    } else {
+      sequence.to = i;
+    }
+  }
+  if (sequence.from !== sequence.to) {
+    sequences.push(sequence);
+  }
+
+  let urlRegExp = /(https?:\/\/|www\.|10\.)[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)/;
+  let doiRegExp = /10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/;
+  let links = [];
+  for (let seq of sequences) {
+    // A char can contribute several code units to `text` (its `c` may be
+    // longer than 1), so record which char every code unit came from
+    let text = '';
+    let owners = [];
+    for (let j = seq.from; j <= seq.to; j++) {
+      text += chars[j].c;
+      owners.push(...Array(chars[j].c.length).fill(j));
+    }
+    // Converts a match position in `text` into an inclusive char range
+    let toRange = (index, length) => ({ from: owners[index], to: owners[index + length - 1] });
+    let match = text.match(urlRegExp);
+    if (match) {
+      if (match[0].includes('@')) {
+        continue;
+      }
+      let url = match[0].replace(/[.)]*$/, '');
+      links.push({ ...toRange(match.index, url.length), url });
+    }
+    match = text.match(doiRegExp);
+    if (match) {
+      let url = 'https://doi.org/' + encodeURIComponent(match[0]);
+      links.push({ ...toRange(match.index, match[0].length), text: match[0], url });
+    }
+  }
+  return links;
+}
+
+// Sort key made of three '|'-joined parts: the page index cut and
+// left-padded with zeros to 5 chars, the offset likewise to 6 chars, and
+// a constant 5-zero part.
+function getSortIndex(pageIndex, offset) {
+  let pad = (value, size) => value.toString().slice(0, size).padStart(size, '0');
+  return `${pad(pageIndex, 5)}|${pad(offset, 6)}|${pad(0, 5)}`;
+}
+
+/**
+ * Distance between two `[x1, y1, x2, y2]` rects: the corner-to-corner
+ * distance when B lies diagonally from A, the gap along one axis when B
+ * lies strictly to one side, and 0 otherwise.
+ */
+function rectsDist([ax1, ay1, ax2, ay2], [bx1, by1, bx2, by2]) {
+  // Where B lies relative to A, judged by comparing the rect edges
+  let isLeft = bx2 < ax1;
+  let isRight = ax2 < bx1;
+  let isBelow = by2 < ay1;
+  let isAbove = ay2 < by1;
+
+  if (isAbove && isLeft) {
+    return Math.hypot(ax1 - bx2, ay2 - by1);
+  }
+  if (isLeft && isBelow) {
+    return Math.hypot(ax1 - bx2, ay1 - by2);
+  }
+  if (isBelow && isRight) {
+    return Math.hypot(ax2 - bx1, ay1 - by2);
+  }
+  if (isRight && isAbove) {
+    return Math.hypot(ax2 - bx1, ay2 - by1);
+  }
+
+  if (isLeft || isRight) {
+    return isLeft ? ax1 - bx2 : bx1 - ax2;
+  }
+  if (isBelow || isAbove) {
+    return isBelow ? ay1 - by2 : by1 - ay2;
+  }
+  return 0;
+}
+
+// Index of the char whose rect is nearest to `rect` according to
+// rectsDist(); the earliest such char wins ties, and 0 is returned when
+// `chars` is empty.
+function getClosestOffset(chars, rect) {
+  let best = { index: 0, distance: Infinity };
+  chars.forEach((char, index) => {
+    let distance = rectsDist(char.rect, rect);
+    if (distance < best.distance) {
+      best = { index, distance };
+    }
+  });
+  return best.index;
+}
+
+// Derives the page label and the link overlays of one page. The text comes
+// from `structuredTextProvider(pageIndex)`; when that resolves to a falsy
+// value (e.g. text extraction failed) the page is treated as having no
+// text instead of failing with a TypeError.
+export class PageAnalyzer {
+  constructor(pageIndex, pdfDocument, structuredTextProvider) {
+    this._pageIndex = pageIndex;
+    this._pdfDocument = pdfDocument;
+    this._structuredTextProvider = structuredTextProvider;
+  }
+
+  async _getPagesNum() {
+    return this._pdfDocument.pdfManager.ensureDoc('numPages');
+  }
+
+  // Flat chars of a page; an empty array when its text is unavailable
+  async _getChars(pageIndex) {
+    let structuredText = await this._structuredTextProvider(pageIndex);
+    return structuredText ? flattenChars(structuredText) : [];
+  }
+
+  // Searches the filtered digits of 4 consecutive pages for numbers at the same spot on
+  // pages two apart that differ by 2. Returns 1-2 points `{ x, y, num, idx }`, or null.
+  _getPageLabelPoints(pageIndex, chars1, chars2, chars3, chars4, pageHeight) {
+    let charsNum1 = filterNums(chars1, pageHeight);
+    let charsNum2 = filterNums(chars2, pageHeight);
+    let charsNum3 = filterNums(chars3, pageHeight);
+    let charsNum4 = filterNums(chars4, pageHeight);
+
+    // Cut off the logic if one of the pages has too many digits
+    if ([charsNum1, charsNum2, charsNum3, charsNum4].find(x => x.length > 500)) {
+      return null;
+    }
+    for (let c1 = 0; c1 < charsNum1.length; c1++) {
+      let ch1 = charsNum1[c1];
+      for (let c3 = 0; c3 < charsNum3.length; c3++) {
+        let ch3 = charsNum3[c3];
+        let { x: x1, y: y1 } = getRectCenter(ch1.rect);
+        let { x: x2, y: y2 } = getRectCenter(ch3.rect);
+        if (Math.abs(x1 - x2) < 10 && Math.abs(y1 - y2) < 5) {
+          let num1 = getSurroundedNumber(charsNum1, c1);
+          let num3 = getSurroundedNumber(charsNum3, c3);
+          if (num1 && num1 + 2 === num3) {
+            let pos1 = { x: x1, y: y1, num: num1, idx: pageIndex };
+            let extractedNum2 = getSurroundedNumberAtPos(chars2, x1, y1);
+            if (num1 + 1 === extractedNum2) {
+              return [pos1];
+            }
+
+            for (let c2 = 0; c2 < charsNum2.length; c2++) {
+              let ch2 = charsNum2[c2];
+              for (let c4 = 0; c4 < charsNum4.length; c4++) {
+                let ch4 = charsNum4[c4];
+                let { x: x1, y: y1 } = getRectCenter(ch2.rect);
+                let { x: x2, y: y2 } = getRectCenter(ch4.rect);
+                if (Math.abs(x1 - x2) < 10 && Math.abs(y1 - y2) < 5) {
+                  let num2 = getSurroundedNumber(charsNum2, c2);
+                  let num4 = getSurroundedNumber(charsNum4, c4);
+                  if (num1 + 1 === num2 && num2 + 2 === num4) {
+                    let pos2 = { x: x1, y: y1, num: num2, idx: pageIndex + 2 };
+                    return [pos1, pos2];
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    return null;
+  }
+
+  // Reads the number at the points on the current page and accepts it when the previous or next
+  // page continues the sequence; pages before the first point are counted back from it.
+  _getPageLabel(pageIndex, charsPrev, charsCur, charsNext, points) {
+    let numPrev, numCur, numNext;
+
+    // TODO: Instead of trying to extract from two positions, try to
+    //  guess the right position by determining whether the page is even or odd
+
+    // TODO: Take into account font parameters when comparing extracted numbers
+    let getNum = (charsNext, points) => points.length > 0 && getSurroundedNumberAtPos(charsNext, points[0].x, points[0].y)
+      || points.length > 1 && getSurroundedNumberAtPos(charsNext, points[1].x, points[1].y);
+
+    if (charsPrev) {
+      numPrev = getNum(charsPrev, points);
+    }
+
+    numCur = getNum(charsCur, points);
+
+    if (charsNext) {
+      numNext = getNum(charsNext, points);
+    }
+
+    if (numCur && (numCur - 1 === numPrev || numCur + 1 === numNext)) {
+      return numCur.toString();
+    }
+
+    if (pageIndex < points[0].idx) {
+      return (points[0].num - (points[0].idx - pageIndex)).toString();
+    }
+
+    return null;
+  }
+
+  // Tries up to 5 windows of 4 consecutive pages, starting 2 pages before
+  // `pageIndex`, and returns the first points found, or null.
+  async _extractPageLabelPoints(pageIndex) {
+    let numPages = await this._getPagesNum();
+    let start = Math.max(pageIndex - 2, 0);
+    for (let i = start; i < start + 5 && i + 3 < numPages; i++) {
+      let chs1 = await this._getChars(i);
+      let chs2 = await this._getChars(i + 1);
+      let chs3 = await this._getChars(i + 2);
+      let chs4 = await this._getChars(i + 3);
+      let page = await this._pdfDocument.getPage(i);
+      let { view } = page;
+      let pageHeight = view[3] - view[1];
+      let res = this._getPageLabelPoints(i, chs1, chs2, chs3, chs4, pageHeight);
+      if (res) {
+        return res;
+      }
+    }
+    return null;
+  }
+
+  // Loads the chars of the previous, current and next page for _getPageLabel()
+  async _extractPageLabel(pageIndex, points) {
+    let chsPrev, chsNext;
+    if (pageIndex > 0) {
+      chsPrev = await this._getChars(pageIndex - 1);
+    }
+    let chsCur = await this._getChars(pageIndex);
+    let numPages = await this._getPagesNum();
+    if (pageIndex < numPages - 1) {
+      chsNext = await this._getChars(pageIndex + 1);
+    }
+    return this._getPageLabel(pageIndex, chsPrev, chsCur, chsNext, points);
+  }
+
+  // Label detected in the page text; the catalog's page label when none (or '0') was detected
+  async getPageLabel() {
+    let existingPageLabels = await this._pdfDocument.pdfManager.ensureCatalog("pageLabels");
+    let pageLabel;
+    let points = await this._extractPageLabelPoints(this._pageIndex);
+    if (points) {
+      pageLabel = await this._extractPageLabel(this._pageIndex, points);
+    }
+    if (
+      (!pageLabel || pageLabel === '0')
+      && existingPageLabels
+      && existingPageLabels[this._pageIndex]
+    ) {
+      pageLabel = existingPageLabels[this._pageIndex];
+    }
+    return pageLabel;
+  }
+
+  // Link overlays: links parsed from the page text, then links taken from annotations
+  async getOverlays() {
+    let overlays = [];
+    let pageIndex = this._pageIndex;
+    // Links parsed from the text; skipped when the text is unavailable
+    let structuredText = await this._structuredTextProvider(pageIndex);
+    let links = structuredText ? extractLinks(structuredText) : [];
+    for (let link of links) {
+      let rects = getRangeRects(structuredText, link.from, link.to);
+      overlays.push({
+        type: 'external-link',
+        source: 'parsed',
+        url: link.url,
+        sortIndex: getSortIndex(pageIndex, link.from),
+        position: { pageIndex, rects },
+      });
+    }
+
+    // Links declared by annotations that have a `url` or a `dest`, and a `rect`
+    let chars = structuredText ? flattenChars(structuredText) : [];
+    let page = await this._pdfDocument.getPage(pageIndex);
+    let annotations = await page._parsedAnnotations;
+    for (let annotation of annotations) {
+      annotation = annotation.data;
+      if (!annotation.url && !annotation.dest || !annotation.rect) {
+        continue;
+      }
+      let offset = getClosestOffset(chars, annotation.rect);
+      let overlay = {
+        source: 'annotation',
+        sortIndex: getSortIndex(pageIndex, offset),
+        position: { pageIndex, rects: [annotation.rect] },
+      };
+      if (annotation.url) {
+        overlay.type = 'external-link';
+        overlay.url = annotation.url;
+      } else {
+        overlay.type = 'internal-link';
+        overlay.dest = annotation.dest;
+      }
+      overlays.push(overlay);
+    }
+    return overlays;
+  }
+}
+
+/**
+ * Converts the catalog's document outline into
+ * `{ title, items, expanded, location | url }` nodes.
+ */
+export class OutlineAnalyzer {
+  constructor(pdfDocument, structuredTextProvider) {
+    this._pdfDocument = pdfDocument;
+    this._structuredTextProvider = structuredTextProvider;
+  }
+
+  // Resolves to the converted outline, or to [] when the catalog has
+  // none. `extract` is accepted but not used by this implementation.
+  async getOutline(extract) {
+    let toNode = ({ title, items, dest, unsafeUrl }) => {
+      let node = {
+        title,
+        items: items.map(toNode),
+        expanded: false,
+      };
+      if (dest) {
+        node.location = { dest };
+      } else if (unsafeUrl) {
+        node.url = unsafeUrl;
+      }
+      return node;
+    };
+
+    let source = await this._pdfDocument.pdfManager.ensureCatalog("documentOutline");
+    if (!source) {
+      return [];
+    }
+    let outline = source.map(toNode);
+    // A lone top-level entry is marked as expanded
+    if (outline.length === 1) {
+      outline[0].expanded = true;
+    }
+    return outline;
+  }
+}
diff --git a/src/core/text/structure.js b/src/core/text/structure.js
new file mode 100644
index 0000000000000..a9bca3a49361e
--- /dev/null
+++ b/src/core/text/structure.js
@@ -0,0 +1,864 @@
+
+// *** bidi.js starts here ***
+// This is taken from PDF.js source https://github.com/mozilla/pdf.js/blob/9416b14e8b06a39a1a57f2baf22aebab2370edeb/src/core/bidi.js
+
+/* Copyright 2012 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Bidi character types for code points 0000 to 00FF, indexed by code point.
+// Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
+// prettier-ignore
+let baseTypes = [
+  "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "S", "B", "S",
+  "WS", "B", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN",
+  "BN", "BN", "BN", "BN", "B", "B", "B", "S", "WS", "ON", "ON", "ET",
+  "ET", "ET", "ON", "ON", "ON", "ON", "ON", "ES", "CS", "ES", "CS", "CS",
+  "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN", "CS", "ON",
+  "ON", "ON", "ON", "ON", "ON", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "ON", "ON", "ON", "ON", "ON", "ON", "L", "L", "L",
+  "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "L", "L", "L", "L", "L", "ON", "ON", "ON", "ON",
+  "BN", "BN", "BN", "BN", "BN", "BN", "B", "BN", "BN", "BN", "BN", "BN",
+  "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN",
+  "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "BN", "CS", "ON", "ET",
+  "ET", "ET", "ET", "ON", "ON", "ON", "ON", "L", "ON", "ON", "BN", "ON",
+  "ON", "ET", "ET", "EN", "EN", "ON", "L", "ON", "ON", "ON", "EN", "L",
+  "ON", "ON", "ON", "ON", "ON", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "ON", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L", "L",
+  "L", "L", "L", "L", "L", "ON", "L", "L", "L", "L", "L", "L", "L", "L"
+];
+
+// Bidi character types for code points 0600 to 06FF, indexed by the low byte
+// of the code point. Source: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
+// Note that 061D does not exist in the Unicode standard (see
+// http://unicode.org/charts/PDF/U0600.pdf), so its slot holds an empty
+// string and a message is logged if that character is encountered. The
+// empty string is required to properly index the items after it.
+// prettier-ignore
+let arabicTypes = [
+  "AN", "AN", "AN", "AN", "AN", "AN", "ON", "ON", "AL", "ET", "ET", "AL",
+  "CS", "AL", "ON", "ON", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM",
+  "NSM", "NSM", "NSM", "NSM", "AL", "AL", "", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM",
+  "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM",
+  "NSM", "NSM", "NSM", "NSM", "AN", "AN", "AN", "AN", "AN", "AN", "AN",
+  "AN", "AN", "AN", "ET", "AN", "AN", "AL", "AL", "AL", "NSM", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
+  "AL", "AL", "AL", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "AN",
+  "ON", "NSM", "NSM", "NSM", "NSM", "NSM", "NSM", "AL", "AL", "NSM", "NSM",
+  "ON", "NSM", "NSM", "NSM", "NSM", "AL", "AL", "EN", "EN", "EN", "EN",
+  "EN", "EN", "EN", "EN", "EN", "EN", "AL", "AL", "AL", "AL", "AL", "AL"
+];
+
+/** Reports whether the lowest bit of the integer `i` is set. */
+function isOdd(i) {
+  return (i & 1) === 1;
+}
+/** Reports whether the lowest bit of the integer `i` is clear. */
+function isEven(i) {
+  return (i & 1) !== 1;
+}
+// Scans `arr` from `start` for the first element that differs from `value`.
+// Returns its index, or the index where the scan stopped if none differs.
+function findUnequal(arr, start, value) {
+  let pos = start;
+  while (pos < arr.length && arr[pos] === value) {
+    ++pos;
+  }
+  return pos;
+}
+
+// Assigns `value` to every index of `arr` from `start` up to, but excluding, `end`.
+function setValues(arr, start, end, value) {
+  for (let index = start; index < end; index += 1) {
+    arr[index] = value;
+  }
+}
+// Reverses, in place, the elements of `arr` at indexes `start` .. `end - 1`.
+// Elements outside that range keep their position.
+function reverseValues(arr, start, end) {
+  for (let lo = start, hi = end - 1; lo < hi; ++lo, --hi) {
+    [arr[lo], arr[hi]] = [arr[hi], arr[lo]];
+  }
+}
+
+/**
+ * Pairs `chars` with a direction: "ttb" if `vertical`, else "ltr"/"rtl" per `isLTR`.
+ * @returns {{chars: *, dir: string}}
+ */
+function createBidiText(chars, isLTR, vertical = false) {
+  if (vertical) {
+    return { chars, dir: "ttb" };
+  }
+  return { chars, dir: isLTR ? "ltr" : "rtl" };
+}
+
+// Per-character type scratch array, re-used by bidi() on each call to avoid allocations.
+let types = [];
+
+// Resolves bidi levels for `chars` and reverses runs of it in place (rule L2); returns { chars, dir }.
+function bidi(chars, startLevel = -1, vertical = false) {
+  let isLTR = true;
+  let strLength = chars.length;
+  if (strLength === 0 || vertical) {
+    return createBidiText(chars, isLTR, vertical);
+  }
+
+  // Get types and fill arrays
+  types.length = strLength;
+  let numBidi = 0;
+
+  let i, ii;
+  for (i = 0; i < strLength; ++i) {
+    // Each element of chars is an object; its text is read from the .c field.
+    let charCode = chars[i].c.charCodeAt(0);
+    let charType = "L";
+    if (charCode <= 0x00ff) {
+      charType = baseTypes[charCode];
+    }
+    else if (0x0590 <= charCode && charCode <= 0x05f4) {
+      charType = "R";
+    }
+    else if (0x0600 <= charCode && charCode <= 0x06ff) {
+      charType = arabicTypes[charCode & 0xff];
+      if (!charType) {
+        console.log("Bidi: invalid Unicode character " + charCode.toString(16));
+      }
+    }
+    else if (0x0700 <= charCode && charCode <= 0x08ac) {
+      charType = "AL";
+    }
+    if (charType === "R" || charType === "AL" || charType === "AN") {
+      numBidi++;
+    }
+    types[i] = charType;
+  }
+
+  // Detect the bidi method
+  // - If there are no rtl characters then no bidi needed
+  // - If less than 30% chars are rtl then string is primarily ltr,
+  //   unless the string is very short.
+  // - If more than 30% chars are rtl then string is primarily rtl
+  if (numBidi === 0) {
+    isLTR = true;
+    return createBidiText(chars, isLTR);
+  }
+
+  if (startLevel === -1) { // -1: base level is derived from the share of rtl chars
+    if (numBidi / strLength < 0.3 && strLength > 4) {
+      isLTR = true;
+      startLevel = 0;
+    }
+    else {
+      isLTR = false;
+      startLevel = 1;
+    }
+  }
+
+  let levels = [];
+  for (i = 0; i < strLength; ++i) {
+    levels[i] = startLevel;
+  }
+
+  /*
+   X1-X10: skip most of this, since we are NOT doing the embeddings.
+   */
+  let e = isOdd(startLevel) ? "R" : "L";
+  let sor = e;
+  let eor = sor;
+
+  /*
+   W1. Examine each non-spacing mark (NSM) in the level run, and change the
+   type of the NSM to the type of the previous character. If the NSM is at the
+   start of the level run, it will get the type of sor.
+   */
+  let lastType = sor;
+  for (i = 0; i < strLength; ++i) {
+    if (types[i] === "NSM") {
+      types[i] = lastType;
+    }
+    else {
+      lastType = types[i];
+    }
+  }
+
+  /*
+   W2. Search backwards from each instance of a European number until the
+   first strong type (R, L, AL, or sor) is found.  If an AL is found, change
+   the type of the European number to Arabic number.
+   */
+  lastType = sor;
+  let t;
+  for (i = 0; i < strLength; ++i) {
+    t = types[i];
+    if (t === "EN") {
+      types[i] = lastType === "AL" ? "AN" : "EN";
+    }
+    else if (t === "R" || t === "L" || t === "AL") {
+      lastType = t;
+    }
+  }
+
+  /*
+   W3. Change all ALs to R.
+   */
+  for (i = 0; i < strLength; ++i) {
+    t = types[i];
+    if (t === "AL") {
+      types[i] = "R";
+    }
+  }
+
+  /*
+   W4. A single European separator between two European numbers changes to a
+   European number. A single common separator between two numbers of the same
+   type changes to that type:
+   */
+  for (i = 1; i < strLength - 1; ++i) {
+    if (types[i] === "ES" && types[i - 1] === "EN" && types[i + 1] === "EN") {
+      types[i] = "EN";
+    }
+    if (
+      types[i] === "CS" &&
+      (types[i - 1] === "EN" || types[i - 1] === "AN") &&
+      types[i + 1] === types[i - 1]
+    ) {
+      types[i] = types[i - 1];
+    }
+  }
+
+  /*
+   W5. A sequence of European terminators adjacent to European numbers changes
+   to all European numbers:
+   */
+  for (i = 0; i < strLength; ++i) {
+    if (types[i] === "EN") {
+      // do before
+      for (let j = i - 1; j >= 0; --j) {
+        if (types[j] !== "ET") {
+          break;
+        }
+        types[j] = "EN";
+      }
+      // do after
+      for (let j = i + 1; j < strLength; ++j) {
+        if (types[j] !== "ET") {
+          break;
+        }
+        types[j] = "EN";
+      }
+    }
+  }
+
+  /*
+   W6. Otherwise, separators and terminators change to Other Neutral:
+   */
+  for (i = 0; i < strLength; ++i) {
+    t = types[i];
+    if (t === "WS" || t === "ES" || t === "ET" || t === "CS") {
+      types[i] = "ON";
+    }
+  }
+
+  /*
+   W7. Search backwards from each instance of a European number until the
+   first strong type (R, L, or sor) is found. If an L is found,  then change
+   the type of the European number to L.
+   */
+  lastType = sor;
+  for (i = 0; i < strLength; ++i) {
+    t = types[i];
+    if (t === "EN") {
+      types[i] = lastType === "L" ? "L" : "EN";
+    }
+    else if (t === "R" || t === "L") {
+      lastType = t;
+    }
+  }
+
+  /*
+   N1. A sequence of neutrals takes the direction of the surrounding strong
+   text if the text on both sides has the same direction. European and Arabic
+   numbers are treated as though they were R. Start-of-level-run (sor) and
+   end-of-level-run (eor) are used at level run boundaries.
+   */
+  for (i = 0; i < strLength; ++i) {
+    if (types[i] === "ON") {
+      let end = findUnequal(types, i + 1, "ON");
+      let before = sor;
+      if (i > 0) {
+        before = types[i - 1];
+      }
+
+      let after = eor;
+      if (end + 1 < strLength) {
+        after = types[end + 1];
+      }
+      if (before !== "L") {
+        before = "R";
+      }
+      if (after !== "L") {
+        after = "R";
+      }
+      if (before === after) {
+        setValues(types, i, end, before);
+      }
+      i = end - 1; // reset to end (-1 so next iteration is ok)
+    }
+  }
+
+  /*
+   N2. Any remaining neutrals take the embedding direction.
+   */
+  for (i = 0; i < strLength; ++i) {
+    if (types[i] === "ON") {
+      types[i] = e;
+    }
+  }
+
+  /*
+   I1. For all characters with an even (left-to-right) embedding direction,
+   those of type R go up one level and those of type AN or EN go up two
+   levels.
+   I2. For all characters with an odd (right-to-left) embedding direction,
+   those of type L, EN or AN go up one level.
+   */
+  for (i = 0; i < strLength; ++i) {
+    t = types[i];
+    if (isEven(levels[i])) {
+      if (t === "R") {
+        levels[i] += 1;
+      }
+      else if (t === "AN" || t === "EN") {
+        levels[i] += 2;
+      }
+    }
+    else {
+      // isOdd
+      if (t === "L" || t === "AN" || t === "EN") {
+        levels[i] += 1;
+      }
+    }
+  }
+
+  /*
+   L1. On each line, reset the embedding level of the following characters to
+   the paragraph embedding level:
+
+   segment separators,
+   paragraph separators,
+   any sequence of whitespace characters preceding a segment separator or
+   paragraph separator, and any sequence of white space characters at the end
+   of the line.
+   */
+
+  // don't bother as text is only single line
+
+  /*
+   L2. From the highest level found in the text to the lowest odd level on
+   each line, reverse any contiguous sequence of characters that are at that
+   level or higher.
+   */
+
+  // find highest level & lowest odd level
+  let highestLevel = -1;
+  let lowestOddLevel = 99;
+  let level;
+  for (i = 0, ii = levels.length; i < ii; ++i) {
+    level = levels[i];
+    if (highestLevel < level) {
+      highestLevel = level;
+    }
+    if (lowestOddLevel > level && isOdd(level)) {
+      lowestOddLevel = level;
+    }
+  }
+
+  // now reverse between those limits
+  for (level = highestLevel; level >= lowestOddLevel; --level) {
+    // find segments to reverse
+    let start = -1;
+    for (i = 0, ii = levels.length; i < ii; ++i) {
+      if (levels[i] < level) {
+        if (start >= 0) {
+          reverseValues(chars, start, i);
+          start = -1;
+        }
+      }
+      else if (start < 0) {
+        start = i;
+      }
+    }
+    if (start >= 0) {
+      reverseValues(chars, start, levels.length);
+    }
+  }
+
+  /*
+   L3. Combining marks applied to a right-to-left base character will at this
+   point precede their base character. If the rendering engine expects them to
+   follow the base characters in the final display process, then the ordering
+   of the marks and the base character must be reversed.
+   */
+
+  // don't bother for now
+
+  /*
+   L4. A character that possesses the mirrored property as specified by
+   Section 4.7, Mirrored, must be depicted by a mirrored glyph if the resolved
+   directionality of that character is R.
+   */
+
+  // don't mirror as characters are already mirrored in the pdf
+
+  // NOTE(review): chars holds objects (see chars[i].c above), so these string comparisons presumably never match.
+  for (i = 0, ii = chars.length; i < ii; ++i) {
+    let ch = chars[i];
+    if (ch === "<" || ch === ">") {
+      chars[i] = "";
+    }
+  }
+  return createBidiText(chars, isLTR);
+}
+
+/**
+ * Tells whether the first UTF-16 unit of `char` has a right-to-left bidi
+ * type ("R", "AL" or "AN"), using the same classification ranges as bidi().
+ * @param {string} char
+ * @returns {boolean}
+ */
+function isRTL(char) {
+  const code = char.charCodeAt(0);
+  if (code <= 0x00ff) {
+    return ["R", "AL", "AN"].includes(baseTypes[code]);
+  }
+  if (code >= 0x0590 && code <= 0x05f4) {
+    return true; // this whole range is typed "R"
+  }
+  if (code >= 0x0600 && code <= 0x06ff) {
+    const arabicType = arabicTypes[code & 0xff];
+    if (!arabicType) {
+      console.log("Bidi: invalid Unicode character " + code.toString(16));
+    }
+    return ["R", "AL", "AN"].includes(arabicType);
+  }
+  return code >= 0x0700 && code <= 0x08ac; // this range is typed "AL"
+}
+
+// *** bidi.js ends here ***
+
+
+
+// Word-gap threshold for one line of chars. The function is adapted from Xpdf
+// https://www.xpdfreader.com/opensource.html - original copyright: 1996-2019 Glyph & Cog, LLC.
+function computeWordSpacingThreshold(chars) {
+  // Inter-character spacing that varies by less than this multiple of
+  // font size is assumed to be equivalent.
+  let uniformSpacing = 0.07;
+  // Typical word spacing, as a fraction of font size.  This will be
+  // added to the minimum inter-character spacing, to account for wide
+  // character spacing.
+  let wordSpacing = 0.1;
+  // Compute the inter-word spacing threshold for a line of chars.
+  // Spaces greater than this threshold will be considered inter-word
+  // spaces.
+  // Each char supplies fontSize and spaceAfter, plus what getSpaceBetweenChars() reads.
+  let char, char2;
+  let avgFontSize;
+  let minAdjGap, maxAdjGap, minSpGap, maxSpGap, minGap, maxGap, gap, gap2;
+  let i;
+  // min > max (1 > 0) marks the Adj/Sp gap ranges as "no sample recorded yet".
+  avgFontSize = 0;
+  minGap = maxGap = 0;
+  minAdjGap = minSpGap = 1;
+  maxAdjGap = maxSpGap = 0;
+  for (i = 0; i < chars.length; ++i) {
+    char = chars[i];
+    avgFontSize += char.fontSize;
+    if (i < chars.length - 1) {
+      char2 = chars[i + 1];
+      gap = getSpaceBetweenChars(char, char2);
+      if (char.spaceAfter) {
+        if (minSpGap > maxSpGap) {
+          minSpGap = maxSpGap = gap;
+        }
+        else if (gap < minSpGap) {
+          minSpGap = gap;
+        }
+        else if (gap > maxSpGap) {
+          maxSpGap = gap;
+        }
+      }
+      else if (minAdjGap > maxAdjGap) {
+        minAdjGap = maxAdjGap = gap;
+      }
+      else if (gap < minAdjGap) {
+        minAdjGap = gap;
+      }
+      else if (gap > maxAdjGap) {
+        maxAdjGap = gap;
+      }
+      if (i == 0 || gap < minGap) {
+        minGap = gap;
+      }
+      if (gap > maxGap) {
+        maxGap = gap;
+      }
+    }
+  }
+  avgFontSize /= chars.length;
+  if (minGap < 0) {
+    minGap = 0;
+  }
+
+  // if spacing is nearly uniform (minGap is close to maxGap), use the
+  // SpGap/AdjGap values if available, otherwise assume it's a single
+  // word (technically it could be either "ABC" or "A B C", but it's
+  // essentially impossible to tell)
+  if (maxGap - minGap < uniformSpacing * avgFontSize) {
+    if (minAdjGap <= maxAdjGap
+      && minSpGap <= maxSpGap
+      && minSpGap - maxAdjGap > 0.01) {
+      return 0.5 * (maxAdjGap + minSpGap);
+    }
+    else {
+      return maxGap + 1;
+    }
+
+    // if there is some variation in spacing, but it's small, assume
+    // there are some inter-word spaces
+  }
+  else if (maxGap - minGap < wordSpacing * avgFontSize) {
+    return 0.5 * (minGap + maxGap);
+
+    // if there is a large variation in spacing, use the SpGap/AdjGap
+    // values if they look reasonable, otherwise, assume a reasonable
+    // threshold for inter-word spacing (we can't use something like
+    // 0.5*(minGap+maxGap) here because there can be outliers at the
+    // high end)
+  }
+  else if (minAdjGap <= maxAdjGap
+    && minSpGap <= maxSpGap
+    && minSpGap - maxAdjGap > uniformSpacing * avgFontSize) {
+    gap = wordSpacing * avgFontSize;
+    gap2 = 0.5 * (minSpGap - minGap);
+    return minGap + (gap < gap2 ? gap : gap2);
+  }
+  else {
+    return minGap + wordSpacing * avgFontSize;
+  }
+}
+
+// Gap between `char` and `char2`, measured on the axis and in the direction selected by `char.rotation`.
+// Always a number (0 for touching chars or a rotation other than 0/90/180/270), never `false`.
+function getSpaceBetweenChars(char, char2) {
+  const [a, b] = [char.rect, char2.rect];
+  const byRotation = new Map([[90, b[1] - a[3]], [180, a[0] - b[2]], [270, a[1] - b[3]]]);
+  return char.rotation ? (byRotation.get(char.rotation) ?? 0) : b[0] - a[2];
+}
+
+/**
+ * Checks whether the start of one rect lies within the other's extent on one
+ * axis: indexes 1..3 when `rotation` is 0 or 180, otherwise indexes 0..2.
+ */
+function overlaps(rect1, rect2, rotation) {
+  const lo = rotation === 0 || rotation === 180 ? 1 : 0;
+  const hi = lo + 2;
+  const startsInside = (outer, inner) => outer[lo] <= inner[lo] && inner[lo] <= outer[hi];
+  return startsInside(rect1, rect2) || startsInside(rect2, rect1);
+}
+
+/** Tests whether `c` contains a hyphen or dash (ASCII "-" or one of the listed Unicode dashes). */
+function isDash(c) {
+  return /[\x2D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]/.test(c);
+}
+
+// Size of `char.rect` on one axis: indexes 1..3 for rotation 0/180, indexes 0..2 for 90/270; falsy otherwise.
+function charHeight(char) {
+  const { rotation: r, rect } = char;
+  return ((r === 0 || r === 180) && rect[3] - rect[1]) || ((r === 90 || r === 270) && rect[2] - rect[0]);
+}
+/**
+ * Bounding rect of `objs[from..to]` (both ends inclusive): minimum of rect[0] and rect[1], maximum of rect[2] and rect[3].
+ */
+function getBoundingRect(objs, from, to) {
+  return objs.slice(from, to + 1).reduce(
+    ([x1, y1, x2, y2], { rect: r }) => [Math.min(x1, r[0]), Math.min(y1, r[1]), Math.max(x2, r[2]), Math.max(y2, r[3])],
+    [Infinity, Infinity, -Infinity, -Infinity]
+  );
+}
+
+/** Returns a copy of `rect` with each number rounded to three decimals. */
+function roundRect(rect) {
+  return rect.map((coordinate) => Math.round(1000 * coordinate) / 1000);
+}
+/**
+ * Groups `chars` into paragraphs, lines and words. The chars of each detected
+ * line are sorted and bidi-reordered inside the `chars` array itself.
+ * @param {Array<Object>} chars - Items with c, rect, rotation, baseline, fontName, fontSize.
+ * @param {*} reflowRTL - Not read by this function.
+ * @returns {Array<Object>} Paragraphs shaped { rect, lines: [{ rect, hyphenated, words }] }.
+ */
+function split(chars, reflowRTL) {
+  if (!chars.length) {
+    return [];
+  }
+  let hasRTL = false;
+  for (let char of chars) {
+    if (isRTL(char.c)) {
+      hasRTL = true;
+      break;
+    }
+  }
+
+  let lineBreaks = [];
+
+  for (let i = 1; i < chars.length; i++) {
+    let char = chars[i - 1];
+    let char2 = chars[i];
+    if (
+      // Caret jumps to the next line start for non-RTL text and baseline isn't the same.
+      // (characters can sometimes even jump back in the same line)
+      !hasRTL && Math.abs(char.baseline - char2.baseline) > 0.01 && (
+        !char2.rotation && char.rect[0] - 10 > char2.rect[0]
+        || char2.rotation === 90 && char.rect[1] > char2.rect[1]
+        || char2.rotation === 180 && char.rect[0] < char2.rect[0]
+        || char2.rotation === 270 && char.rect[1] < char2.rect[1]
+      )
+      || hasRTL && Math.abs(char.baseline - char2.baseline) > 0.01
+      // Rotation changes
+      || char.rotation !== char2.rotation
+      // Chars aren't in the same line
+      || !overlaps(char.rect, char2.rect, char2.rotation)
+      // Line's first char is more than 2x larger than the following char, to put drop cap into a separate line
+      // (lineBreaks is filled in ascending order, so only its last entry can equal i - 1).
+      // NOTE(review): index 0 is added to lineBreaks only after this loop, so the very first char is never tested.
+      || lineBreaks.at(-1) === i - 1 && charHeight(char) > charHeight(char2) * 2
+    ) {
+      lineBreaks.push(i);
+    }
+  }
+  lineBreaks = [0, ...lineBreaks, chars.length];
+
+  // Sort characters in lines by their visual order. That fixes some RTL lines
+  // and weird cases when caret jumps back in the same line for LTR text
+  for (let i = 0; i < lineBreaks.length - 1; i++) {
+    let from = lineBreaks[i];
+    let to = lineBreaks[i + 1] - 1;
+    let lineChars = chars.slice(from, to + 1);
+    lineChars.sort((a, b) => {
+      let { rotation } = a;
+      let x1 = a.rect[0] + (a.rect[2] - a.rect[0]) / 2;
+      let x2 = b.rect[0] + (b.rect[2] - b.rect[0]) / 2;
+      let y1 = a.rect[1] + (a.rect[3] - a.rect[1]) / 2;
+      let y2 = b.rect[1] + (b.rect[3] - b.rect[1]) / 2;
+
+      return !rotation && x1 - x2
+        || rotation === 90 && y1 - y2
+        || rotation === 180 && x2 - x1
+        || rotation === 270 && y2 - y1
+    });
+    bidi(lineChars, -1, false);
+    chars.splice(from, to - from + 1, ...lineChars);
+  }
+
+  let extraLineBreaks = [];
+  let wordBreaks = [];
+  let wordSpaces = [];
+  const punctuation = '?.,;!¡¿。、·(){}[]/$:';
+  // Get word breaks
+  for (let i = 0; i < lineBreaks.length - 1; i++) {
+    let from = lineBreaks[i];
+    let to = lineBreaks[i + 1] - 1;
+    let wordSp = computeWordSpacingThreshold(chars.slice(from, to + 1));
+    let spaces = [];
+    for (let j = from + 1; j <= to; j++) {
+      let char = chars[j - 1];
+      let char2 = chars[j];
+      let rtl = isRTL(char.c) && isRTL(char2.c);
+      let sp = rtl ? (char.rect[0] - char2.rect[2]) : getSpaceBetweenChars(char, char2);
+      if (sp > wordSp || sp < -char.fontSize) {
+        wordSpaces.push(j);
+        wordBreaks.push(j);
+        spaces.push({index: j, width: sp});
+        continue;
+      }
+      if (
+        char.fontName !== char2.fontName
+        || Math.abs(char.fontSize - char2.fontSize) > 0.01
+        || Math.abs(char.baseline - char2.baseline) > 0.01
+        || punctuation.includes(char.c) || punctuation.includes(char2.c)
+      ) {
+        wordBreaks.push(j);
+      }
+    }
+    if (to < chars.length - 1) {
+      wordBreaks.push(to + 1);
+    }
+    let min = Math.min(...spaces.map(x => x.width));
+    for (let space of spaces) {
+      if (space.width > min * 10 && space.width > 10) {
+        extraLineBreaks.push(space.index);
+      }
+    }
+  }
+  wordBreaks = [0, ...wordBreaks, chars.length];
+  lineBreaks = [...lineBreaks, ...extraLineBreaks].sort((a, b) => a - b);
+
+  let paragraphBreaks = [];
+  for (let i = 1; i < lineBreaks.length - 1; i++) {
+    let previousRect = getBoundingRect(chars, lineBreaks[i - 1], lineBreaks[i] - 1);
+    let currentRect = getBoundingRect(chars, lineBreaks[i], lineBreaks[i + 1] - 1);
+    let lineSpacing = previousRect[1] - currentRect[3];
+    let previousLineHeight = previousRect[3] - previousRect[1];
+    let currentLineHeight = currentRect[3] - currentRect[1];
+
+    if (
+      // The lines shouldn't be in the same row
+      !(previousRect[1] > currentRect[3])
+      || Math.abs(previousLineHeight - currentLineHeight) > 0.1
+      || lineSpacing > previousLineHeight) {
+      paragraphBreaks.push(lineBreaks[i]);
+    }
+  }
+
+  paragraphBreaks = [0, ...paragraphBreaks, chars.length];
+
+  let paragraphs = [];
+  const wordSpaceSet = new Set(wordSpaces);
+
+  for (let p = 0; p < paragraphBreaks.length - 1; p++) {
+    let paragraphStart = paragraphBreaks[p];
+    let paragraphEnd = paragraphBreaks[p + 1];
+    let lines = [];
+    for (let l = 0; l < lineBreaks.length - 1; l++) {
+      if (lineBreaks[l] < paragraphStart || lineBreaks[l + 1] > paragraphEnd) {
+        continue;
+      }
+      let lineStart = lineBreaks[l];
+      let lineEnd = lineBreaks[l + 1];
+      let words = [];
+      for (let w = 0; w < wordBreaks.length - 1; w++) {
+        if (wordBreaks[w] < lineStart || wordBreaks[w + 1] > lineEnd) {
+          continue;
+        }
+        let wordStart = wordBreaks[w];
+        let wordEnd = wordBreaks[w + 1];
+        let spaceAfter = wordSpaceSet.has(wordEnd);
+        let wordChars = chars.slice(wordStart, wordEnd);
+        words.push({
+          rect: getBoundingRect(chars, wordStart, wordEnd - 1),
+          chars: wordChars,
+          spaceAfter,
+        });
+      }
+      lines.push({
+        rect: getBoundingRect(words, 0, words.length - 1),
+        hyphenated: isDash(words.at(-1).chars.at(-1).c),
+        words
+      });
+    }
+    paragraphs.push({
+      rect: getBoundingRect(lines, 0, lines.length - 1),
+      lines
+    });
+  }
+  return paragraphs;
+}
+
+/**
+ * Flattens a paragraph to text: the chars of each word are concatenated, a
+ * space follows every word flagged `spaceAfter`, and each line ends with "\n".
+ * @returns {string}
+ */
+function getParagraphText(paragraph) {
+  return paragraph.lines
+    .map((line) => {
+      const words = line.words.map(
+        (word) => word.chars.map((char) => char.c).join("") + (word.spaceAfter ? " " : "")
+      );
+      return words.join("") + "\n";
+    })
+    .join("");
+}
+
+/**
+ * Reorders paragraphs by grouping them into clusters and sorting the clusters.
+ *
+ * 1. A paragraph joins the first cluster holding a paragraph whose rect
+ *    overlaps its own per `overlaps(..., 0)`; otherwise it starts a cluster.
+ * 2. Clusters whose paragraph-index ranges intersect are merged until stable.
+ * 3. Clusters are ordered by descending minimum `rect[3]` of their members.
+ *
+ * @param {Array<{rect: number[]}>} paragraphs
+ * @returns {Array} Paragraphs listed cluster by cluster, in index order inside each.
+ */
+function sortParagraphs(paragraphs) {
+  const byIndex = (a, b) => a - b;
+  const clusters = [];
+  paragraphs.forEach(({ rect }, i) => {
+    const home = clusters.find((cluster) =>
+      cluster.some((idx) => overlaps(rect, paragraphs[idx].rect, 0))
+    );
+    if (home) {
+      home.push(i);
+      home.sort(byIndex);
+    } else {
+      clusters.push([i]);
+    }
+  });
+
+  let merged;
+  do {
+    merged = false;
+    for (let i = 0; i < clusters.length - 1; i++) {
+      for (let j = i + 1; j < clusters.length; j++) {
+        const first = clusters[i];
+        const second = clusters[j];
+        const [min1, max1] = [first[0], first.at(-1)];
+        const [min2, max2] = [second[0], second.at(-1)];
+        if ((min1 >= min2 && min1 <= max2) || (min2 >= min1 && min2 <= max1)) {
+          clusters[i] = [...new Set([...first, ...second])].sort(byIndex);
+          clusters.splice(j, 1);
+          merged = true;
+          break;
+        }
+      }
+    }
+  } while (merged);
+
+  const minRect3 = (cluster) => Math.min(...cluster.map((idx) => paragraphs[idx].rect[3]));
+  clusters.sort((clusterA, clusterB) => minRect3(clusterB) - minRect3(clusterA));
+  return clusters.flatMap((cluster) => cluster.map((idx) => paragraphs[idx]));
+}
+
+/** Returns the paragraphs that split() builds from `chars`. */
+export function getParagraphs(chars) {
+  // sortParagraphs() is not applied to the result here.
+  return split(chars);
+}
diff --git a/src/core/worker.js b/src/core/worker.js
index d4b02815660a1..33e37e1bcbd20 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -764,6 +764,33 @@ class WorkerMessageHandler {
       });
     });
 
+    handler.on("GetPageData", async function (data) {
+      let pageIndex = data.pageIndex;
+      let task = new WorkerTask('GetPageData: ' + pageIndex);
+      startWorkerTask(task);
+      let pageData;
+      try {
+        pageData = await pdfManager.pdfDocument.getPageData({ handler, task, data });
+      } catch (e) {
+        console.log(e);
+      }
+      finishWorkerTask(task);
+      return pageData;
+    });
+
+    handler.on("GetOutline2", async function (data) {
+      let task = new WorkerTask('GetOutline2');
+      startWorkerTask(task);
+      let pageData;
+      try {
+        pageData = await pdfManager.pdfDocument.getOutline2({ handler, task, data });
+      } catch (e) {
+        console.log(e);
+      }
+      finishWorkerTask(task);
+      return pageData;
+    });
+
     handler.on("GetStructTree", function (data) {
       return pdfManager.getPage(data.pageIndex).then(function (page) {
         return pdfManager.ensure(page, "getStructTree");
diff --git a/src/display/api.js b/src/display/api.js
index 46ceef1ac6069..3a927aa2397d9 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -963,6 +963,14 @@ class PDFDocumentProxy {
     return this._transport.getOutline();
   }
 
+  // Sends `data` to the worker as a "GetPageData" message and returns the result of sendWithPromise().
+  getPageData(data) {
+    return this._transport.messageHandler.sendWithPromise("GetPageData", data);
+  }
+  // Sends `data` to the worker as a "GetOutline2" message and returns the result of sendWithPromise().
+  getOutline2(data) {
+    return this._transport.messageHandler.sendWithPromise("GetOutline2", data);
+  }
   /**
    * @returns {Promise<OptionalContentConfig>} A promise that is resolved with
    *   an {@link OptionalContentConfig} that contains all the optional content
diff --git a/web/annotation_layer_builder.js b/web/annotation_layer_builder.js
index a0fb1ce52071e..2cdf3e92b259e 100644
--- a/web/annotation_layer_builder.js
+++ b/web/annotation_layer_builder.js
@@ -105,7 +105,7 @@ class AnnotationLayerBuilder {
       return;
     }
 
-    const [annotations, hasJSActions, fieldObjects] = await Promise.all([
+    let [annotations, hasJSActions, fieldObjects] = await Promise.all([
       this.pdfPage.getAnnotations({ intent }),
       this._hasJSActionsPromise,
       this._fieldObjectsPromise,
@@ -114,6 +114,21 @@ class AnnotationLayerBuilder {
       return;
     }
 
+    const allowedSubtypes = [
+      'Link',
+      'Widget',
+      'Line',
+      'Circle',
+      'PolyLine',
+      'Polygon',
+      'Caret',
+      'Squiggly',
+      'StrikeOut',
+      'Stamp'
+    ];
+    annotations = annotations.filter(x => allowedSubtypes.includes(x.subtype)
+      || ['Square', 'Ink', 'FreeText'].includes(x.subtype) && !x.isZotero);
+
     // Create an annotation layer div and render the annotations
     // if there is at least one annotation.
     const div = (this.div = document.createElement("div"));
diff --git a/web/app.js b/web/app.js
index ee6001ef42152..e755ff3c9ca88 100644
--- a/web/app.js
+++ b/web/app.js
@@ -78,7 +78,7 @@ import { Toolbar } from "web-toolbar";
 import { ViewHistory } from "./view_history.js";
 
 const FORCE_PAGES_LOADED_TIMEOUT = 10000; // ms
-const WHEEL_ZOOM_DISABLED_TIMEOUT = 1000; // ms
+const WHEEL_ZOOM_DISABLED_TIMEOUT = 20; // ms
 
 const ViewOnLoad = {
   UNKNOWN: -1,
@@ -982,7 +982,7 @@ const PDFViewerApplication = {
     const loadingTask = getDocument(params);
     this.pdfLoadingTask = loadingTask;
 
-    loadingTask.onPassword = (updateCallback, reason) => {
+    loadingTask.onPassword = this.onPassword || ((updateCallback, reason) => {
       if (this.isViewerEmbedded) {
         // The load event can't be triggered until the password is entered, so
         // if the viewer is in an iframe and its visibility depends on the
@@ -993,12 +993,13 @@ const PDFViewerApplication = {
       this.pdfLinkService.externalLinkEnabled = false;
       this.passwordPrompt.setUpdateCallback(updateCallback, reason);
       this.passwordPrompt.open();
-    };
+    });
 
     loadingTask.onProgress = ({ loaded, total }) => {
       this.progress(loaded / total);
     };
 
+    await this.initializedPromise;
     return loadingTask.promise.then(
       pdfDocument => {
         this.load(pdfDocument);
@@ -1177,9 +1178,9 @@ const PDFViewerApplication = {
       this.downloadComplete = true;
       this.loadingBar?.hide();
 
-      firstPagePromise.then(() => {
-        this.eventBus.dispatch("documentloaded", { source: this });
-      });
+      // firstPagePromise.then(() => {
+      //   this.eventBus.dispatch("documentloaded", { source: this });
+      // });
     });
 
     // Since the `setInitialView` call below depends on this being resolved,
@@ -1214,6 +1215,10 @@ const PDFViewerApplication = {
     pdfViewer.setDocument(pdfDocument);
     const { firstPagePromise, onePageRendered, pagesPromise } = pdfViewer;
 
+    firstPagePromise.then(() => {
+      this.eventBus.dispatch("documentloaded", { source: this });
+    });
+
     this.pdfThumbnailViewer?.setDocument(pdfDocument);
 
     const storedPromise = (this.store = new ViewHistory(
diff --git a/web/chromecom.js b/web/chromecom.js
index 60a83846c5d0f..f1bf1d3d944d1 100644
--- a/web/chromecom.js
+++ b/web/chromecom.js
@@ -258,10 +258,10 @@ if (window === top) {
     // If the runtime is still available, the unload is most likely a normal
     // tab closure. Otherwise it is most likely an extension reload.
     if (!isRuntimeAvailable()) {
-      localStorage.setItem(
-        "unload-" + Date.now() + "-" + document.hidden + "-" + location.href,
-        JSON.stringify(history.state)
-      );
+      // NOTE(review): the "unload-<timestamp>-…" entry (serialized
+      // `history.state`) is no longer written to localStorage here, which
+      // leaves this `if` body empty. Presumably localStorage use is being
+      // avoided in this build — confirm, and drop the dead branch if so.
     }
   });
 }
diff --git a/web/genericcom.js b/web/genericcom.js
index 6d5120e9be243..9f82038f4b5f9 100644
--- a/web/genericcom.js
+++ b/web/genericcom.js
@@ -29,11 +29,11 @@ const GenericCom = {};
 
 class GenericPreferences extends BasePreferences {
   async _writeToStorage(prefObj) {
-    localStorage.setItem("pdfjs.preferences", JSON.stringify(prefObj));
+    // No-op: preferences are no longer persisted to localStorage.
   }
 
   async _readFromStorage(prefObj) {
-    return JSON.parse(localStorage.getItem("pdfjs.preferences"));
+    // Nothing is read from localStorage; this resolves with `undefined`.
   }
 }
 
diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js
index 201e716127cdb..647e6a59df060 100644
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -29,7 +29,7 @@ const FindState = {
 };
 
 const FIND_TIMEOUT = 250; // ms
-const MATCH_SCROLL_OFFSET_TOP = -50; // px
+const MATCH_SCROLL_OFFSET_TOP = -120; // px
 const MATCH_SCROLL_OFFSET_LEFT = -400; // px
 
 const CHARACTERS_TO_NORMALIZE = {
diff --git a/web/pdf_page_view.js b/web/pdf_page_view.js
index 4825d1ef75b30..fdb768c28617f 100644
--- a/web/pdf_page_view.js
+++ b/web/pdf_page_view.js
@@ -289,6 +289,7 @@ class PDFPageView {
   }
 
   destroy() {
+    window.onDetachPage?.(this); // Optional global hook, run before reset.
     this.reset();
     this.pdfPage?.cleanup();
   }
@@ -928,6 +929,7 @@ class PDFPageView {
         this.hasRestrictedScaling = false;
       }
     }
+    this.currentCanvasWidth = width * outputScale.sx;
     const sfx = approximateFraction(outputScale.sx);
     const sfy = approximateFraction(outputScale.sy);
 
@@ -956,9 +958,11 @@ class PDFPageView {
     const renderTask = (this.renderTask = this.pdfPage.render(renderContext));
     renderTask.onContinue = renderContinueCallback;
 
+    let that = this;
     const resultPromise = renderTask.promise.then(
       async () => {
         showCanvas?.(true);
+        window.onAttachPage && window.onAttachPage(that);
         await this.#finishRenderTask(renderTask);
 
         this.#renderTextLayer();
diff --git a/web/pdf_print_service.js b/web/pdf_print_service.js
index 52af95969963b..5594bc1596fca 100644
--- a/web/pdf_print_service.js
+++ b/web/pdf_print_service.js
@@ -302,6 +302,7 @@ function renderProgress(index, total, l10n) {
 window.addEventListener(
   "keydown",
   function (event) {
+    return; // Handler disabled: the code below is unreachable.
     // Intercept Cmd/Ctrl + P in all browsers.
     // Also intercept Cmd/Ctrl + Shift + P in Chrome and Opera
     if (