Skip to content

Commit

Permalink
refactor: use helper function to parse pages string
Browse files Browse the repository at this point in the history
  • Loading branch information
mojoaxel committed Dec 27, 2023
1 parent b3f61a9 commit c401956
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 85 deletions.
107 changes: 22 additions & 85 deletions PDFMergerBase.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { PDFDocument } from 'pdf-lib'

import { parsePagesString } from './parsePagesString'

/**
* @typedef {Object} Metadata
* @property {string} [producer]
Expand Down Expand Up @@ -68,42 +70,26 @@ export default class PDFMergerBase {
* Add pages from a PDF document to the end of the merged document.
*
* @async
* @param {string | Buffer | ArrayBuffer} inputFile a pdf source
* @param {PdfInput} input - a pdf source
* @param {string | string[] | number | number[] | undefined | null} [pages]
* @returns {Promise<void>}
*/
async add (inputFile, pages) {
async add (input, pages) {
await this._ensureDoc()
if (typeof pages === 'undefined' || pages === null) {
if (typeof pages === 'undefined' || pages === null || pages === 'all') {
// if no pages are given, add the entire document
await this._addEntireDocument(inputFile)
await this._addPagesFromDocument(input)
} else if (typeof pages === 'number') {
// e.g. 2
await this._addGivenPages(inputFile, [pages])
await this._addPagesFromDocument(input, [pages])
} else if (Array.isArray(pages)) {
// e.g. [2,3,6] or ["2","3","6"]
const pagesAsNumbers = pages.map(p => parseInt(p))
await this._addGivenPages(inputFile, pagesAsNumbers)
const pagesAsNumbers = pages.map(p => typeof p === 'string' ? parseInt(p.trim()) : p)
await this._addPagesFromDocument(input, pagesAsNumbers)
} else if (typeof pages === 'string' || pages instanceof String) {
if (pages === 'all') {
// if no pages are given, add the entire document
await this._addEntireDocument(inputFile)
} else if (pages.indexOf(',') > 0) {
// e.g. "2,3,6"
const list = pages.trim().replace(/ /g, '').split(',')
await this._addGivenPages(inputFile, list)
} else if (pages.toLowerCase().indexOf('to') >= 0) {
// e.g. "2 to 6" or "2to6"
const span = pages.trim().replace(/ /g, '').split('to')
await this._addFromToPage(inputFile, parseInt(span[0]), parseInt(span[1]))
} else if (pages.indexOf('-') >= 0) {
// e.g. "2 - 6" or "2-6"
const span = pages.trim().replace(/ /g, '').split('-')
await this._addFromToPage(inputFile, parseInt(span[0]), parseInt(span[1]))
} else if (pages.trim().match(/^[0-9]+$/)) {
// e.g. "2"
await this._addGivenPages(inputFile, [pages])
}
// e.g. "2,3,6" or "2-6" or "2to6,8,10-12"
const pagesArray = parsePagesString(pages)
await this._addPagesFromDocument(input, pagesArray)
} else {
throw new Error([
'Invalid parameter "pages".',
Expand Down Expand Up @@ -197,75 +183,26 @@ export default class PDFMergerBase {
}

/**
* Add the entire document to the merged document.
*
* @async
* @protected
* @param {PdfInput} input
* @param {number[] | undefined} pages - array of page numbers to add (starts at 1)
* @returns {Promise<void>}
*/
async _addEntireDocument (input) {
async _addPagesFromDocument (input, pages = undefined) {
const src = await this._getInputAsUint8Array(input)
const srcDoc = await PDFDocument.load(src, this._loadOptions)

const copiedPages = await this._doc.copyPages(srcDoc, srcDoc.getPageIndices())
copiedPages.forEach((page) => {
this._doc.addPage(page)
})
}

/**
* Add a range of pages from the document to the merged document.
*
* @async
* @protected
* @param {PdfInput} input
* @param {number} from - first page to add (starts at 1)
* @param {number} to - last page to add (starts at 1)
* @returns {Promise<void>}
*/
async _addFromToPage (input, from, to) {
if (typeof from !== 'number' || typeof to !== 'number' || from <= 0 || from <= 0) {
throw new Error('Invalid function parameter. \'from\' and \'to\' must be positive \'numbers\'.')
}
if (to < from) {
throw new Error('Invalid function parameter. \'to\' must be greater or equal to \'from\'.')
}

const src = await this._getInputAsUint8Array(input)
const srcDoc = await PDFDocument.load(src, this._loadOptions)
const pageCount = srcDoc.getPageCount()

if (from > pageCount || to > pageCount) {
throw new Error(`Invalid function parameter. The document has not enough pages. (from:${from}, to:${to}, pages:${pageCount})`)
}

// create a array [2,3,4] with from=2 and to=4
const pages = Array.from({ length: (to - from) + 1 }, (_, i) => i + from - 1)
const copiedPages = await this._doc.copyPages(srcDoc, pages)
copiedPages.forEach((page) => {
this._doc.addPage(page)
})
}

/**
* @async
* @protected
* @param {PdfInput} input
* @param {number[]} pages - array of page numbers to add (starts at 1)
* @returns {Promise<void>}
*/
async _addGivenPages (input, pages) {
if (pages.length <= 0) {
return
let indices = []
if (pages === undefined) {
// add the whole document
indices = srcDoc.getPageIndices()
} else {
// add selected pages switching to a 0-based index
indices = pages.map(p => p - 1)
}

const src = await this._getInputAsUint8Array(input)
const srcDoc = await PDFDocument.load(src, this._loadOptions)

// switch from indexed 1 to indexed 0
const pagesIndexed1 = pages.map(p => p - 1)
const copiedPages = await this._doc.copyPages(srcDoc, pagesIndexed1)
const copiedPages = await this._doc.copyPages(srcDoc, indices)
copiedPages.forEach((page) => {
this._doc.addPage(page)
})
Expand Down
51 changes: 51 additions & 0 deletions parsePagesString.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
 * Parse a page-selection string into a flat array of page numbers.
 *
 * The string is a comma-separated list of items. Each item is either a single
 * page number ("4") or an inclusive range written with "-" or "to" ("2-6",
 * "2to6"). Spaces around numbers, range separators and commas are ignored;
 * spaces inside a number or inside the word "to" are rejected.
 * Items are expanded in the order given and duplicates are kept.
 *
 * @param {string} pages
 * @returns {number[]} the selected page numbers, in the order they were listed
 * @throws {Error} if `pages` is not a string, if any item does not match the
 *   expected pattern, or if a range ends before it starts (e.g. "3-1")
 *
 * @example ```js
 * parsePagesString('2') // [2]
 * parsePagesString('1,2,3') // [1,2,3]
 * parsePagesString('1-3') // [1,2,3]
 * parsePagesString('1to3') // [1,2,3]
 * parsePagesString('1 to 3') // [1,2,3]
 * parsePagesString('10,1-3') // [10,1,2,3]
 * parsePagesString('9,1-3,5-7') // [9,1,2,3,5,6,7]
 * ```
 */
export function parsePagesString (pages) {
  const throwError = () => {
    throw new Error([
      'Invalid parameter "pages".',
      'Must be a string like "1,2,3" or "1-3" or "1to3"',
      `Was "${pages}" instead.`
    ].join(' '))
  }

  if (typeof pages !== 'string') {
    throwError()
  }

  // One list item: a page number, optionally followed by "-" or "to" and a
  // second page number. Only spaces are tolerated between the parts, so the
  // string that is validated is exactly the string that is parsed.
  const itemPattern = /^ *(\d+) *(?:(?:-|to) *(\d+) *)?$/

  return pages.trim().split(',').flatMap((item) => {
    const match = itemPattern.exec(item)
    if (match === null) {
      throwError()
    }

    const start = Number.parseInt(match[1], 10)
    if (match[2] === undefined) {
      // single page number
      return [start]
    }

    const end = Number.parseInt(match[2], 10)
    if (end < start) {
      // a negative length would silently produce an empty selection
      throw new Error([
        'Invalid parameter "pages".',
        `The range "${item.trim()}" must not end before it starts.`
      ].join(' '))
    }
    return Array.from({ length: end - start + 1 }, (_, i) => start + i)
  })
}
50 changes: 50 additions & 0 deletions test/parsePagesString.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { parsePagesString } from '../parsePagesString'

// Specification of parsePagesString: accepted list / range / "to" syntaxes,
// their combinations, and the inputs that must be rejected.
describe('parsePagesString', () => {
  /**
   * Check a table of [input, expected] pairs against the parser.
   *
   * @param {Array<[string, number[]]>} table
   */
  const expectParsed = (table) => {
    for (const [input, expected] of table) {
      expect(parsePagesString(input)).toStrictEqual(expected)
    }
  }

  test('should parse lists', () => {
    expectParsed([
      ['2', [2]],
      [' 2 ', [2]],
      ['1,2,3', [1, 2, 3]],
      [' 1,2,3 ', [1, 2, 3]],
      [' 1, 2, 3 ', [1, 2, 3]],
      [' 1 , 2 , 3 ', [1, 2, 3]],
      [' 2,4,6 ', [2, 4, 6]]
    ])
  })

  test('should parse ranges', () => {
    expectParsed([
      ['1-3', [1, 2, 3]],
      [' 1-3 ', [1, 2, 3]],
      [' 1 - 3 ', [1, 2, 3]],
      ['2-6', [2, 3, 4, 5, 6]]
    ])
  })

  test('should parse range with "to"', () => {
    expectParsed([
      ['1to3', [1, 2, 3]],
      [' 1to3 ', [1, 2, 3]],
      [' 1 to 3 ', [1, 2, 3]],
      ['2 to 6', [2, 3, 4, 5, 6]]
    ])
  })

  test('should parse combined lists and ranges', () => {
    expectParsed([
      ['1-3,4', [1, 2, 3, 4]],
      [' 1-3 , 4 , 5', [1, 2, 3, 4, 5]],
      [' 1 - 3, 5-7 ', [1, 2, 3, 5, 6, 7]],
      [' 9,8,1 - 3, 5-6 ,8,9', [9, 8, 1, 2, 3, 5, 6, 8, 9]],
      ['2-6,8', [2, 3, 4, 5, 6, 8]],
      ['1,3-5,7', [1, 3, 4, 5, 7]],
      ['11-13,5,8,16-18,14', [11, 12, 13, 5, 8, 16, 17, 18, 14]]
    ])
  })

  test('invalid', () => {
    // non-string arguments first, then malformed strings
    const rejected = [
      undefined,
      null,
      {},
      '',
      '-1to-3',
      '1--3',
      '1 until 3',
      '1-',
      '10e3'
    ]
    for (const value of rejected) {
      expect(() => parsePagesString(value)).toThrow()
    }
  })
})

0 comments on commit c401956

Please sign in to comment.