diff --git a/.changeset/healthy-timers-hear.md b/.changeset/healthy-timers-hear.md new file mode 100644 index 0000000..17b158d --- /dev/null +++ b/.changeset/healthy-timers-hear.md @@ -0,0 +1,5 @@ +--- +"coda-mover": patch +--- + +Use markdown as main import format, with images collected from html export diff --git a/src/modules/simple-mover/Mover.ts b/src/modules/simple-mover/Mover.ts index a622cfa..bb21163 100644 --- a/src/modules/simple-mover/Mover.ts +++ b/src/modules/simple-mover/Mover.ts @@ -137,6 +137,12 @@ export class Mover implements IMover { this.setStatus(docId, ITEM_STATUS_PENDING) + // when doc is marked for listing or re-listing, + // its current inner pages should also be marked as stale (pending revalidation) + this.getInnerPages(doc).forEach(page => { + this.itemStatuses[page.id] = { id: page.id, status: ITEM_STATUS_PENDING } + }) + this.tasks.add({ id: `list:${docId}`, execute: async () => { diff --git a/src/modules/simple-mover/events.ts b/src/modules/simple-mover/events.ts index b600497..ec6f652 100644 --- a/src/modules/simple-mover/events.ts +++ b/src/modules/simple-mover/events.ts @@ -30,6 +30,10 @@ export const ITEM_STATUS_ARCHIVING = 'archiving' export const ITEM_STATUS_RETRYING = 'retrying' export const ITEM_STATUS_CANCELLED = 'cancelled' +export const ITEM_STATUS_FETCHING_IMAGES = 'fetching images' +export const ITEM_STATUS_DOWNLOADING_IMAGES = 'downloading images' +export const ITEM_STATUS_REPLACING_IMAGES = 'replacing images' + export const ItemStatuses = [ ITEM_STATUS_PENDING, ITEM_STATUS_LISTING, @@ -45,4 +49,7 @@ export const ItemStatuses = [ ITEM_STATUS_CONFIRMING, ITEM_STATUS_CANCELLED, ITEM_STATUS_RETRYING, + ITEM_STATUS_FETCHING_IMAGES, + ITEM_STATUS_DOWNLOADING_IMAGES, + ITEM_STATUS_REPLACING_IMAGES, ] as const diff --git a/src/modules/simple-mover/transfers/CodaExporter.ts b/src/modules/simple-mover/transfers/CodaExporter.ts index 39e3755..3127845 100644 --- a/src/modules/simple-mover/transfers/CodaExporter.ts +++ 
b/src/modules/simple-mover/transfers/CodaExporter.ts @@ -1,10 +1,24 @@ import { TaskEmitter, TaskPriority } from '@abxvn/tasks' import { isAxiosError } from 'axios' import type { ICodaApis, ICodaPage, IMover, IExporter, IStatus } from '../interfaces' -import { createWriteStream, ensureDir } from 'fs-extra' +import { createWriteStream, ensureDir, readFile, writeFile } from 'fs-extra' import { getCurrentIsoDateTime, getParentDir, trimSlashes } from '../lib' import { download } from '../apis' -import { ITEM_STATUS_DONE, ITEM_STATUS_DOWNLOADING, ITEM_STATUS_ERROR, ITEM_STATUS_EXPORTING, ITEM_STATUS_PENDING, SERVER_SAVE_ITEMS } from '../events' +import { + ITEM_STATUS_DONE, + ITEM_STATUS_DOWNLOADING, + ITEM_STATUS_ERROR, + ITEM_STATUS_EXPORTING, + ITEM_STATUS_PENDING, + SERVER_SAVE_ITEMS, + ITEM_STATUS_FETCHING_IMAGES, + ITEM_STATUS_REPLACING_IMAGES, + ITEM_STATUS_DOWNLOADING_IMAGES, +} from '../events' +import { dirname } from 'path' + +const CODA_IMAGE_REPLACEMENT_START_REGEX = /^\n{2}/ +const CODA_IMAGE_REPLACEMENT_BODY_REGEX = /\n{4}/g export class CodaExporter implements IExporter { private importChunkCounter = 0 @@ -48,45 +62,158 @@ export class CodaExporter implements IExporter { this.tasks.next() } - async exportPage (page: ICodaPage, exportId?: string) { + async exportPage (page: ICodaPage, exportId?: string, imageExportId?: string) { const docId = trimSlashes(page.treePath).split('/').shift() if (!docId) throw Error('invalid page tree path') const parentDir = getParentDir(page, this.items) - const pageFilePath = `${parentDir}/${page.name.replace(/\//g, ' ')}.html` - - if (!exportId) { - this.setStatus(page.id, ITEM_STATUS_EXPORTING) - const exportRes = await this.apis.exportPage(docId, page.id) + const pageFilePath = `${parentDir}/${page.name.replace(/\//g, ' ')}.md` + + if (!exportId) exportId = await this.exportPageAsMarkdown(docId, page) + if (!exportId) throw Error('markdown export isn\'t requested') + if (!imageExportId) { + const isMarkdownDownloaded = 
await this.downloadMarkdownExport(docId, page, pageFilePath, exportId) + if (!isMarkdownDownloaded) { + return + } + } - exportId = exportRes.id + const markdownContent = await readFile(pageFilePath, 'utf8') + const shouldAddImages = CODA_IMAGE_REPLACEMENT_START_REGEX.test(markdownContent) || + markdownContent.search(CODA_IMAGE_REPLACEMENT_BODY_REGEX) !== -1 // search() ignores lastIndex; test() on the shared /g regex keeps it between calls and can miss matches on retry + + if (shouldAddImages) { + if (!imageExportId) imageExportId = await this.exportPageAsHtml(docId, page) + if (!imageExportId) throw Error('html images export isn\'t requested') + + const isImageReplaced = await this.downloadImageExportAndReplaceInMarkdown( + docId, + page, + pageFilePath, + markdownContent, + exportId, + imageExportId, + ) + if (!isImageReplaced) { + return + } } - if (!exportId) throw Error('export isn\'t requested') + this.setStatus(page.id, ITEM_STATUS_DONE) + } + + private async exportPageAsMarkdown (docId: string, page: ICodaPage) { + this.setStatus(page.id, ITEM_STATUS_EXPORTING) + const exportRes = await this.apis.exportPage(docId, page.id, 'markdown') - const pageExport = await this.apis.getPageExport(docId, page.id, exportId) + return exportRes.id + } - if (!pageExport.downloadLink) { // retry later at low priority + private async downloadMarkdownExport ( + docId: string, + page: ICodaPage, + pageFilePath: string, + markdownExportId: string, + ) { + const pageExport = await this.apis.getPageExport(docId, page.id, markdownExportId) + + if (!pageExport.downloadLink) { + // retry later at low priority with current markdown export id this.tasks.add({ id: page.id, - execute: async () => await this.exportPage(page, exportId), + execute: async () => await this.exportPage(page, markdownExportId), priority: TaskPriority.LOW, }) - return + return false } this.setStatus(page.id, ITEM_STATUS_DOWNLOADING) this.items[page.id].syncedAt = getCurrentIsoDateTime() - await ensureDir(parentDir) + await ensureDir(dirname(pageFilePath)) await download(pageExport.downloadLink, createWriteStream(pageFilePath, { 
flags: 'w', encoding: 'utf8', })) this.items[page.id].filePath = pageFilePath - this.setStatus(page.id, ITEM_STATUS_DONE) + + return true + } + + private async exportPageAsHtml (docId: string, page: ICodaPage) { + this.setStatus(page.id, ITEM_STATUS_FETCHING_IMAGES) + const exportRes = await this.apis.exportPage(docId, page.id, 'html') + + return exportRes.id + } + + private async downloadImageExportAndReplaceInMarkdown ( + docId: string, + page: ICodaPage, + pageFilePath: string, + markdownContent: string, + markdownExportId: string, + htmlExportId: string, + ) { + this.setStatus(page.id, ITEM_STATUS_DOWNLOADING_IMAGES) + const htmlExport = await this.apis.getPageExport(docId, page.id, htmlExportId) + const htmlFilePath = pageFilePath.replace(/\.md$/, '.html') + + if (!htmlExport.downloadLink) { + // retry later at low priority with current both markdown and html export ids + this.tasks.add({ + id: page.id, + execute: async () => await this.exportPage(page, markdownExportId, htmlExportId), + priority: TaskPriority.LOW, + }) + + return false + } + + await download(htmlExport.downloadLink, createWriteStream(htmlFilePath, { + flags: 'w', + encoding: 'utf8', + })) + + this.setStatus(page.id, ITEM_STATUS_REPLACING_IMAGES) + const htmlContent = await readFile(htmlFilePath, 'utf8') + const replacedBlocks: string[] = [] + // img and hr tags are rendered as 3 empty lines or 2 empty lines at start + const replacedHtmlTags = htmlContent.match(/]+src="[^">]+"|
{ + if (!tag.startsWith(' { + return replacedBlocks[replacementCount] + ? `${replacedBlocks[replacementCount++]}\n\n` + : emptyLines // restore empty lines if replacement not found from html export + }) + + markdownContentWithImages = markdownContentWithImages.replace(CODA_IMAGE_REPLACEMENT_BODY_REGEX, emptyLines => { + return replacedBlocks[replacementCount] + ? `\n\n${replacedBlocks[replacementCount++]}\n\n` + : emptyLines // restore empty lines if replacement not found from html export + }) + + if (replacementCount < replacedBlocks.length) { + markdownContentWithImages += replacedBlocks.slice(replacementCount).join('\n\n') + } + + await writeFile(pageFilePath, markdownContentWithImages, 'utf8') + + return true } stopPendingExports () {