Skip to content

Commit

Permalink
feat: using markdown as main import format, html for images (#34)
Browse files Browse the repository at this point in the history
* feat: using markdown as main import format, html for images
* feat: update export markdown images flow
* fix: wrong image detection with custom element and hr tags in coda contents

---------

Co-authored-by: Hung Luu <[email protected]>
  • Loading branch information
duhoang00 and hungluu authored Feb 27, 2024
1 parent e4adc19 commit 95458c9
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 16 deletions.
5 changes: 5 additions & 0 deletions .changeset/healthy-timers-hear.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"coda-mover": patch
---

Use Markdown as the main import format, with images collected from the HTML export
6 changes: 6 additions & 0 deletions src/modules/simple-mover/Mover.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ export class Mover implements IMover {

this.setStatus(docId, ITEM_STATUS_PENDING)

// when a doc is marked for listing or re-listing,
// its current inner pages should also be marked as stale (pending revalidation)
this.getInnerPages(doc).forEach(page => {
this.itemStatuses[page.id] = { id: page.id, status: ITEM_STATUS_PENDING }
})

this.tasks.add({
id: `list:${docId}`,
execute: async () => {
Expand Down
7 changes: 7 additions & 0 deletions src/modules/simple-mover/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ export const ITEM_STATUS_ARCHIVING = 'archiving'
export const ITEM_STATUS_RETRYING = 'retrying'
export const ITEM_STATUS_CANCELLED = 'cancelled'

// Statuses for the image steps of a page export (set by CodaExporter):
// set right before the html export of the page is requested
export const ITEM_STATUS_FETCHING_IMAGES = 'fetching images'
// set before the html export is looked up and downloaded
export const ITEM_STATUS_DOWNLOADING_IMAGES = 'downloading images'
// set before image placeholders in the markdown file are replaced
export const ITEM_STATUS_REPLACING_IMAGES = 'replacing images'

export const ItemStatuses = [
ITEM_STATUS_PENDING,
ITEM_STATUS_LISTING,
Expand All @@ -45,4 +49,7 @@ export const ItemStatuses = [
ITEM_STATUS_CONFIRMING,
ITEM_STATUS_CANCELLED,
ITEM_STATUS_RETRYING,
ITEM_STATUS_FETCHING_IMAGES,
ITEM_STATUS_DOWNLOADING_IMAGES,
ITEM_STATUS_REPLACING_IMAGES,
] as const
159 changes: 143 additions & 16 deletions src/modules/simple-mover/transfers/CodaExporter.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
import { TaskEmitter, TaskPriority } from '@abxvn/tasks'
import { isAxiosError } from 'axios'
import type { ICodaApis, ICodaPage, IMover, IExporter, IStatus } from '../interfaces'
import { createWriteStream, ensureDir } from 'fs-extra'
import { createWriteStream, ensureDir, readFile, writeFile } from 'fs-extra'
import { getCurrentIsoDateTime, getParentDir, trimSlashes } from '../lib'
import { download } from '../apis'
import { ITEM_STATUS_DONE, ITEM_STATUS_DOWNLOADING, ITEM_STATUS_ERROR, ITEM_STATUS_EXPORTING, ITEM_STATUS_PENDING, SERVER_SAVE_ITEMS } from '../events'
import {
ITEM_STATUS_DONE,
ITEM_STATUS_DOWNLOADING,
ITEM_STATUS_ERROR,
ITEM_STATUS_EXPORTING,
ITEM_STATUS_PENDING,
SERVER_SAVE_ITEMS,
ITEM_STATUS_FETCHING_IMAGES,
ITEM_STATUS_REPLACING_IMAGES,
ITEM_STATUS_DOWNLOADING_IMAGES,
} from '../events'
import { dirname } from 'path'

// In the markdown export, img and hr tags show up only as runs of empty lines.
// These patterns locate those placeholders so they can be filled from the html export.

// Placeholder at the very start of the content: two leading newlines.
const CODA_IMAGE_REPLACEMENT_START_REGEX = /^\n{2}/
// Placeholder inside the content: four consecutive newlines (three empty lines).
// NOTE(review): this instance carries the `g` flag and is shared. `.test()` on a global
// regex advances `lastIndex` after a match, so a later `.test()` call may start scanning
// mid-string and miss a placeholder. Reset `lastIndex` to 0 (or use a non-global pattern)
// before using it for detection; `String.prototype.replace` resets it on its own.
const CODA_IMAGE_REPLACEMENT_BODY_REGEX = /\n{4}/g

export class CodaExporter implements IExporter {
private importChunkCounter = 0
Expand Down Expand Up @@ -48,45 +62,158 @@ export class CodaExporter implements IExporter {
this.tasks.next()
}

async exportPage (page: ICodaPage, exportId?: string) {
async exportPage (page: ICodaPage, exportId?: string, imageExportId?: string) {
const docId = trimSlashes(page.treePath).split('/').shift()
if (!docId) throw Error('invalid page tree path')

const parentDir = getParentDir(page, this.items)
const pageFilePath = `${parentDir}/${page.name.replace(/\//g, ' ')}.html`

if (!exportId) {
this.setStatus(page.id, ITEM_STATUS_EXPORTING)
const exportRes = await this.apis.exportPage(docId, page.id)
const pageFilePath = `${parentDir}/${page.name.replace(/\//g, ' ')}.md`

if (!exportId) exportId = await this.exportPageAsMarkdown(docId, page)
if (!exportId) throw Error('markdown export isn\'t requested')
if (!imageExportId) {
const isMarkdownDownloaded = await this.downloadMarkdownExport(docId, page, pageFilePath, exportId)
if (!isMarkdownDownloaded) {
return
}
}

exportId = exportRes.id
const markdownContent = await readFile(pageFilePath, 'utf8')
const shouldAddImages = CODA_IMAGE_REPLACEMENT_START_REGEX.test(markdownContent) ||
CODA_IMAGE_REPLACEMENT_BODY_REGEX.test(markdownContent)

if (shouldAddImages) {
if (!imageExportId) imageExportId = await this.exportPageAsHtml(docId, page)
if (!imageExportId) throw Error('html images export isn\'t requested')

const isImageReplaced = await this.downloadImageExportAndReplaceInMarkdown(
docId,
page,
pageFilePath,
markdownContent,
exportId,
imageExportId,
)
if (!isImageReplaced) {
return
}
}

if (!exportId) throw Error('export isn\'t requested')
this.setStatus(page.id, ITEM_STATUS_DONE)
}

private async exportPageAsMarkdown (docId: string, page: ICodaPage) {
this.setStatus(page.id, ITEM_STATUS_EXPORTING)
const exportRes = await this.apis.exportPage(docId, page.id, 'markdown')

const pageExport = await this.apis.getPageExport(docId, page.id, exportId)
return exportRes.id
}

if (!pageExport.downloadLink) { // retry later at low priority
private async downloadMarkdownExport (
docId: string,
page: ICodaPage,
pageFilePath: string,
markdownExportId: string,
) {
const pageExport = await this.apis.getPageExport(docId, page.id, markdownExportId)

if (!pageExport.downloadLink) {
// retry later at low priority with current markdown export id
this.tasks.add({
id: page.id,
execute: async () => await this.exportPage(page, exportId),
execute: async () => await this.exportPage(page, markdownExportId),
priority: TaskPriority.LOW,
})

return
return false
}

this.setStatus(page.id, ITEM_STATUS_DOWNLOADING)
this.items[page.id].syncedAt = getCurrentIsoDateTime()

await ensureDir(parentDir)
await ensureDir(dirname(pageFilePath))
await download(pageExport.downloadLink, createWriteStream(pageFilePath, {
flags: 'w',
encoding: 'utf8',
}))

this.items[page.id].filePath = pageFilePath
this.setStatus(page.id, ITEM_STATUS_DONE)

return true
}

/**
 * Requests an html export of the given page, used as the source of image tags.
 * Marks the page as "fetching images" before the request is sent.
 *
 * @param docId id of the doc containing the page
 * @param page page to export
 * @returns id of the requested html export
 */
private async exportPageAsHtml (docId: string, page: ICodaPage) {
  const { id: pageId } = page

  this.setStatus(pageId, ITEM_STATUS_FETCHING_IMAGES)

  const { id: htmlExportId } = await this.apis.exportPage(docId, pageId, 'html')

  return htmlExportId
}

/**
 * Downloads the html export of a page and uses its `<img>` / `<hr>` tags to fill in
 * the empty-line placeholders of the page's markdown content, then rewrites the
 * markdown file.
 *
 * When the html export has no download link yet, a low priority task is queued to
 * re-run `exportPage` with both export ids and nothing is written.
 *
 * @param docId id of the doc containing the page
 * @param page page being exported
 * @param pageFilePath path of the markdown file; the html export is saved under the
 *   same path with a trailing `.md` replaced by `.html`
 * @param markdownContent content of the markdown file to fill images into
 * @param markdownExportId id of the markdown export, passed along when retrying
 * @param htmlExportId id of the html export to download
 * @returns `true` once the markdown file has been rewritten, `false` when a retry was queued
 */
private async downloadImageExportAndReplaceInMarkdown (
  docId: string,
  page: ICodaPage,
  pageFilePath: string,
  markdownContent: string,
  markdownExportId: string,
  htmlExportId: string,
) {
  this.setStatus(page.id, ITEM_STATUS_DOWNLOADING_IMAGES)
  const htmlExport = await this.apis.getPageExport(docId, page.id, htmlExportId)
  const htmlFilePath = pageFilePath.replace(/\.md$/, '.html')

  if (!htmlExport.downloadLink) {
    // retry later at low priority with current both markdown and html export ids
    this.tasks.add({
      id: page.id,
      execute: async () => await this.exportPage(page, markdownExportId, htmlExportId),
      priority: TaskPriority.LOW,
    })

    return false
  }

  await download(htmlExport.downloadLink, createWriteStream(htmlFilePath, {
    flags: 'w',
    encoding: 'utf8',
  }))

  this.setStatus(page.id, ITEM_STATUS_REPLACING_IMAGES)
  const htmlContent = await readFile(htmlFilePath, 'utf8')
  const replacedBlocks: string[] = []
  // img and hr tags are rendered as 3 empty lines, or 2 empty lines at start.
  // The trailing `[^>]*` keeps the attributes written after `src` (such as `alt`)
  // inside the matched text so they can be read below.
  const replacedHtmlTags = htmlContent.match(/<img[^>]+src="[^">]+"[^>]*|<hr/g)

  replacedHtmlTags?.forEach(tag => {
    if (!tag.startsWith('<img')) {
      // not an image tag: it still occupies a placeholder, so keep the slots aligned
      replacedBlocks.push('\n')
      return
    }

    const src = tag.match(/src="([^"]*)"/)?.[1]
    // an image without alt text gets an empty label instead of the text "undefined"
    const alt = tag.match(/alt="([^"]*)"/)?.[1] ?? ''

    replacedBlocks.push(`![${alt}](${src})`)
  })

  // placeholders are consumed in document order: the leading one first, then the rest
  let replacementCount = 0
  let markdownContentWithImages = markdownContent.replace(CODA_IMAGE_REPLACEMENT_START_REGEX, emptyLines => {
    return replacedBlocks[replacementCount]
      ? `${replacedBlocks[replacementCount++]}\n\n`
      : emptyLines // restore empty lines if replacement not found from html export
  })

  markdownContentWithImages = markdownContentWithImages.replace(CODA_IMAGE_REPLACEMENT_BODY_REGEX, emptyLines => {
    return replacedBlocks[replacementCount]
      ? `\n\n${replacedBlocks[replacementCount++]}\n\n`
      : emptyLines // restore empty lines if replacement not found from html export
  })

  // blocks that found no placeholder are appended at the end so no image is lost
  if (replacementCount < replacedBlocks.length) {
    markdownContentWithImages += replacedBlocks.slice(replacementCount).join('\n\n')
  }

  await writeFile(pageFilePath, markdownContentWithImages, 'utf8')

  return true
}

stopPendingExports () {
Expand Down

0 comments on commit 95458c9

Please sign in to comment.