Skip to content

Commit

Permalink
fix: wrong image detection with hr tags in coda contents
Browse files Browse the repository at this point in the history
  • Loading branch information
hungluu committed Feb 27, 2024
1 parent 99dab49 commit 7277798
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 22 deletions.
6 changes: 6 additions & 0 deletions src/modules/simple-mover/Mover.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ export class Mover implements IMover {

this.setStatus(docId, ITEM_STATUS_PENDING)

// when a doc is marked for listing or re-listing,
// its current inner pages should also be marked as stale (pending revalidation)
this.getInnerPages(doc).forEach(page => {
this.itemStatuses[page.id] = { id: page.id, status: ITEM_STATUS_PENDING }
})

this.tasks.add({
id: `list:${docId}`,
execute: async () => {
Expand Down
67 changes: 45 additions & 22 deletions src/modules/simple-mover/transfers/CodaExporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import {
} from '../events'
import { dirname } from 'path'

const CODA_IMAGE_REPLACEMENT_REGEX = /^\n{2}|\n{3}/g
const CODA_IMAGE_REPLACEMENT_START_REGEX = /^\n{2}/
const CODA_IMAGE_REPLACEMENT_BODY_REGEX = /\n{4}/g

export class CodaExporter implements IExporter {
private importChunkCounter = 0
Expand Down Expand Up @@ -70,23 +71,32 @@ export class CodaExporter implements IExporter {

if (!exportId) exportId = await this.exportPageAsMarkdown(docId, page)
if (!exportId) throw Error('markdown export isn\'t requested')
if (!imageExportId) await this.downloadMarkdownExport(docId, page, pageFilePath, exportId)
if (!imageExportId) {
const isMarkdownDownloaded = await this.downloadMarkdownExport(docId, page, pageFilePath, exportId)
if (!isMarkdownDownloaded) {
return
}
}

const markdownContent = await readFile(pageFilePath, 'utf8')
const shouldAddImages = CODA_IMAGE_REPLACEMENT_REGEX.test(markdownContent)
const shouldAddImages = CODA_IMAGE_REPLACEMENT_START_REGEX.test(markdownContent) ||
CODA_IMAGE_REPLACEMENT_BODY_REGEX.test(markdownContent)

if (shouldAddImages) {
if (!imageExportId) imageExportId = await this.exportPageAsHtml(docId, page)
if (!imageExportId) throw Error('html images export isn\'t requested')

await this.downloadImageExportAndReplaceInMarkdown(
const isImageReplaced = await this.downloadImageExportAndReplaceInMarkdown(
docId,
page,
pageFilePath,
markdownContent,
exportId,
imageExportId,
)
if (!isImageReplaced) {
return
}
}

this.setStatus(page.id, ITEM_STATUS_DONE)
Expand Down Expand Up @@ -115,7 +125,7 @@ export class CodaExporter implements IExporter {
priority: TaskPriority.LOW,
})

return
return false
}

this.setStatus(page.id, ITEM_STATUS_DOWNLOADING)
Expand All @@ -128,6 +138,8 @@ export class CodaExporter implements IExporter {
}))

this.items[page.id].filePath = pageFilePath

return true
}

private async exportPageAsHtml (docId: string, page: ICodaPage) {
Expand Down Expand Up @@ -157,7 +169,7 @@ export class CodaExporter implements IExporter {
priority: TaskPriority.LOW,
})

return
return false
}

await download(htmlExport.downloadLink, createWriteStream(htmlFilePath, {
Expand All @@ -167,30 +179,41 @@ export class CodaExporter implements IExporter {

this.setStatus(page.id, ITEM_STATUS_REPLACING_IMAGES)
const htmlContent = await readFile(htmlFilePath, 'utf8')
const imageBlocks: string[] = []
const imgTags = htmlContent.match(/<img[^>]+src="([^">]+)"/g)
const replacedBlocks: string[] = []
// in the markdown export, img and hr tags are rendered as 3 empty lines (or as 2 empty lines at the very start of the content)
const replacedHtmlTags = htmlContent.match(/<img[^>]+src="[^">]+"|<hr/g)

if (imgTags) {
imgTags.forEach(imgTag => {
const src = imgTag.match(/src="([^"]+)"/)?.[1]
const alt = imgTag.match(/alt="([^"]*)"/)?.[1]
replacedHtmlTags?.forEach(tag => {
if (!tag.startsWith('<img')) { // not image tag, ignored
return replacedBlocks.push('\n')
}

imageBlocks.push(`![${alt}](${src})`)
})
}
const src = tag.match(/src="([^"]*)"/)?.[1]
const alt = tag.match(/alt="([^"]*)"/)?.[1]

replacedBlocks.push(`![${alt}](${src})`)
})

let replacedImageCount = 0
let markdownContentWithImages = markdownContent.replace(CODA_IMAGE_REPLACEMENT_REGEX, emptyLines => {
return imageBlocks[replacedImageCount]
? `\n\n${imageBlocks[replacedImageCount++]}\n\n`
: emptyLines // restored empty lines if no images found from html export
let replacementCount = 0
let markdownContentWithImages = markdownContent.replace(CODA_IMAGE_REPLACEMENT_START_REGEX, emptyLines => {
return replacedBlocks[replacementCount]
? `${replacedBlocks[replacementCount++]}\n\n`
: emptyLines // restore empty lines if replacement not found from html export
})

if (replacedImageCount < imageBlocks.length) {
markdownContentWithImages += imageBlocks.slice(replacedImageCount).join('\n')
markdownContentWithImages = markdownContentWithImages.replace(CODA_IMAGE_REPLACEMENT_BODY_REGEX, emptyLines => {
return replacedBlocks[replacementCount]
? `\n\n${replacedBlocks[replacementCount++]}\n\n`
: emptyLines // restore empty lines if replacement not found from html export
})

if (replacementCount < replacedBlocks.length) {
markdownContentWithImages += replacedBlocks.slice(replacementCount).join('\n\n')
}

await writeFile(pageFilePath, markdownContentWithImages, 'utf8')

return true
}

stopPendingExports () {
Expand Down

0 comments on commit 7277798

Please sign in to comment.