Skip to content

Commit

Permalink
feat: replace whitespaces in the *_sentences.tsv files
Browse files Browse the repository at this point in the history
  • Loading branch information
moz-dfeller committed Sep 13, 2024
1 parent 002a9f6 commit cebd5c6
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions bundler/src/core/sentences.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fs from 'node:fs'
import { pipeline } from 'node:stream/promises'
import { Transform } from 'node:stream'
import path from 'node:path'

import { readerTaskEither as RTE, taskEither as TE } from 'fp-ts'
Expand Down Expand Up @@ -30,6 +31,19 @@ const logError = (err: unknown) => {
return Error(String(err))
}

/**
 * Builds an object-mode Transform stream that normalises whitespace in the
 * `sentence` field of every row passing through it.
 *
 * Each individual whitespace character matched by `\s` (tab, newline,
 * carriage return, non-breaking space, ...) is replaced by one plain space,
 * so a run of N whitespace characters becomes N spaces. All other fields of
 * the row are copied through untouched, and the incoming row object is not
 * mutated.
 *
 * Rows whose `sentence` is not a string (e.g. `null` or missing) are
 * forwarded unchanged instead of throwing a TypeError from inside the
 * stream callback.
 *
 * @returns A new Transform; create one per pipeline, streams are single-use.
 */
const replaceWhitespaces = () =>
  new Transform({
    objectMode: true,
    transform(chunk: { sentence?: unknown }, _encoding, callback) {
      const { sentence } = chunk

      // Nothing to normalise: pass the row along as-is rather than crashing.
      if (typeof sentence !== 'string') {
        callback(null, chunk)
        return
      }

      // `\s` is case-less, so only the global flag is needed.
      callback(null, { ...chunk, sentence: sentence.replace(/\s/g, ' ') })
    },
  })

const fetchSentences =
(validated: boolean) => (releaseDirPath: string) => (locale: string) =>
TE.tryCatch(async () => {
Expand All @@ -56,6 +70,7 @@ const fetchSentences =

await pipeline(
stream,
replaceWhitespaces(),
stringify({ header: true, delimiter: '\t' }),
writeStream,
)
Expand Down

0 comments on commit cebd5c6

Please sign in to comment.