Skip to content

Commit

Permalink
feat: add @docen/image
Browse files Browse the repository at this point in the history
  • Loading branch information
DemoMacro committed Jun 29, 2024
1 parent ce5256a commit 195f629
Show file tree
Hide file tree
Showing 23 changed files with 254 additions and 77 deletions.
5 changes: 1 addition & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"mammoth"
],
"keywords": ["pandoc", "mammoth"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
11 changes: 2 additions & 9 deletions packages/csv/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
"description": "Programmed implementation of csv format, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"files": ["dist"],
"exports": {
".": {
"import": "./dist/index.mjs",
Expand All @@ -20,12 +18,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"papaparse",
"csv",
"json"
],
"keywords": ["pandoc", "papaparse", "csv", "json"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
12 changes: 6 additions & 6 deletions packages/csv/src/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export function convertCSVToJSON(source: string, options?: csvConvertOptions) {
// const regex = /("[^"]*"|[^,]+)/g;
const regex = new RegExp(
`(${quotechar}[^${quotechar}]*${quotechar}|[^${delimiter}]+)`,
"g",
"g"
);

return row.match(regex) ?? [];
Expand All @@ -40,7 +40,7 @@ export function convertCSVToJSON(source: string, options?: csvConvertOptions) {
} else if (options?.header) {
header = Array.from(
{ length: maxColumns },
(_, i) => `${options?.header}${i + 1}`,
(_, i) => `${options?.header}${i + 1}`
);
}

Expand All @@ -52,7 +52,7 @@ export function convertCSVToJSON(source: string, options?: csvConvertOptions) {

if (header.length < maxColumns) {
header = header.concat(
Array.from({ length: maxColumns - header.length }, (_, i) => `${i + 1}`),
Array.from({ length: maxColumns - header.length }, (_, i) => `${i + 1}`)
);
}

Expand Down Expand Up @@ -80,7 +80,7 @@ export function convertCSVToJSON(source: string, options?: csvConvertOptions) {
export function convertJSONToCSV(
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
source: Record<string, any>[],
options?: csvConvertOptions,
options?: csvConvertOptions
) {
const delimiter = options?.delimiter ?? ",";

Expand All @@ -103,7 +103,7 @@ export function convertJSONToCSV(
} else if (options?.header) {
header = Array.from(
{ length: maxColumns },
(_, i) => `${options?.header}${i + 1}`,
(_, i) => `${options?.header}${i + 1}`
);
}

Expand All @@ -115,7 +115,7 @@ export function convertJSONToCSV(

if (header.length < maxColumns) {
header = header.concat(
Array.from({ length: maxColumns - header.length }, (_, i) => `${i + 1}`),
Array.from({ length: maxColumns - header.length }, (_, i) => `${i + 1}`)
);
}

Expand Down
4 changes: 2 additions & 2 deletions packages/docen/bundle.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ execSync("pnpm prepack", { stdio: "inherit" });
execSync("pnpm ncc build dist/cli.cjs -o .cache/ncc -a", { stdio: "inherit" });

const version = readFileSync("package.json", "utf-8").match(
/"version": "(.*?)"/,
/"version": "(.*?)"/
)[1];

execSync(`mv .cache/ncc/index.cjs .cache/ncc/docen-${version}.js`, {
Expand All @@ -17,5 +17,5 @@ execSync(
`pnpm dlx pkg .cache/ncc/docen-${version}.js --out-path .cache/bundle -c pkg.config.json -C GZip`,
{
stdio: "inherit",
},
}
);
9 changes: 2 additions & 7 deletions packages/docen/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
"description": "Programmatically and command-line implementation of document formatting, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"files": ["dist"],
"bin": {
"docen": "dist/cli.mjs"
},
Expand All @@ -24,10 +22,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"mammoth"
],
"keywords": ["pandoc", "mammoth"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
2 changes: 1 addition & 1 deletion packages/docen/src/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export async function extractText(
source: DataType,
options?: {
sourceType?: string;
},
}
) {
let text: string;

Expand Down
2 changes: 1 addition & 1 deletion packages/docen/src/utils/file-type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export async function detectFileType(source: Uint8Array) {
const buffer = Buffer.from(source);
const isTxt = Buffer.compare(
buffer.subarray(0, 3),
Buffer.from([0xef, 0xbb, 0xbf]),
Buffer.from([0xef, 0xbb, 0xbf])
);
if (isTxt) {
fileType = {
Expand Down
9 changes: 2 additions & 7 deletions packages/docx/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
"description": "Programmed implementation of docx format, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"files": ["dist"],
"exports": {
".": {
"import": "./dist/index.mjs",
Expand All @@ -20,10 +18,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"mammoth"
],
"keywords": ["pandoc", "mammoth"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
4 changes: 2 additions & 2 deletions packages/docx/src/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { type DataType, toUint8Array } from "undio";

export async function getDocumentXML(source: DataType) {
return JSZIP.loadAsync(toUint8Array(source)).then((zip) =>
zip?.file("word/document.xml")?.async("text"),
zip?.file("word/document.xml")?.async("text")
);
}

export async function getStylesXML(source: DataType) {
return JSZIP.loadAsync(toUint8Array(source)).then((zip) =>
zip?.file("word/styles.xml")?.async("text"),
zip?.file("word/styles.xml")?.async("text")
);
}

Expand Down
30 changes: 30 additions & 0 deletions packages/image/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# @docen/image

![npm version](https://img.shields.io/npm/v/@docen/image)
![npm downloads](https://img.shields.io/npm/dw/@docen/image)
![npm license](https://img.shields.io/npm/l/@docen/image)

> Programmed implementation of image format, powered by Demo Macro.

## Getting started

```bash
# npm
$ npm install @docen/image

# yarn
$ yarn add @docen/image

# pnpm
$ pnpm add @docen/image
```

## Usage

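```ts
import { readFileSync } from "node:fs";
import { convertImageToText } from "@docen/image";

// `lang` is optional and is passed through to tesseract.js.
const text = await convertImageToText(readFileSync("image.png"), {
  lang: "eng",
});

console.log(text);
```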

## License

- [MIT](LICENSE) &copy; [Demo Macro](https://imst.xyz/)
12 changes: 12 additions & 0 deletions packages/image/build.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { defineBuildConfig } from "unbuild";

// unbuild configuration for the @docen/image package.
export default defineBuildConfig({
// Generate type declaration files alongside the bundled output.
declaration: true,
// Single entry point of the package.
entries: ["src/index"],
rollup: {
// Also emit a CommonJS build in addition to the ESM one.
emitCJS: true,
esbuild: {
// Minify the emitted bundles.
minify: true,
},
},
});
37 changes: 37 additions & 0 deletions packages/image/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"name": "@docen/image",
"version": "0.0.7-edge.3",
"description": "Programmed implementation of image format, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": ["dist"],
"exports": {
".": {
"import": "./dist/index.mjs",
"require": "./dist/index.cjs"
}
},
"scripts": {
"prepack": "unbuild"
},
"repository": {
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": ["pandoc", "image"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
"url": "https://imst.xyz/"
},
"license": "MIT",
"bugs": {
"url": "https://github.com/docenjs/docen/issues"
},
"homepage": "https://github.com/docenjs/docen#readme",
"dependencies": {
"image-meta": "^0.2.0",
"tesseract.js": "5.1.0",
"undio": "0.2.0"
}
}
1 change: 1 addition & 0 deletions packages/image/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./ocr";
31 changes: 31 additions & 0 deletions packages/image/src/ocr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { imageMeta } from "image-meta";
import { createWorker } from "tesseract.js";
import { type DataType, toArrayBuffer, toUint8Array } from "undio";

/**
 * Run OCR on an image and return the recognized text.
 *
 * The image format is detected from the bytes via `imageMeta`; only
 * bmp, jpg, png, pbm and webp inputs are handed to the Tesseract worker.
 *
 * @param source - Image data, converted with undio's `toUint8Array` /
 *   `toArrayBuffer` before use.
 * @param options - Optional settings.
 * @param options.lang - Language(s) forwarded unchanged to `createWorker`.
 *   See https://tesseract-ocr.github.io/tessdoc/Data-Files
 * @returns The text extracted by `worker.recognize`.
 * @throws Error with message "Unsupported image type" when the detected type
 *   is missing or not one of the supported formats.
 */
export async function convertImageToText(
  source: DataType,
  options?: {
    // https://tesseract-ocr.github.io/tessdoc/Data-Files
    lang?: string | string[];
  }
) {
  const { type } = imageMeta(toUint8Array(source));

  // bmp, jpg, png, pbm, webp
  const isImageType =
    type && ["bmp", "jpg", "png", "pbm", "webp"].includes(type);

  // Reject unsupported input before spinning up a worker.
  if (!isImageType) {
    throw new Error("Unsupported image type");
  }

  const worker = await createWorker(options?.lang);

  try {
    const {
      data: { text },
    } = await worker.recognize(toArrayBuffer(source));

    return text;
  } finally {
    // Always release the worker, even when recognition fails; otherwise a
    // rejected `recognize` call would leave the worker running.
    await worker.terminate();
  }
}
11 changes: 2 additions & 9 deletions packages/jsonp/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
"description": "Programmed implementation of jsonp format, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"files": ["dist"],
"exports": {
".": {
"import": "./dist/index.mjs",
Expand All @@ -20,12 +18,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"jsonp",
"csv",
"json"
],
"keywords": ["pandoc", "jsonp", "csv", "json"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
4 changes: 2 additions & 2 deletions packages/jsonp/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
export function convertJSONPToJSON(
source: string,
options: { callbackName?: string; multiple?: boolean } = {},
options: { callbackName?: string; multiple?: boolean } = {}
): unknown {
const multiple: boolean = options.multiple ?? false;

const regGroup: RegExpMatchArray | null = source.match(
/(?<functionName>.+)\(.*\)/,
/(?<functionName>.+)\(.*\)/
);

const functionName: string | undefined = regGroup?.groups?.functionName;
Expand Down
10 changes: 2 additions & 8 deletions packages/pdf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
"description": "Programmed implementation of pdf format, powered by Demo Macro.",
"main": "dist/index.mjs",
"types": "dist/index.d.ts",
"files": [
"dist"
],
"files": ["dist"],
"exports": {
".": {
"import": "./dist/index.mjs",
Expand All @@ -20,11 +18,7 @@
"type": "git",
"url": "git+https://github.com/docenjs/docen.git"
},
"keywords": [
"pandoc",
"unpdf",
"pdf"
],
"keywords": ["pandoc", "unpdf", "pdf"],
"author": {
"name": "Demo Macro",
"email": "[email protected]",
Expand Down
Loading

0 comments on commit 195f629

Please sign in to comment.