Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
irskep committed Sep 11, 2024
1 parent 1077464 commit 6bec192
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 52 deletions.
Binary file modified bun.lockb
Binary file not shown.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
},
"dependencies": {
"@djot/djot": "^0.3.1",
"@types/mdast": "^4.0.4",
"argparse": "^2.0.1",
"fast-glob": "^3.3.2",
"gluegun": "^5.2.0",
Expand All @@ -49,7 +50,8 @@
"remark-parse": "^11.0.0",
"remark-rehype": "^11.1.0",
"remark-stringify": "^11.0.0",
"shiki": "^1.14.1"
"shiki": "^1.14.1",
"unified": "^11.0.5"
},
"devDependencies": {
"@djockey/linkmapper-typedoc": "^0.0.7",
Expand Down
9 changes: 7 additions & 2 deletions src/engine/executeConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@ export async function readDocSet(
): Promise<DocSet> {
const logCollector = logCollectorParent.getChild("Parsing documents");

const parsePromises = config.fileList.map((path_) =>
parseFile(config.input_dir, path_, config, logCollector)
if (!config.fileList.length) {
throw new Error("No files");

Check failure on line 52 in src/engine/executeConfig.ts

View workflow job for this annotation

GitHub Actions / tests

error: No files

at /home/runner/work/djockey/djockey/src/engine/executeConfig.ts:52:11 at readDocSet (/home/runner/work/djockey/djockey/src/engine/executeConfig.ts:46:3) at /home/runner/work/djockey/djockey/src/engine/executeConfig.ts:33:24 at executeConfig (/home/runner/work/djockey/djockey/src/engine/executeConfig.ts:25:3) at /home/runner/work/djockey/djockey/src/endToEnd.test.ts:42:9 at /home/runner/work/djockey/djockey/src/endToEnd.test.ts:34:24
}

const parsePromises = config.fileList.map(
async (path_) =>
await parseFile(config.input_dir, path_, config, logCollector)
);

const docs = (await Promise.all(parsePromises)).filter((doc) => !!doc);
Expand Down
51 changes: 34 additions & 17 deletions src/input/fileExtensions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,52 @@ import {
export function getExtensionForInputFormat(fmt: DjockeyInputFormat): string[] {
switch (fmt) {
case "gfm":
return ["md", "markdown"];
return [".md", ".markdown"];
case "djot":
return ["djot", "dj"];
return [".djot", ".dj"];
case "myst":
return ["md"];
return [".myst.md", ".md"];
case "commonmark":
return [".common.md", ".md", ".markdown"];
}
}

export function getInputFormatForFileExtension(
ext: string,
export function getInputFormatForFileName(
filename: string,
config: DjockeyConfig,
frontMatter: Record<string, unknown>
): DjockeyInputFormat | null {
const bareExt = ext[0] === "." ? ext.slice(1) : ext;

const defaultMarkdownVariant: MarkdownVariant =
(frontMatter.md_variant as MarkdownVariant | undefined) ??
config.default_markdown_variant;

switch (bareExt) {
case "dj":
return "djot";
case "djot":
return "djot";
case "md":
return defaultMarkdownVariant;
case "markdown":
for (const fmt of ALL_INPUT_FORMATS) {
for (const ext of getExtensionForInputFormat(fmt)) {
// Double-extensions disambiguate between Markdown formats.
if (ext.split(".").length > 2 && filename.endsWith(ext)) {
return fmt;
}
}
}

// If we didn't find a totally unambiguous extension, try Markdown.
const mdExts = [".md", ".markdown"];
for (const ext of mdExts) {
if (filename.endsWith(ext)) {
return defaultMarkdownVariant;
default:
return null;
}
}

// Otherwise, try everything else.
for (const fmt of ALL_INPUT_FORMATS) {
for (const ext of getExtensionForInputFormat(fmt)) {
if (filename.endsWith(ext)) {
return fmt;
}
}
}

console.error("Can't figure out format for", filename);

return null;
}
31 changes: 25 additions & 6 deletions src/input/parseFile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@ import fs from "fs";
import path from "path";
import { basename } from "path";

import yaml from "js-yaml";
import { fromPandoc, parse } from "@djot/djot";
import { mystParse } from "myst-parser";
import remarkParse from "remark-parse";
import remarkGfm from "remark-gfm";
import { unified } from "unified";
import yaml from "js-yaml";

import { DjockeyConfig, DjockeyDoc, PolyglotDoc } from "../types.js";
import { getPandocAST } from "../pandoc.js";
import { getInputFormatForFileExtension } from "./fileExtensions.js";
import { getInputFormatForFileName } from "./fileExtensions.js";
import { LogCollector } from "../utils/logUtils.js";
import { fsext, fsname, fssplit, refjoin } from "../utils/pathUtils.js";

import { mystParse } from "myst-parser";
import { fsbase, fsext, fsname, fssplit, refjoin } from "../utils/pathUtils.js";

function removeExtensionFromPath(path_: string): string {
return path_.slice(0, path_.length - path.parse(path_).ext.length);
Expand Down Expand Up @@ -45,8 +48,17 @@ export async function parseFile(

let polyglotDoc: PolyglotDoc | undefined;

switch (getInputFormatForFileExtension(fsext(fsPath), config, frontMatter)) {
logCollector.warning(
`${getInputFormatForFileName(
fsbase(fsPath),
config,
frontMatter
)} ${fsPath}`
);

switch (getInputFormatForFileName(fsbase(fsPath), config, frontMatter)) {
case "djot":
console.log("Parse", fsbase(fsPath), "as djot");
polyglotDoc = {
kind: "djot",
value: parse(text, {
Expand All @@ -56,10 +68,17 @@ export async function parseFile(
};
break;
case "gfm":
console.log("Parse", fsbase(fsPath), "as gfm");
const ast = getPandocAST(fsPath);
polyglotDoc = { kind: "djot", value: fromPandoc(ast as any) };
break;
case "commonmark":
console.log("Parse", fsbase(fsPath), "as gfm");
const file = await unified().use(remarkParse).use(remarkGfm).process();
console.log(file);
break;
case "myst":
console.log("Parse", fsbase(fsPath), "as myst");
polyglotDoc = { kind: "mdast", value: mystParse(text) };
console.log(yaml.dump(polyglotDoc.value));
break;
Expand Down
7 changes: 3 additions & 4 deletions src/plugins/autoTitlePlugin.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import { Heading } from "@djot/djot";
import { visit, EXIT } from "unist-util-visit";
import mdast from "mdast";

import { applyFilter } from "../engine/djotFiltersPlus.js";
import { DjockeyDoc, DjockeyPlugin } from "../types.js";
import { djotASTToText, mystASTToText } from "../utils/djotUtils.js";
import { djotASTToText } from "../utils/djotUtils.js";
import { LogCollector } from "../utils/logUtils.js";
import { mystParse } from "myst-parser";
import {
djotASTToMystAST_Inline,
mystASTToDjotAST_Inline,
} from "../utils/astUtils.js";
import { toString } from "mdast-util-to-string";

export class AutoTitlePlugin implements DjockeyPlugin {
name = "Auto Titler";
Expand Down Expand Up @@ -40,7 +39,7 @@ export class AutoTitlePlugin implements DjockeyPlugin {
break;
case "mdast":
visit(doc.docs.content.value, "heading", (node) => {
doc.title = mystASTToText(node as mdast.Heading);
doc.title = toString(node);
doc.titleASTDjot = mystASTToDjotAST_Inline(node);
doc.titleASTMyst = node;
return EXIT;
Expand Down
1 change: 1 addition & 0 deletions src/renderers/htmlRenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ export class HTMLRenderer implements DjockeyRenderer {
urlLists: filteredURLListsAsURLs,
});

console.log("Write to", outputFSPath);
await writeFile(outputFSPath, outputPage);
}
}
Expand Down
13 changes: 9 additions & 4 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ export interface LinkMappingConfig {
url_root: string;
}

export type MarkdownVariant = "gfm" | "myst";
export type MarkdownVariant = "gfm" | "myst" | "commonmark";

export interface DjockeyConfig {
input_dir: string;
Expand Down Expand Up @@ -61,7 +61,7 @@ export interface DjockeyConfigResolved extends DjockeyConfig {
rootPath: string;
fileList: string[];
url_root: string;
default_markdown_variant: "gfm" | "myst";
default_markdown_variant: "gfm" | "myst" | "commonmark";
link_mappings: LinkMappingConfig[];
}

Expand Down Expand Up @@ -96,8 +96,13 @@ export interface DjockeyDoc {
}

// These correspond to pandoc formats. Keep these two lines in sync.
export type DjockeyInputFormat = "djot" | "gfm" | "myst";
export const ALL_INPUT_FORMATS: DjockeyInputFormat[] = ["djot", "gfm", "myst"];
export type DjockeyInputFormat = "djot" | "gfm" | "myst" | "commonmark";
export const ALL_INPUT_FORMATS: DjockeyInputFormat[] = [
"djot",
"gfm",
"myst",
"commonmark",
];

// Keep these two lines in sync.
export type DjockeyOutputFormat = "html" | "gfm";
Expand Down
14 changes: 7 additions & 7 deletions src/utils/astUtils.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { Block, Heading, Inline } from "@djot/djot";
import { Parent, PhrasingContent } from "mdast";
import unist from "unist";
import { visit } from "unist-util-visit";
import { toString } from "mdast-util-to-string";

import { applyFilter } from "../engine/djotFiltersPlus.js";
import { DjockeyDoc, PolyglotDoc, PolyglotDoc_MDAST } from "../types.js";
import { djotASTToText, mystASTToText } from "./djotUtils.js";
import { djotASTToText } from "./djotUtils.js";

export function getDoesDocHaveContent(doc: PolyglotDoc): boolean {
switch (doc.kind) {
Expand Down Expand Up @@ -43,14 +45,12 @@ export function getFirstHeadingIsAlreadyDocumentTitle(
return didFindNode;
}

export function mystASTToDjotAST_Inline(mystRoot: Parent): Inline[] {
return [{ tag: "str", text: mystASTToText(mystRoot) }];
export function mystASTToDjotAST_Inline(root: unist.Parent): Inline[] {
return [{ tag: "str", text: toString(root) }];
}

export function mystASTToDjotAST_Block(mystRoot: Parent): Block[] {
return [
{ tag: "para", children: [{ tag: "str", text: mystASTToText(mystRoot) }] },
];
export function mystASTToDjotAST_Block(root: unist.Parent): Block[] {
return [{ tag: "para", children: [{ tag: "str", text: toString(root) }] }];
}

export function djotASTToMystAST_Inline(djotRoot: Inline[]): PhrasingContent[] {
Expand Down
10 changes: 1 addition & 9 deletions src/utils/djotUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ import {
isBlock,
} from "@djot/djot";
import mdast from "mdast";
import unist from "unist";
import { visit } from "unist-util-visit";

import { processAllNodes } from "../engine/djotFiltersPlus.js";
import { MystDoc } from "../types.js";

export function getHasClass(node: HasAttributes, cls: string): boolean {
if (!node.attributes || !node.attributes["class"]) return false;
Expand Down Expand Up @@ -58,14 +58,6 @@ export function djotASTToText(children: Block[]) {
return result.join("");
}

export function mystASTToText(root: mdast.Parent) {
const result = new Array<string>();
visit(root, "text", (node) => {
result.push((node as mdast.Text).value);
});
return result.join("");
}

export function djotASTToTextWithLineBreaks(children: Block[]) {
const result = new Array<string>();

Expand Down
4 changes: 4 additions & 0 deletions src/utils/pathUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ export function fsname(s: string): string {
return path.parse(s).name;
}

/**
 * Returns the `base` component of `s` as reported by `path.parse`:
 * the final path segment with its extension(s) still attached.
 *
 * @param s - A filesystem path.
 * @returns The last segment of the path, e.g. `page.myst.md`.
 */
export function fsbase(s: string): string {
  const { base } = path.parse(s);
  return base;
}

/**
 * Returns the `ext` component of `s` as reported by `path.parse`:
 * the last extension including its leading dot, or an empty string
 * when the final segment has no extension.
 *
 * @param s - A filesystem path.
 * @returns The trailing extension, e.g. `.md` for `page.myst.md`.
 */
export function fsext(s: string): string {
  const { ext } = path.parse(s);
  return ext;
}
Expand Down
6 changes: 4 additions & 2 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1396,7 +1396,7 @@ __metadata:
languageName: node
linkType: hard

"@types/mdast@npm:^4.0.0":
"@types/mdast@npm:^4.0.0, @types/mdast@npm:^4.0.4":
version: 4.0.4
resolution: "@types/mdast@npm:4.0.4"
dependencies:
Expand Down Expand Up @@ -2825,6 +2825,7 @@ __metadata:
"@types/js-yaml": ^4.0.9
"@types/log-update": ^3.1.0
"@types/lunr": ^2.3.7
"@types/mdast": ^4.0.4
"@types/micromatch": ^4.0.9
"@types/node": ^22.4.0
"@types/nunjucks": ^3.2.6
Expand Down Expand Up @@ -2856,6 +2857,7 @@ __metadata:
ts-jest: ^29.2.5
typedoc: ^0.26.6
typescript: ^5.5.4
unified: ^11.0.5
bin:
djockey: ./dist/cli.js
languageName: unknown
Expand Down Expand Up @@ -7555,7 +7557,7 @@ __metadata:
languageName: node
linkType: hard

"unified@npm:^11.0.0":
"unified@npm:^11.0.0, unified@npm:^11.0.5":
version: 11.0.5
resolution: "unified@npm:11.0.5"
dependencies:
Expand Down

0 comments on commit 6bec192

Please sign in to comment.