Skip to content

Commit

Permalink
third work session, make remark plugin which extracts text by visitin…
Browse files Browse the repository at this point in the history
…g mdast and hast
  • Loading branch information
trueberryless committed Jan 6, 2025
1 parent a8d993c commit 9fccff9
Show file tree
Hide file tree
Showing 8 changed files with 314 additions and 74 deletions.
10 changes: 10 additions & 0 deletions docs/astro.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ export default defineConfig({
baseUrl:
"https://github.com/trueberryless-org/starlight-spell-checker/edit/main/docs/",
},
locales: {
root: {
lang: "en",
label: "English",
},
de: {
lang: "de",
label: "Deutsch",
},
},
plugins: [starlightSpellChecker()],
sidebar: [
{
Expand Down
31 changes: 31 additions & 0 deletions docs/src/content/docs/de/test.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
title: starlight-spell-checker
description: Check your documentation for spelling mistakes; multilingual support.
head:
- tag: title
content: starlight-spell-checker
template: splash
editUrl: false
hero:
tagline: Check your documentation for spelling mistakes; multilingual support.
image:
file: ../../../assets/houston.webp
actions:
- text: Get Started
link: /getting-started/
icon: right-arrow
draft: true
---

import { Card, CardGrid } from '@astrojs/starlight/components'

## Next steps

<CardGrid stagger>
<Card title="Install the plugin" icon="puzzle">
In der [Startanleitung](/getting-started/) findest du Anweisungen zur Installation.
</Card>
<Card title="Configure the plugin" icon="setting">
Bearbeite deine Konfiguration in der Datei `astro.config.mjs`.
</Card>
</CardGrid>
20 changes: 20 additions & 0 deletions packages/starlight-spell-checker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import {
type StarlightSpellCheckerUserConfig,
} from "./libs/config";
import { logErrors, validateTexts } from "./libs/validation";
import { clearContentLayerCache } from "./libs/astro";
import { remarkStarlightSpellChecker } from "./libs/remark";

export { type StarlightSpellCheckerConfig };

Expand All @@ -21,6 +23,24 @@ export default function starlightSpellChecker(
addIntegration({
name: "starlight-spell-checker-integration",
hooks: {
"astro:config:setup": async ({ command, updateConfig }) => {
if (command !== "build") {
return;
}

await clearContentLayerCache(astroConfig, logger);

updateConfig({
markdown: {
remarkPlugins: [
[
remarkStarlightSpellChecker,
{ base: astroConfig.base, srcDir: astroConfig.srcDir },
],
],
},
});
},
"astro:build:done": async ({ dir, pages }) => {
const misspellings = await validateTexts(
pages,
Expand Down
26 changes: 18 additions & 8 deletions packages/starlight-spell-checker/libs/i18n.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ import dictionaryPl from "dictionary-pl";
import dictionaryPt from "dictionary-pt";
import dictionaryRu from "dictionary-ru";

export function getLocaleConfig(
config: StarlightUserConfig
): LocaleConfig | undefined {
if (!config.locales || Object.keys(config.locales).length === 0) return;
export function getLocaleConfig(config: StarlightUserConfig): LocaleConfig {
if (!config.locales || Object.keys(config.locales).length === 0)
return {
defaultLocale: "",
locales: [],
};

let defaultLocale = config.defaultLocale;
const locales: string[] = [];
Expand All @@ -31,7 +33,11 @@ export function getLocaleConfig(
locales.push(dir);
}

if (defaultLocale === undefined) return;
if (defaultLocale === undefined)
return {
defaultLocale: "",
locales: [],
};

return {
defaultLocale,
Expand All @@ -58,14 +64,18 @@ const dictionaryMapper: Record<string, any> = {

export function getLocaleDictionary(
path: string,
localeConfig: LocaleConfig
): Dictionary | undefined {
starlightConfig: StarlightUserConfig
): Dictionary {
const localeConfig = getLocaleConfig(starlightConfig);

const pathLocale = getLocale(path, localeConfig);

if (pathLocale) {
return dictionaryMapper[pathLocale];
}
return;
return dictionaryMapper[
localeConfig.defaultLocale !== "" ? localeConfig.defaultLocale : "en"
];
}

function getLocale(path: string, localeConfig: LocaleConfig) {
Expand Down
122 changes: 122 additions & 0 deletions packages/starlight-spell-checker/libs/remark.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import "mdast-util-mdx-jsx";

import nodePath from "node:path";
import { fileURLToPath } from "node:url";

import { hasProperty } from "hast-util-has-property";
import type { Nodes } from "hast";
import { fromHtml } from "hast-util-from-html";
import { slug } from "github-slugger";
import type { Root } from "mdast";
import { unified, type Plugin } from "unified";
import { visit } from "unist-util-visit";

import { ensureTrailingSlash, stripLeadingSlash } from "./path";

// Extracted text of every processed page, keyed by the page key computed in
// `getFilePath` (the normalized file path, or the frontmatter slug joined onto
// the base). Filled by the remark transformer below and handed out through
// `getValidationData()`.
const contents: Contents = new Map();

/**
 * Remark plugin that collects the prose of each Markdown/MDX page so that it
 * can be spell-checked later.
 *
 * The returned transformer walks the mdast tree, concatenates its textual
 * nodes into a single string and stores that string in the module-level
 * `contents` map. Pages whose frontmatter has a truthy `draft` are skipped.
 *
 * @param options.base - Base path forwarded to the path helpers (the Astro
 * `base` at the call site).
 * @param options.srcDir - URL of the source directory; page keys are computed
 * relative to its `content/docs` folder.
 */
export const remarkStarlightSpellChecker: Plugin<
  [{ base: string; srcDir: URL }],
  Root
> = function ({ base, srcDir }) {
  return async (tree, file) => {
    if (file.data.astro?.frontmatter?.["draft"]) return;

    const filePath = normalizeFilePath(base, srcDir, file.history[0]);
    // Deliberately not called `slug`: that name would shadow the imported
    // `slug` helper from github-slugger.
    const frontmatterSlug: string | undefined =
      typeof file.data.astro?.frontmatter?.["slug"] === "string"
        ? file.data.astro.frontmatter["slug"]
        : undefined;

    let fileContent = "";

    // https://github.com/syntax-tree/mdast#nodes
    // https://github.com/syntax-tree/mdast-util-mdx-jsx#nodes
    visit(
      tree,
      [
        "text",
        "inlineCode",
        "paragraph",
        "heading",
        "break",
        "tableCell",
        "html",
      ],
      (node) => {
        switch (node.type) {
          case "text":
            fileContent += node.value;
            break;
          case "inlineCode":
            // Keep the backticks so inline code stays recognizable in the
            // extracted text.
            fileContent += "`" + node.value + "`";
            break;
          case "paragraph":
          case "heading":
          case "break":
            // Parents are visited before their children, so this newline
            // lands in front of the block's own text. A hard line break gets
            // one too, otherwise the words on both sides of it would be
            // glued together.
            fileContent += "\n";
            break;
          case "tableCell":
            // Table cells hold their text directly (no paragraph wrapper);
            // without a separator neighbouring cells would merge into one
            // bogus word.
            fileContent += " ";
            break;
          // TODO: raw HTML nodes are visited but their text is not extracted yet.
          // case "html": {
          //   const htmlTree = fromHtml(node.value, { fragment: true });

          //   visit(htmlTree, ["text"], (htmlNode: Nodes) => {
          //     fileContent += htmlNode.value;
          //   });

          //   break;
          // }
        }
      }
    );

    contents.set(getFilePath(base, filePath, frontmatterSlug), fileContent);
  };
};

/**
 * Exposes the text gathered by the remark plugin.
 *
 * @returns An object whose `contents` property is the module-level map itself
 * (not a copy).
 */
export function getValidationData() {
  const validationData = { contents };
  return validationData;
}

/**
 * Picks the key under which a page's text is stored.
 *
 * @param base - Base path; only used when a slug is given.
 * @param filePath - Already normalized page path, returned as-is when there is
 * no slug.
 * @param slug - Optional frontmatter slug; when non-empty it replaces the
 * file-based path.
 * @returns `filePath`, or the slug (with a trailing slash ensured and leading
 * slash stripped) joined onto the base.
 */
function getFilePath(base: string, filePath: string, slug: string | undefined) {
  if (!slug) return filePath;

  const basePart = stripLeadingSlash(base);
  const slugPart = stripLeadingSlash(ensureTrailingSlash(slug));

  return nodePath.posix.join(basePart, slugPart);
}

/**
 * Turns the path of a content file into a slash-separated, slugified page key
 * with a trailing slash.
 *
 * @param base - Base path; unless it is exactly `"/"`, it is prepended
 * (without its leading slash) to the result.
 * @param srcDir - URL of the source directory; the path is made relative to
 * its `content/docs` folder.
 * @param filePath - Path of the file being processed.
 * @returns The normalized key, e.g. `de/test/` for `<srcDir>/content/docs/de/test.mdx`.
 * @throws Error when `filePath` is missing or empty.
 */
function normalizeFilePath(base: string, srcDir: URL, filePath?: string) {
  if (!filePath) {
    throw new Error("Missing file path to extract text for spell checking.");
  }

  const path = nodePath
    .relative(nodePath.join(fileURLToPath(srcDir), "content/docs"), filePath)
    // Drop the file extension.
    .replace(/\.\w+$/, "")
    // An `index` file stands for its directory.
    .replace(/(^|[/\\])index$/, "")
    // Guarantee exactly one trailing slash.
    .replace(/[/\\]?$/, "/")
    // Accept both separators, then slugify every segment.
    .split(/[/\\]/)
    .map((segment) => slug(segment))
    .join("/");

  if (base !== "/") {
    return nodePath.posix.join(stripLeadingSlash(base), path);
  }

  return path;
}

// function isMdxIdAttribute(
// attribute: MdxJsxAttribute | MdxJsxExpressionAttribute
// ): attribute is MdxIdAttribute {
// return (
// attribute.type === "mdxJsxAttribute" &&
// attribute.name === "id" &&
// typeof attribute.value === "string"
// );
// }

/** Extracted page text, keyed by the page key the remark plugin stores it under. */
export type Contents = Map<string, string>;

// interface MdxIdAttribute {
// name: "id";
// type: "mdxJsxAttribute";
// value: string;
// }
83 changes: 18 additions & 65 deletions packages/starlight-spell-checker/libs/validation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import path from "path";
import type { Root } from "mdast";
import { getLocaleConfig, getLocaleDictionary } from "./i18n";
import { ensureTrailingSlash, stripLeadingSlash } from "./path";
import { reporter } from "vfile-reporter";
import { getValidationData } from "./remark";

export async function validateTexts(
pages: PageData[],
Expand All @@ -33,65 +35,25 @@ export async function validateTexts(
) {
process.stdout.write(`\n${bgGreen(black(` validating spelling `))}\n`);

const processor = unified()
.use(remarkParse) // Parse Markdown to MDAST
.use(remarkRehype) // Convert MDAST to HAST for easier text processing
.use(rehypeStringify); // Optionally stringify back to HTML (for debugging)

const localeConfig = getLocaleConfig(starlightConfig);

const allPages: Pages = new Set(
pages.map((page) =>
ensureTrailingSlash(
astroConfig.base === "/"
? stripLeadingSlash(page.pathname)
: posix.join(stripLeadingSlash(astroConfig.base), page.pathname)
)
)
);
const { contents } = getValidationData();

const errors = new Map();

// Iterate through all pages
for (const page of allPages) {
console.log(page);
if (!isValidAsset(page, astroConfig, outputDir)) {
continue;
}
let dictionary;

if (localeConfig) {
dictionary = getLocaleDictionary(page, localeConfig);
}
if (!dictionary) {
dictionary = dictionaryEn;
}

console.log(dictionary);
for (const [filePath, content] of contents) {
let dictionary = getLocaleDictionary(filePath, starlightConfig);

let retextProcessor = retext()
.use(retextSpell, {
dictionary,
})
.use(retextReadability, { age: 22 }) // Customize readability target age
.use(retextReadability, { age: 22 })
.use(retextIndefiniteArticle);

const filePath = path.join(outputDir.pathname, page);
const content = await fs.readFile(filePath, "utf-8");

try {
// Parse the Markdown content
const parsed = processor.parse(content);

// Extract plain text from Markdown
const plainText = extractText(parsed);

// Analyze text with retext
const file = await retextProcessor.process(plainText);
const file = await retextProcessor.process(content);

// Collect messages (errors/warnings)
if (file.messages.length > 0) {
errors.set(filePath, file.messages);
errors.set(filePath, reporter(file));
}
} catch (err) {
console.error(`Error processing file ${filePath}:`, err);
Expand All @@ -101,17 +63,6 @@ export async function validateTexts(
return errors;
}

/**
* Extract plain text from MDAST nodes.
*/
function extractText(ast: Root) {
let text = "";
visit(ast, "text", (node) => {
text += node.value + " ";
});
return text.trim();
}

export function logErrors(
pluginLogger: AstroIntegrationLogger,
errors: Map<string, any>
Expand Down Expand Up @@ -139,13 +90,15 @@ export function logErrors(
for (const [file, validationErrors] of errors) {
logger.info(`${red("▶")} ${blue(file)}`);

for (const [index, validationError] of validationErrors.entries()) {
logger.info(
` ${blue(`${index < validationErrors.length - 1 ? "├" : "└"}─`)} ${
validationError.link
}${dim(` - ${validationError.type}`)}`
);
}
logger.info(validationErrors);

// for (const [index, validationError] of validationErrors.entries()) {
// logger.info(
// ` ${blue(`${index < validationErrors.length - 1 ? "├" : "└"}─`)} ${
// validationError.link
// }${dim(` - ${validationError.type}`)}`
// );
// }
}

process.stdout.write("\n");
Expand All @@ -168,7 +121,7 @@ function isValidAsset(path: string, astroConfig: AstroConfig, outputDir: URL) {
try {
const filePath = fileURLToPath(new URL(path, outputDir));
const stats = statSync(filePath);
console.log(stats.isFile());
console.log(filePath);

return stats.isFile();
} catch {
Expand Down
Loading

0 comments on commit 9fccff9

Please sign in to comment.