Skip to content

Commit

Permalink
second work session, dirty code not working, but core mechanic and id…
Browse files Browse the repository at this point in the history
…ea there...
  • Loading branch information
trueberryless committed Jan 6, 2025
1 parent b05a80c commit a8d993c
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 121 deletions.
2 changes: 1 addition & 1 deletion docs/src/content/docs/getting-started.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Check your documentation for spelling mistakes; multilingual support.

## Prerequisites

You will need to have a Starlight website set up.
You will need to have a Starlight websit set up.
If you don't have one yet, you can follow the ["Getting Started"](https://starlight.astro.build/getting-started) guide in the Starlight docs to create one.

## Installation
Expand Down
23 changes: 2 additions & 21 deletions packages/starlight-spell-checker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
type StarlightSpellCheckerConfig,
type StarlightSpellCheckerUserConfig,
} from "./libs/config";
// import { clearContentLayerCache } from "./libs/astro";
import { logErrors, validateTexts } from "./libs/validation";

export { type StarlightSpellCheckerConfig };
Expand All @@ -22,26 +21,8 @@ export default function starlightSpellChecker(
addIntegration({
name: "starlight-spell-checker-integration",
hooks: {
// "astro:config:setup": async ({ command, updateConfig }) => {
// if (command !== "build") {
// return;
// }

// await clearContentLayerCache(astroConfig, logger);

// updateConfig({
// markdown: {
// remarkPlugins: [
// [
// remarkStarlightLinksValidator,
// { base: astroConfig.base, srcDir: astroConfig.srcDir },
// ],
// ],
// },
// });
// },
"astro:build:done": ({ dir, pages }) => {
const misspellings = validateTexts(
"astro:build:done": async ({ dir, pages }) => {
const misspellings = await validateTexts(
pages,
dir,
astroConfig,
Expand Down
12 changes: 9 additions & 3 deletions packages/starlight-spell-checker/libs/i18n.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { ensureLeadingSlash, ensureTrailingSlash } from "./path";
import type { StarlightUserConfig } from "./validation";

import dictionaryDe from "dictionary-de";
import dictionaryEn from "dictionary-en";
import dictionaryEn, { type Dictionary } from "dictionary-en";
import dictionaryEs from "dictionary-es";
import dictionaryFr from "dictionary-fr";
import dictionaryIt from "dictionary-it";
Expand Down Expand Up @@ -56,10 +56,16 @@ const dictionaryMapper: Record<string, any> = {
"zh-tw": undefined,
};

export function getLocaleDictionary(path: string, localeConfig: LocaleConfig) {
export function getLocaleDictionary(
path: string,
localeConfig: LocaleConfig
): Dictionary | undefined {
const pathLocale = getLocale(path, localeConfig);

return dictionaryMapper[pathLocale];
if (pathLocale) {
return dictionaryMapper[pathLocale];
}
return;
}

function getLocale(path: string, localeConfig: LocaleConfig) {
Expand Down
184 changes: 111 additions & 73 deletions packages/starlight-spell-checker/libs/validation.ts
Original file line number Diff line number Diff line change
@@ -1,65 +1,120 @@
import { fileURLToPath } from "node:url";
import { statSync } from "node:fs";
import { posix } from "node:path";
import { fileURLToPath } from "node:url";

import type { StarlightUserConfig as StarlightUserConfigWithPlugins } from "@astrojs/starlight/types";
import type { AstroConfig, AstroIntegrationLogger } from "astro";
import { bgGreen, black, blue, dim, green, red } from "kleur/colors";
import picomatch from "picomatch";

import type { StarlightSpellCheckerConfig } from "../libs/config";

import { getLocaleConfig, type LocaleConfig } from "./i18n";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";
import { retext } from "retext";
import retextSpell from "retext-spell";
import dictionaryEn from "dictionary-en";
import retextReadability from "retext-readability";
import retextIndefiniteArticle from "retext-indefinite-article";
import { visit } from "unist-util-visit";
import { promises as fs } from "fs";
import path from "path";
import type { Root } from "mdast";
import { getLocaleConfig, getLocaleDictionary } from "./i18n";
import { ensureTrailingSlash, stripLeadingSlash } from "./path";

/**
 * Kinds of validation error, keyed by name and mapped to a
 * human-readable label.
 *
 * Declared `as const` so each value keeps its string-literal type.
 */
export const ValidationErrorType = {
MisspelledWord: "misspelled word",
} as const;

export function validateTexts(
export async function validateTexts(
pages: PageData[],
outputDir: URL,
astroConfig: AstroConfig,
starlightConfig: StarlightUserConfig,
options: StarlightSpellCheckerConfig
): ValidationErrors {
) {
process.stdout.write(`\n${bgGreen(black(` validating spelling `))}\n`);

const processor = unified()
.use(remarkParse) // Parse Markdown to MDAST
.use(remarkRehype) // Convert MDAST to HAST for easier text processing
.use(rehypeStringify); // Optionally stringify back to HTML (for debugging)

const localeConfig = getLocaleConfig(starlightConfig);

const allPages: Pages = new Set(
pages
.map((page) =>
ensureTrailingSlash(
astroConfig.base === "/"
? stripLeadingSlash(page.pathname)
: posix.join(stripLeadingSlash(astroConfig.base), page.pathname)
)
pages.map((page) =>
ensureTrailingSlash(
astroConfig.base === "/"
? stripLeadingSlash(page.pathname)
: posix.join(stripLeadingSlash(astroConfig.base), page.pathname)
)
.filter((page) => !isExcludedPage(page, options.exclude))
)
);

const errors: ValidationErrors = new Map();
const errors = new Map();

// Iterate through all pages
for (const page of allPages) {
const validationContext: ValidationContext = {
astroConfig,
errors,
localeConfig,
options,
outputDir,
pages: allPages,
currentPage: page,
};

validateText(validationContext);
console.log(page);
if (!isValidAsset(page, astroConfig, outputDir)) {
continue;
}
let dictionary;

if (localeConfig) {
dictionary = getLocaleDictionary(page, localeConfig);
}
if (!dictionary) {
dictionary = dictionaryEn;
}

console.log(dictionary);

let retextProcessor = retext()
.use(retextSpell, {
dictionary,
})
.use(retextReadability, { age: 22 }) // Customize readability target age
.use(retextIndefiniteArticle);

const filePath = path.join(outputDir.pathname, page);
const content = await fs.readFile(filePath, "utf-8");

try {
// Parse the Markdown content
const parsed = processor.parse(content);

// Extract plain text from Markdown
const plainText = extractText(parsed);

// Analyze text with retext
const file = await retextProcessor.process(plainText);

// Collect messages (errors/warnings)
if (file.messages.length > 0) {
errors.set(filePath, file.messages);
}
} catch (err) {
console.error(`Error processing file ${filePath}:`, err);
}
}

return errors;
}

/**
 * Gather the value of every `text` node found in the tree into one string.
 *
 * The values are separated by single spaces and the combined result is
 * trimmed of leading and trailing whitespace.
 */
function extractText(ast: Root) {
  const fragments: string[] = [];
  visit(ast, "text", (textNode) => {
    fragments.push(textNode.value);
  });
  return fragments.join(" ").trim();
}

export function logErrors(
pluginLogger: AstroIntegrationLogger,
errors: ValidationErrors
errors: Map<string, any>
) {
const logger = pluginLogger.fork("");

Expand Down Expand Up @@ -97,63 +152,46 @@ export function logErrors(
}

/**
* Validate a page.
* Check if a link is a valid asset in the build output directory.
*/
function validateText(context: ValidationContext) {
const { astroConfig, errors, localeConfig, options, pages } = context;

return;
}

/**
 * Tell whether a page matches any of the user-supplied exclusion globs.
 *
 * @param page - The page path to test.
 * @param exclude - Glob patterns handed to picomatch.
 * @returns `true` when the page matches at least one pattern.
 */
function isExcludedPage(page: string, exclude: string[]) {
  const matchesExcluded = picomatch(exclude);
  return matchesExcluded(page);
}

/**
 * Return the part of `path` that precedes the first `?`.
 *
 * A path without a `?` is returned unchanged.
 */
function stripQueryString(path: string): string {
  const [beforeQuery] = path.split("?");
  return beforeQuery ?? path;
}
function isValidAsset(path: string, astroConfig: AstroConfig, outputDir: URL) {
if (astroConfig.base !== "/") {
const base = stripLeadingSlash(astroConfig.base);

if (path.startsWith(base)) {
path = path.replace(new RegExp(`^${stripLeadingSlash(base)}/?`), "");
} else {
return false;
}
}

function addError(errors: ValidationErrors, type: ValidationErrorType) {
const fileErrors = errors.get(filePath) ?? [];
fileErrors.push({ link, type });
try {
const filePath = fileURLToPath(new URL(path, outputDir));
const stats = statSync(filePath);
console.log(stats.isFile());

errors.set(filePath, fileErrors);
return stats.isFile();
} catch {
return false;
}
}

/**
 * Pick the singular or plural form of a word for a given count.
 *
 * The plural is formed by appending "s"; only a count of exactly 1
 * yields the singular.
 */
function pluralize(count: number, singular: string) {
  if (count === 1) {
    return singular;
  }
  return `${singular}s`;
}

// The validation errors keyed by file path.
type ValidationErrors = Map<string, ValidationError[]>;

export type ValidationErrorType =
(typeof ValidationErrorType)[keyof typeof ValidationErrorType];

interface ValidationError {
word: string;
type: ValidationErrorType;
suggestions: string[];
}
/**
* Check if a page is excluded from validation by the user.
*/
// function isExcludedPage(page: string, exclude: string[]) {
// return picomatch(exclude)(page);
// }

/**
 * Shape of a page entry accepted by the validator.
 */
interface PageData {
// URL path of the page; its type is reused for the members of `Pages`.
pathname: string;
}

type Page = PageData["pathname"];
type Pages = Set<Page>;

interface ValidationContext {
astroConfig: AstroConfig;
errors: ValidationErrors;
localeConfig: LocaleConfig | undefined;
options: StarlightSpellCheckerConfig;
outputDir: URL;
pages: Pages;
}
type Pages = Set<PageData["pathname"]>;

export type StarlightUserConfig = Omit<
StarlightUserConfigWithPlugins,
Expand Down
13 changes: 10 additions & 3 deletions packages/starlight-spell-checker/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,16 @@
"dictionary-pt": "^4.0.0",
"dictionary-ru": "^3.0.0",
"kleur": "^4.1.5",
"node": "^23.5.0",
"picomatch": "^4.0.2",
"mdast": "^3.0.0",
"rehype-stringify": "^10.0.1",
"remark-parse": "^11.0.0",
"remark-rehype": "^11.1.1",
"remark-retext": "^6.0.0",
"retext-spell": "^6.1.0"
"retext": "^9.0.0",
"retext-indefinite-article": "^5.0.0",
"retext-readability": "^8.0.0",
"retext-spell": "^6.1.0",
"unified": "^11.0.5",
"unist-util-visit": "^5.0.0"
}
}
Loading

0 comments on commit a8d993c

Please sign in to comment.