Add syntax highlighting with Shikiji
At long last, we have syntax highlighting with acceptable tradeoffs.

Shikiji is intended to be the next version of the longstanding Shiki.
It's fundamentally better in that it simply uses ESM and dynamic imports
to break out the wasm blob and the many language grammars into their own
JS assets, instead of an ad-hoc asset fetching mechanism. This means it
does the right thing when integrating with SvelteKit's vite build
tooling, for free.
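For illustration, here is a minimal sketch (not neogrok's actual code) of the usage pattern that benefits: because shikiji is plain ESM, vite splits the wasm blob and each grammar/theme into its own asset, and nothing is fetched until the dynamic import below runs. getHighlighter is the stock shikiji entry point; the theme and language names are placeholders.

```ts
// Sketch only: vite code-splits shikiji's wasm engine and each grammar/theme
// into separate assets; they are fetched lazily when this import executes.
async function loadHighlighter() {
  const { getHighlighter } = await import("shikiji");
  return getHighlighter({
    // Placeholder choices; load only what the page actually needs.
    themes: ["github-light"],
    langs: ["typescript"],
  });
}
```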

I have had to make minor patches to Shikiji to get it to have the exact
APIs that I want. I could have used the stock ones but they would result
in a jankier (and slightly less performant) integration. I will propose
upstreaming these changes, but I doubt they will all be acceptable.

The exact approach to highlighting is discussed in the new LineGroup
component. The data returned by content-parser in the API also had to
change to more easily merge with Shikiji's highlights.
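The LineGroup component itself is not reproduced here, but the merge it performs can be sketched from the two data shapes that are: per-line matchRanges from content-parser (see the updated tests below) and per-line themed tokens that, with the patches, carry start/end offsets into the same line. The following is an illustrative sketch under those assumptions, not the component's actual code; all names are hypothetical.

```ts
// Hypothetical shapes; the real ones live in content-parser and patched shikiji.
type MatchRange = { start: number; end: number };
type ThemedToken = { start: number; end: number; color?: string };
type Span = { text: string; color?: string; match: boolean };

// Split one line at every token and match-range boundary, so each resulting
// span has a single color and a single "is this inside a match" answer.
function mergeLine(
  text: string,
  tokens: ThemedToken[],
  matchRanges: MatchRange[],
): Span[] {
  const cuts = new Set<number>([0, text.length]);
  for (const { start, end } of [...tokens, ...matchRanges]) {
    cuts.add(start);
    cuts.add(end);
  }
  const boundaries = [...cuts].sort((a, b) => a - b);

  const spans: Span[] = [];
  for (let i = 0; i + 1 < boundaries.length; i++) {
    const start = boundaries[i];
    const end = boundaries[i + 1];
    // Each piece lies inside at most one token, and is either wholly inside
    // some match range or wholly outside all of them.
    const token = tokens.find((t) => t.start <= start && end <= t.end);
    const match = matchRanges.some((r) => r.start <= start && end <= r.end);
    spans.push({ text: text.slice(start, end), color: token?.color, match });
  }
  return spans;
}
```

Because both inputs are ranges over the same line, cutting at the union of their boundaries is all that is needed; that is the payoff of having tokens report offsets rather than substrings.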
isker committed Jan 14, 2024
1 parent 33b572d commit f9375e8
Showing 15 changed files with 824 additions and 599 deletions.
5 changes: 5 additions & 0 deletions .changeset/unlucky-jokes-battle.md
@@ -0,0 +1,5 @@
---
"neogrok": minor
---

Add syntax highlighting with shikiji
49 changes: 49 additions & 0 deletions .yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch
@@ -0,0 +1,49 @@
diff --git a/dist/chunk-types.d.mts b/dist/chunk-types.d.mts
index 2992357a1d0e670afce9ec11133a58a4e17593da..788cab9adb4134796ab6cd91fa9fd25b3a971dc5 100644
--- a/dist/chunk-types.d.mts
+++ b/dist/chunk-types.d.mts
@@ -1124,10 +1124,8 @@ interface ThemedTokenExplanation {
interface ThemedToken extends TokenStyles, TokenBase {
}
interface TokenBase {
- /**
- * The content of the token
- */
- content: string;
+ start: number;
+ end: number;
/**
* Explanation of
*
diff --git a/dist/index.mjs b/dist/index.mjs
index 6f891f5e37470cb673f8bcb8914c44634ca47091..1b52e8c87fa19d739e66f824715a18f37ae05fc6 100644
--- a/dist/index.mjs
+++ b/dist/index.mjs
@@ -3709,7 +3709,7 @@ function dimColor(color) {
function codeToThemedTokens(internal, code, options = {}) {
const { lang = 'text', theme: themeName = internal.getLoadedThemes()[0], } = options;
if (isPlaintext(lang)) {
- const lines = code.split(/\r\n|\r|\n/);
+ const lines = code;
return [...lines.map(line => [{ content: line }])];
}
const { theme, colorMap } = internal.setTheme(themeName);
@@ -3723,7 +3723,7 @@ function tokenizeWithTheme(code, grammar, theme, colorMap, options) {
...theme.colorReplacements,
...options?.colorReplacements,
};
- const lines = code.split(/\r\n|\r|\n/);
+ const lines = code;
let ruleStack = INITIAL;
let actual = [];
const final = [];
@@ -3754,7 +3754,8 @@ function tokenizeWithTheme(code, grammar, theme, colorMap, options) {
const foregroundColor = applyColorReplacements(colorMap[foreground], colorReplacements);
const fontStyle = StackElementMetadata.getFontStyle(metadata);
const token = {
- content: line.substring(startIndex, nextStartIndex),
+ start: startIndex,
+ end: nextStartIndex,
color: foregroundColor,
fontStyle,
};
13 changes: 13 additions & 0 deletions .yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch
@@ -0,0 +1,13 @@
diff --git a/dist/bundle-full.d.mts b/dist/bundle-full.d.mts
index b8e655052e4a1b6e10e617eb06008a0782ff3a70..db1121f519c64bcada055f10f07500c27afca436 100644
--- a/dist/bundle-full.d.mts
+++ b/dist/bundle-full.d.mts
@@ -12,7 +12,7 @@ type Highlighter = HighlighterGeneric<BundledLanguage, BundledTheme>;
declare const getHighlighter: shikiji_core.GetHighlighterFactory<BundledLanguage, BundledTheme>;
declare const codeToHtml: (code: string, options: shikiji_core.CodeToHastOptions<BundledLanguage, BundledTheme>) => Promise<string>;
declare const codeToHast: (code: string, options: shikiji_core.CodeToHastOptions<BundledLanguage, BundledTheme>) => Promise<Root>;
-declare const codeToThemedTokens: (code: string, options: shikiji_core.RequireKeys<shikiji_core.CodeToThemedTokensOptions<BundledLanguage, BundledTheme>, "lang" | "theme">) => Promise<shikiji_core.ThemedToken[][]>;
+declare const codeToThemedTokens: (code: string[], options: shikiji_core.RequireKeys<shikiji_core.CodeToThemedTokensOptions<BundledLanguage, BundledTheme>, "lang" | "theme">) => Promise<shikiji_core.ThemedToken[][]>;
declare const codeToTokensWithThemes: (code: string, options: shikiji_core.RequireKeys<shikiji_core.CodeToTokensWithThemesOptions<BundledLanguage, BundledTheme>, "lang" | "themes">) => Promise<shikiji_core.ThemedTokenWithVariants[][]>;
declare const getSingletonHighlighter: () => Promise<HighlighterGeneric<BundledLanguage, BundledTheme>>;

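Taken together, the two patches change codeToThemedTokens to accept pre-split lines (string[]) rather than a single string, and its tokens to carry start/end offsets instead of a content substring. A hypothetical call against the patched package might look like the following; the theme, language, and sample lines are placeholders, not anything from neogrok.

```ts
import { codeToThemedTokens } from "shikiji";

// With the patches applied, the caller supplies already-split lines...
const lines = ["const x = 1;", "console.log(x);"];

const tokenLines = await codeToThemedTokens(lines, {
  lang: "typescript",
  theme: "github-dark",
});

// ...and each token is { start, end, color, fontStyle }, so the caller slices
// the line text itself instead of receiving a `content` substring per token.
for (const token of tokenLines[0] ?? []) {
  console.log(lines[0].slice(token.start, token.end), token.color);
}
```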
7 changes: 5 additions & 2 deletions package.json
@@ -36,6 +36,7 @@
     "@sveltejs/kit": "2.0.6",
     "@sveltejs/vite-plugin-svelte": "3.0.1",
     "@types/lucene": "2.1.5",
+    "@types/node": "20.10.0",
     "@typescript-eslint/eslint-plugin": "6.7.5",
     "@typescript-eslint/parser": "6.7.5",
     "eslint": "8.51.0",
@@ -56,7 +57,8 @@
     "lucene": "2.1.1",
     "lucide-svelte": "0.303.0",
     "pretty-bytes": "6.1.1",
-    "prom-client": "15.0.0"
+    "prom-client": "15.0.0",
+    "shikiji": "patch:shikiji@npm%3A0.9.18#~/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch"
   },
   "engines": {
     "node": ">=20"
@@ -105,6 +107,7 @@
     "typed-array-length": "npm:@nolyfill/typed-array-length@latest",
     "unbox-primitive": "npm:@nolyfill/unbox-primitive@latest",
     "which-boxed-primitive": "npm:@nolyfill/which-boxed-primitive@latest",
-    "which-typed-array": "npm:@nolyfill/which-typed-array@latest"
+    "which-typed-array": "npm:@nolyfill/which-typed-array@latest",
+    "shikiji-core@npm:0.9.18": "patch:shikiji-core@npm%3A0.9.18#~/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch"
   }
 }
83 changes: 55 additions & 28 deletions src/lib/server/content-parser.test.ts
@@ -3,92 +3,119 @@ import { parseChunkMatch, parseFileNameMatch } from "./content-parser";
 
 describe("parseFileNameMatch", () => {
   it("parses file name matches", () => {
-    expect(parseFileNameMatch(Buffer.from("foo"), [])).toEqual([
-      { text: "foo" },
-    ]);
+    expect(parseFileNameMatch(Buffer.from("foo"), [])).toEqual({
+      text: "foo",
+      matchRanges: [],
+    });
     expect(
       parseFileNameMatch(Buffer.from("foo"), [{ start: 0, end: 3 }]),
-    ).toEqual([{ text: "foo", match: true }]);
+    ).toEqual({ text: "foo", matchRanges: [{ start: 0, end: 3 }] });
     expect(
       parseFileNameMatch(Buffer.from("foo"), [{ start: 0, end: 2 }]),
-    ).toEqual([{ text: "fo", match: true }, { text: "o" }]);
+    ).toEqual({ text: "foo", matchRanges: [{ start: 0, end: 2 }] });
     expect(
       parseFileNameMatch(Buffer.from("foo"), [{ start: 1, end: 3 }]),
-    ).toEqual([{ text: "f" }, { text: "oo", match: true }]);
+    ).toEqual({ text: "foo", matchRanges: [{ start: 1, end: 3 }] });
     expect(
       parseFileNameMatch(Buffer.from("foo"), [{ start: 1, end: 2 }]),
-    ).toEqual([{ text: "f" }, { text: "o", match: true }, { text: "o" }]);
+    ).toEqual({ text: "foo", matchRanges: [{ start: 1, end: 2 }] });
     expect(
       parseFileNameMatch(Buffer.from("foo"), [
         { start: 1, end: 2 },
         { start: 2, end: 3 },
       ]),
-    ).toEqual([
-      { text: "f" },
-      { text: "o", match: true },
-      { text: "o", match: true },
-    ]);
+    ).toEqual({
+      text: "foo",
+      matchRanges: [
+        { start: 1, end: 2 },
+        { start: 2, end: 3 },
+      ],
+    });
   });
 });
 
 describe("parseChunkMatch", () => {
   it("parses chunk matches", () => {
     // Single line.
     expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([
-      [{ text: "foo" }],
+      { text: "foo", matchRanges: [] },
     ]);
     expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual(
-      [[{ text: "foo", match: true }]],
+      [{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }],
     );
     expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual(
-      [[{ text: "fo", match: true }, { text: "o" }]],
+      [{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }],
    );
     expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual(
-      [[{ text: "f" }, { text: "oo", match: true }]],
+      [{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }],
     );
     expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual(
-      [[{ text: "f" }, { text: "o", match: true }, { text: "o" }]],
+      [{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }],
     );
     expect(
       parseChunkMatch(Buffer.from("foo"), [
         { start: 1, end: 2 },
         { start: 2, end: 3 },
       ]),
     ).toEqual([
-      [{ text: "f" }, { text: "o", match: true }, { text: "o", match: true }],
+      {
+        text: "foo",
+        matchRanges: [
+          { start: 1, end: 2 },
+          { start: 2, end: 3 },
+        ],
+      },
     ]);
 
     // Multi-line.
     expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
-      [{ text: "foo" }],
-      [],
+      { text: "foo", matchRanges: [] },
+      { text: "", matchRanges: [] },
     ]);
     expect(
       parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
-    ).toEqual([[{ text: "foo", match: true }], []]);
+    ).toEqual([
+      { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
+      { text: "", matchRanges: [] },
+    ]);
     expect(
       parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]),
-    ).toEqual([[{ text: "foo", match: true }], []]);
+    ).toEqual([
+      { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
+      { text: "", matchRanges: [] },
+    ]);
 
     expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([
-      [{ text: "foo" }],
-      [{ text: "bar" }],
+      { text: "foo", matchRanges: [] },
+      { text: "bar", matchRanges: [] },
    ]);
     expect(
       parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]),
-    ).toEqual([[{ text: "foo", match: true }], [{ text: "bar" }]]);
+    ).toEqual([
+      { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
+      { text: "bar", matchRanges: [] },
+    ]);
     expect(
       parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]),
-    ).toEqual([[{ text: "foo", match: true }], [{ text: "bar" }]]);
+    ).toEqual([
+      { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
+      { text: "bar", matchRanges: [] },
+    ]);
 
     expect(
       parseChunkMatch(Buffer.from("foo\nbar"), [
         { start: 0, end: 1 },
         { start: 2, end: 5 },
       ]),
     ).toEqual([
-      [{ text: "f", match: true }, { text: "o" }, { text: "o", match: true }],
-      [{ text: "b", match: true }, { text: "ar" }],
+      {
+        text: "foo",
+        matchRanges: [
+          { start: 0, end: 1 },
+          { start: 2, end: 3 },
+        ],
+      },
+      { text: "bar", matchRanges: [{ start: 0, end: 1 }] },
     ]);
   });
 });
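Read together, the new expectations imply a flatter parsed shape: one { text, matchRanges } entry per line, with match offsets clamped to the line (note how an input end of 4 collapses to 3 on a three-character line). A rough TypeScript reading of that shape, inferred from the tests above rather than copied from content-parser:

```ts
// Inferred from the test expectations; not the literal exported types.
interface MatchRange {
  start: number;
  end: number;
}

interface ContentLine {
  text: string;
  matchRanges: MatchRange[];
}

// parseFileNameMatch describes a single line; parseChunkMatch describes one
// entry per line of the chunk, with ranges split and clamped per line.
declare function parseFileNameMatch(
  content: Buffer,
  ranges: MatchRange[],
): ContentLine;
declare function parseChunkMatch(
  content: Buffer,
  ranges: MatchRange[],
): ContentLine[];
```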