Skip to content

Commit

Permalink
🧮 feat: Improve LaTeX rendering consistency (#3763)
Browse files Browse the repository at this point in the history
* refactor: simplify LaTeX pre-processing for more consistent rendering; disable `singleDollarTextMath`

* refactor: disable singleDollarTextMath in all markdown components

* wip: first pass

* refactor: preserve code blocks and convert rather than preserve LaTeX delimiters

* refactor: remove unused escapeDollarNumber function from latex.ts
  • Loading branch information
danny-avila authored Aug 23, 2024
1 parent 967e8a1 commit ea5140f
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 3 deletions.
4 changes: 2 additions & 2 deletions client/src/components/Chat/Messages/Content/Markdown.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { useRecoilValue } from 'recoil';
import ReactMarkdown from 'react-markdown';
import type { PluggableList } from 'unified';
import rehypeHighlight from 'rehype-highlight';
import { cn, langSubset, validateIframe, processLaTeX, handleDoubleClick } from '~/utils';
import { langSubset, validateIframe, preprocessLaTeX, handleDoubleClick } from '~/utils';
import CodeBlock from '~/components/Messages/Content/CodeBlock';
import { useFileDownload } from '~/data-provider';
import useLocalize from '~/hooks/useLocalize';
Expand Down Expand Up @@ -123,7 +123,7 @@ const Markdown = memo(({ content = '', isEdited, showCursor, isLatestMessage }:
let currentContent = content;
if (!isInitializing) {
currentContent = currentContent.replace('z-index: 1;', '') || '';
currentContent = LaTeXParsing ? processLaTeX(currentContent) : currentContent;
currentContent = LaTeXParsing ? preprocessLaTeX(currentContent) : currentContent;
}

const rehypePlugins: PluggableList = [
Expand Down
92 changes: 91 additions & 1 deletion client/src/utils/latex.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { processLaTeX } from './latex';
/* eslint-disable no-useless-escape */
import { processLaTeX, preprocessLaTeX } from './latex';

describe('processLaTeX', () => {
test('returns the same string if no LaTeX patterns are found', () => {
Expand Down Expand Up @@ -103,3 +104,92 @@ describe('processLaTeX', () => {
});
});
});

// Suite for `preprocessLaTeX`: pins the expected output for currency-style dollar signs,
// backslash LaTeX delimiters, mhchem commands and code blocks.
describe('preprocessLaTeX', () => {
// Text without dollar signs, backslash delimiters or code must pass through untouched.
test('returns the same string if no LaTeX patterns are found', () => {
const content = 'This is a test string without LaTeX';
expect(preprocessLaTeX(content)).toBe(content);
});

// A `$` immediately followed by a digit is treated as currency and gets a backslash.
test('escapes dollar signs followed by digits', () => {
const content = 'Price is $50 and $100';
const expected = 'Price is \\$50 and \\$100';
expect(preprocessLaTeX(content)).toBe(expected);
});

// A `$` followed by a non-digit is left alone.
test('does not escape dollar signs not followed by digits', () => {
const content = 'This $variable is not escaped';
expect(preprocessLaTeX(content)).toBe(content);
});

// Dollar-delimited inline and block math is returned as-is.
test('preserves existing LaTeX expressions', () => {
const content = 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$';
expect(preprocessLaTeX(content)).toBe(content);
});

// Math and currency in the same string: only the `$` before a digit is escaped.
test('handles mixed LaTeX and currency', () => {
const content = 'LaTeX $x^2$ and price $50';
const expected = 'LaTeX $x^2$ and price \\$50';
expect(preprocessLaTeX(content)).toBe(expected);
});

// `\[...\]` is rewritten to `$$...$$` and `\(...\)` to `$...$`.
test('converts LaTeX delimiters', () => {
const content = 'Brackets \\[x^2\\] and parentheses \\(y^2\\)';
const expected = 'Brackets $$x^2$$ and parentheses $y^2$';
expect(preprocessLaTeX(content)).toBe(expected);
});

// `$\ce{` and `$\pu{` gain a second backslash.
test('escapes mhchem commands', () => {
const content = '$\\ce{H2O}$ and $\\pu{123 J}$';
const expected = '$\\\\ce{H2O}$ and $\\\\pu{123 J}$';
expect(preprocessLaTeX(content)).toBe(expected);
});

// All of the above rules applied together on a multi-line input.
test('handles complex mixed content', () => {
const content = `
LaTeX inline $x^2$ and block $$y^2$$
Currency $100 and $200
Chemical $\\ce{H2O}$
Brackets \\[z^2\\]
`;
const expected = `
LaTeX inline $x^2$ and block $$y^2$$
Currency \\$100 and \\$200
Chemical $\\\\ce{H2O}$
Brackets $$z^2$$
`;
expect(preprocessLaTeX(content)).toBe(expected);
});

// Boundary case: empty input yields empty output.
test('handles empty string', () => {
expect(preprocessLaTeX('')).toBe('');
});

// The `$100` inside the fenced block stays raw; the `$200` outside it is escaped.
test('preserves code blocks', () => {
const content = '```\n$100\n```\nOutside $200';
const expected = '```\n$100\n```\nOutside \\$200';
expect(preprocessLaTeX(content)).toBe(expected);
});

// Every currency amount in a sentence is escaped, not just the first one.
test('handles multiple currency values in a sentence', () => {
const content = 'I have $50 in my wallet and $100 in the bank.';
const expected = 'I have \\$50 in my wallet and \\$100 in the bank.';
expect(preprocessLaTeX(content)).toBe(expected);
});

// Digits inside inline math are fine as long as no `$` is directly followed by a digit.
// NOTE(review): inline math that starts with a digit (e.g. `$2x$`) is not covered here.
test('preserves LaTeX expressions with numbers', () => {
const content = 'The equation is $f(x) = 2x + 3$ where x is a variable.';
expect(preprocessLaTeX(content)).toBe(content);
});

// Only the `$` is escaped; the digits and thousands separators after it are unchanged.
test('handles currency values with commas', () => {
const content = 'The price is $1,000,000 for this item.';
const expected = 'The price is \\$1,000,000 for this item.';
expect(preprocessLaTeX(content)).toBe(expected);
});

// Escaped braces (`\{`, `\}`) inside inline math are not altered.
test('preserves LaTeX expressions with special characters', () => {
const content = 'The set is defined as $\\{x | x > 0\\}$.';
expect(preprocessLaTeX(content)).toBe(content);
});
});
63 changes: 63 additions & 0 deletions client/src/utils/latex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => {
// Restore code blocks
return restoreCodeBlocks(processedContent, codeBlocks);
};

/**
 * Preprocesses Markdown content so that LaTeX renders consistently.
 *
 * The pipeline is:
 *  1. Swap fenced code blocks and inline code spans for placeholders so that no
 *     later step can modify code.
 *  2. Swap `$$...$$`, `\[...\]` and `\(...\)` expressions for placeholders so the
 *     currency escaping below cannot touch them.
 *  3. Escape every unescaped `$` that is directly followed by a digit (treated as
 *     a currency amount, e.g. `$50` becomes `\$50`).
 *  4. Restore the LaTeX expressions.
 *  5. Convert `\[...\]` / `\(...\)` to `$$...$$` / `$...$` and escape mhchem commands.
 *  6. Restore the code blocks, byte-for-byte as they were captured.
 *
 * Known limitation: single-dollar inline math is not protected in step 2, so an
 * inline expression that starts with a digit (e.g. `$2x$`) is treated as currency.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  // Puts saved snippets back. A placeholder-looking token whose index was never
  // stored (i.e. it was literally present in the input) is left as-is instead of
  // being replaced by the string "undefined".
  const restore = (text: string, placeholder: RegExp, saved: readonly string[]): string =>
    text.replace(placeholder, (token: string, index: string) => saved[Number(index)] ?? token);

  // Step 1: Protect code blocks
  const codeBlocks: string[] = [];
  content = content.replace(/```[\s\S]*?```|`[^`\n]+`/g, (code) => {
    codeBlocks.push(code);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  const latexExpressions: string[] = [];
  content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Step 3: Escape dollar signs that are likely currency indicators.
  // The run of backslashes in front of the `$` is captured so that a dollar sign
  // which is already escaped (odd number of backslashes) is not escaped a second
  // time; doing so would turn `\$5` into `\\$5` and un-escape it.
  content = content.replace(/(\\*)\$(?=\d)/g, (match: string, backslashes: string) =>
    backslashes.length % 2 === 0 ? backslashes + '\\$' : match,
  );

  // Step 4: Restore LaTeX expressions
  content = restore(content, /<<LATEX_(\d+)>>/g, latexExpressions);

  // Step 5: Apply additional escaping functions while code is still hidden behind
  // placeholders, so mhchem escaping cannot rewrite `$\ce{` / `$\pu{` inside code.
  content = escapeBrackets(content);
  content = escapeMhchem(content);

  // Step 6: Restore code blocks
  return restore(content, /<<CODE_BLOCK_(\d+)>>/g, codeBlocks);
}

/**
 * Rewrites backslash-style LaTeX delimiters into dollar-style delimiters.
 *
 * `\[ ... \]` becomes `$$ ... $$` and `\( ... \)` becomes `$ ... $`. Fenced code
 * blocks and inline code spans are matched by the first alternative of the same
 * pattern and returned unchanged, so delimiters inside code are not converted.
 *
 * @param text The text to scan.
 * @returns The text with the backslash delimiters converted.
 */
export function escapeBrackets(text: string): string {
  // Alternatives, in order: code (group 1), \[...\] body (group 2), \(...\) body (group 3).
  const delimiterPattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const convert = (
    whole: string,
    code: string | undefined,
    displayMath: string | undefined,
    inlineMath: string | undefined,
  ): string => {
    // Code wins: hand it back exactly as it was matched.
    if (code !== undefined) {
      return code;
    }
    if (displayMath !== undefined) {
      return `$$${displayMath}$$`;
    }
    if (inlineMath !== undefined) {
      return `$${inlineMath}$`;
    }
    return whole;
  };

  return text.replace(delimiterPattern, convert);
}

/**
 * Doubles the backslash of mhchem commands that directly follow a dollar sign:
 * `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`. All other text is unchanged.
 *
 * @param text The text to scan.
 * @returns The text with the mhchem command backslashes doubled.
 */
export function escapeMhchem(text: string) {
  // A replacer function is used so the `$` in the output is never read as a
  // special replacement pattern.
  return text.replace(/\$\\(ce|pu)\{/g, (_whole: string, command: string) => `$\\\\${command}{`);
}

0 comments on commit ea5140f

Please sign in to comment.