diff --git a/client/src/components/Chat/Messages/Content/Markdown.tsx b/client/src/components/Chat/Messages/Content/Markdown.tsx index be66f61c8e3..5439bc102a0 100644 --- a/client/src/components/Chat/Messages/Content/Markdown.tsx +++ b/client/src/components/Chat/Messages/Content/Markdown.tsx @@ -8,7 +8,7 @@ import { useRecoilValue } from 'recoil'; import ReactMarkdown from 'react-markdown'; import type { PluggableList } from 'unified'; import rehypeHighlight from 'rehype-highlight'; -import { cn, langSubset, validateIframe, processLaTeX, handleDoubleClick } from '~/utils'; +import { langSubset, validateIframe, preprocessLaTeX, handleDoubleClick } from '~/utils'; import CodeBlock from '~/components/Messages/Content/CodeBlock'; import { useFileDownload } from '~/data-provider'; import useLocalize from '~/hooks/useLocalize'; @@ -123,7 +123,7 @@ const Markdown = memo(({ content = '', isEdited, showCursor, isLatestMessage }: let currentContent = content; if (!isInitializing) { currentContent = currentContent.replace('z-index: 1;', '') || ''; - currentContent = LaTeXParsing ? processLaTeX(currentContent) : currentContent; + currentContent = LaTeXParsing ? 
preprocessLaTeX(currentContent) : currentContent; } const rehypePlugins: PluggableList = [ diff --git a/client/src/utils/latex.spec.ts b/client/src/utils/latex.spec.ts index dad0ce9e11b..d0a8ede3d5d 100644 --- a/client/src/utils/latex.spec.ts +++ b/client/src/utils/latex.spec.ts @@ -1,4 +1,5 @@ -import { processLaTeX } from './latex'; +/* eslint-disable no-useless-escape */ +import { processLaTeX, preprocessLaTeX } from './latex'; describe('processLaTeX', () => { test('returns the same string if no LaTeX patterns are found', () => { @@ -103,3 +104,92 @@ describe('processLaTeX', () => { }); }); }); + +describe('preprocessLaTeX', () => { + test('returns the same string if no LaTeX patterns are found', () => { + const content = 'This is a test string without LaTeX'; + expect(preprocessLaTeX(content)).toBe(content); + }); + + test('escapes dollar signs followed by digits', () => { + const content = 'Price is $50 and $100'; + const expected = 'Price is \\$50 and \\$100'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('does not escape dollar signs not followed by digits', () => { + const content = 'This $variable is not escaped'; + expect(preprocessLaTeX(content)).toBe(content); + }); + + test('preserves existing LaTeX expressions', () => { + const content = 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$'; + expect(preprocessLaTeX(content)).toBe(content); + }); + + test('handles mixed LaTeX and currency', () => { + const content = 'LaTeX $x^2$ and price $50'; + const expected = 'LaTeX $x^2$ and price \\$50'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('converts LaTeX delimiters', () => { + const content = 'Brackets \\[x^2\\] and parentheses \\(y^2\\)'; + const expected = 'Brackets $$x^2$$ and parentheses $y^2$'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('escapes mhchem commands', () => { + const content = '$\\ce{H2O}$ and $\\pu{123 J}$'; + const expected = '$\\\\ce{H2O}$ and $\\\\pu{123 J}$'; + 
expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('handles complex mixed content', () => { + const content = ` + LaTeX inline $x^2$ and block $$y^2$$ + Currency $100 and $200 + Chemical $\\ce{H2O}$ + Brackets \\[z^2\\] + `; + const expected = ` + LaTeX inline $x^2$ and block $$y^2$$ + Currency \\$100 and \\$200 + Chemical $\\\\ce{H2O}$ + Brackets $$z^2$$ + `; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('handles empty string', () => { + expect(preprocessLaTeX('')).toBe(''); + }); + + test('preserves code blocks', () => { + const content = '```\n$100\n```\nOutside $200'; + const expected = '```\n$100\n```\nOutside \\$200'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('handles multiple currency values in a sentence', () => { + const content = 'I have $50 in my wallet and $100 in the bank.'; + const expected = 'I have \\$50 in my wallet and \\$100 in the bank.'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('preserves LaTeX expressions with numbers', () => { + const content = 'The equation is $f(x) = 2x + 3$ where x is a variable.'; + expect(preprocessLaTeX(content)).toBe(content); + }); + + test('handles currency values with commas', () => { + const content = 'The price is $1,000,000 for this item.'; + const expected = 'The price is \\$1,000,000 for this item.'; + expect(preprocessLaTeX(content)).toBe(expected); + }); + + test('preserves LaTeX expressions with special characters', () => { + const content = 'The set is defined as $\\{x | x > 0\\}$.'; + expect(preprocessLaTeX(content)).toBe(content); + }); +}); diff --git a/client/src/utils/latex.ts b/client/src/utils/latex.ts index fde2dcb9c56..97fbc466702 100644 --- a/client/src/utils/latex.ts +++ b/client/src/utils/latex.ts @@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => { // Restore code blocks return restoreCodeBlocks(processedContent, codeBlocks); }; + +/** + * Preprocesses LaTeX content by replacing delimiters and 
/**
 * Preprocesses message content so that LaTeX and currency amounts can coexist.
 *
 * The pipeline is:
 * 1. Swap fenced and inline code for indexed placeholders so it is never touched.
 * 2. Swap already-delimited LaTeX (`$$…$$`, `\[…\]`, `\(…\)`) for indexed placeholders.
 * 3. Escape every remaining `$` that is immediately followed by a digit (treated as currency).
 * 4. Restore the LaTeX expressions, then the code blocks.
 * 5. Convert `\[…\]` / `\(…\)` delimiters to `$$…$$` / `$…$` and escape mhchem commands.
 *
 * Note: input that literally contains a placeholder token whose index is in use
 * (e.g. the text `<<LATEX_0>>`) will be substituted during restoration.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks (fenced first, then single-line inline code).
  const codeBlocks: string[] = [];
  let text = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (match: string) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions so their contents are not escaped.
  const latexExpressions: string[] = [];
  text = text.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match: string) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Step 3: Escape dollar signs that are likely currency indicators.
  text = text.replace(/\$(?=\d)/g, '\\$');

  // Step 4: Restore LaTeX expressions. This runs before code blocks because a
  // protected LaTeX expression may itself contain a code-block placeholder.
  // An index that was never stored leaves the matched text unchanged.
  text = text.replace(
    /<<LATEX_(\d+)>>/g,
    (placeholder: string, index: string) => latexExpressions[Number(index)] ?? placeholder,
  );

  // Step 5: Restore code blocks.
  text = text.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder: string, index: string) => codeBlocks[Number(index)] ?? placeholder,
  );

  // Step 6: Apply additional escaping functions.
  return escapeMhchem(escapeBrackets(text));
}

/**
 * Converts `\[…\]` to `$$…$$` and `\(…\)` to `$…$`, leaving fenced and inline
 * code untouched.
 *
 * @param text The text whose bracket-style LaTeX delimiters should be converted.
 * @returns The text with dollar-style delimiters.
 */
export function escapeBrackets(text: string): string {
  // Alternative 1 matches code so it can be returned verbatim; alternatives 2
  // and 3 capture the body of square- and round-bracket delimited math.
  const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
  return text.replace(
    pattern,
    (
      match: string,
      codeBlock: string | undefined,
      squareBracket: string | undefined,
      roundBracket: string | undefined,
    ): string => {
      if (codeBlock != null) {
        return codeBlock;
      }
      if (squareBracket != null) {
        return `$$${squareBracket}$$`;
      }
      if (roundBracket != null) {
        return `$${roundBracket}$`;
      }
      return match;
    },
  );
}

/**
 * Doubles the backslash of `\ce{` and `\pu{` commands that directly follow a `$`.
 *
 * @param text The text to scan for `$\ce{` and `$\pu{` sequences.
 * @returns The text with those sequences rewritten as `$\\ce{` and `$\\pu{`.
 */
export function escapeMhchem(text: string): string {
  // A function replacer keeps `$` in the output literal (no special replacement
  // patterns) and avoids `replaceAll`, which requires the ES2021 lib.
  return text.replace(/\$\\(ce|pu)\{/g, (_match: string, command: string) => `$\\\\${command}{`);
}