Fix glossary term matching for Japanese docs (#882)

josh-wong · web-flow · commit 2307ea119de7 · 2025-06-25T18:38:48.000+09:00
Refactored glossary term detection to fix handling of Japanese language pages by avoiding word boundaries and pluralization logic that are not applicable. This ensures accurate glossary injection for both English and Japanese documentation.
diff --git a/src/components/GlossaryInjector.tsx b/src/components/GlossaryInjector.tsx
@@ -12,11 +12,12 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
   useEffect(() => {
     const url = window.location.pathname;
     let glossaryPath = '/docs/glossary.json'; // Use the English version as the default glossary.
+    const isJapaneseSite = url.startsWith('/ja-jp/docs');
 
     if (process.env.NODE_ENV === 'production') { // The glossary tooltip works only in production environments.
-      glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
+      glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
     } else {
-      glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
+      glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
     }
 
     fetch(glossaryPath)
@@ -50,7 +51,7 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
   useEffect(() => {
     if (Object.keys(glossary).length === 0 || isVersionIndexPage()) return;
 
-    // Sort terms in descending order by length to prioritize multi-word terms.
+    // Sort terms in descending order to prioritize multi-word terms.
     const terms = Object.keys(glossary).sort((a, b) => b.length - a.length);
     const processedTerms = new Set<string>(); // Set to track processed terms.
 
@@ -93,12 +94,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
           const newNodes: Node[] = [];
           let hasReplacements = false;
 
-          // Create a regex pattern to match both exact terms and their plural forms.
+          // Check if the visitor is on the Japanese version of the site.
+          const isJapaneseSite = window.location.pathname.startsWith('/ja-jp/');
+
+          // Create a regex pattern based on the language.
           const regexPattern = terms.map(term => {
             const escapedTerm = term.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
-            // Match exact term or term followed by 's' or 'es' at word boundary.
+
+            // For the Japanese version of the site, don't use word boundaries that don't work well with Japanese characters.
+            if (isJapaneseSite) {
+              return `(${escapedTerm})`;
+            }
+
+            // For English site, match exact term or term followed by 's' or 'es' at word boundary.
             return `(\\b${escapedTerm}(s|es)?\\b)`;
           }).join('|');
+
           const regex = new RegExp(regexPattern, 'gi'); // The 'i' flag is for case-insensitive matching.
 
           let lastIndex = 0;
@@ -107,12 +118,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
           while ((match = regex.exec(currentText))) {
             const matchedText = match[0]; // The full matched text (may include plural suffix).
 
-            // Find the base term from the glossary that matches (without plural).
-            const baseTerm = terms.find(term => 
-              matchedText.toLowerCase() === term.toLowerCase() || 
-              matchedText.toLowerCase() === `${term.toLowerCase()}s` || 
-              matchedText.toLowerCase() === `${term.toLowerCase()}es`
-            );
+            // Find the base term from the glossary that matches.
+            let baseTerm: string | undefined;
+
+            if (isJapaneseSite) {
+              // For Japanese, look for an exact match only.
+              baseTerm = terms.find(term => 
+                matchedText.toLowerCase() === term.toLowerCase()
+              );
+            } else {
+              // For English, check both singular and plural forms too.
+              baseTerm = terms.find(term => 
+                matchedText.toLowerCase() === term.toLowerCase() || 
+                matchedText.toLowerCase() === `${term.toLowerCase()}s` || 
+                matchedText.toLowerCase() === `${term.toLowerCase()}es`
+              );
+            }
 
             if (!baseTerm) {
               // Skip if no matching base term found.
@@ -138,7 +159,8 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
               let textToUnderline = matchedText;
               let suffix = '';
 
-              if (matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
+              // Only apply pluralization logic for the English version of the site.
+              if (!isJapaneseSite && matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
                 // This is a plural form - only underline the base part.
                 const baseTermLength = baseTerm.length;
                 textToUnderline = matchedText.substring(0, baseTermLength);