Skip to content

Commit 2307ea1

Browse files
authored
Fix glossary term matching for Japanese docs (#882)
Refactored glossary term detection to fix handling of Japanese-language pages: on those pages, terms are now matched without word boundaries or pluralization logic, neither of which applies to Japanese text. This ensures accurate glossary injection for both English and Japanese documentation.
1 parent 54ca483 commit 2307ea1

File tree

1 file changed

+34
-12
lines changed

1 file changed

+34
-12
lines changed

src/components/GlossaryInjector.tsx

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
1212
useEffect(() => {
1313
const url = window.location.pathname;
1414
let glossaryPath = '/docs/glossary.json'; // Use the English version as the default glossary.
15+
const isJapaneseSite = url.startsWith('/ja-jp/docs');
1516

1617
if (process.env.NODE_ENV === 'production') { // The glossary tooltip works only in production environments.
17-
glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
18+
glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
1819
} else {
19-
glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
20+
glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
2021
}
2122

2223
fetch(glossaryPath)
@@ -50,7 +51,7 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
5051
useEffect(() => {
5152
if (Object.keys(glossary).length === 0 || isVersionIndexPage()) return;
5253

53-
// Sort terms in descending order by length to prioritize multi-word terms.
54+
// Sort terms by length in descending order so that longer (multi-word) terms are matched first.
5455
const terms = Object.keys(glossary).sort((a, b) => b.length - a.length);
5556
const processedTerms = new Set<string>(); // Set to track processed terms.
5657

@@ -93,12 +94,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
9394
const newNodes: Node[] = [];
9495
let hasReplacements = false;
9596

96-
// Create a regex pattern to match both exact terms and their plural forms.
97+
// Check if the visitor is on the Japanese version of the site.
98+
const isJapaneseSite = window.location.pathname.startsWith('/ja-jp/');
99+
100+
// Create a regex pattern based on the language.
97101
const regexPattern = terms.map(term => {
98102
const escapedTerm = term.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
99-
// Match exact term or term followed by 's' or 'es' at word boundary.
103+
104+
// For the Japanese version of the site, skip word boundaries because they don't work well with Japanese characters.
105+
if (isJapaneseSite) {
106+
return `(${escapedTerm})`;
107+
}
108+
109+
// For English site, match exact term or term followed by 's' or 'es' at word boundary.
100110
return `(\\b${escapedTerm}(s|es)?\\b)`;
101111
}).join('|');
112+
102113
const regex = new RegExp(regexPattern, 'gi'); // The 'i' flag is for case-insensitive matching.
103114

104115
let lastIndex = 0;
@@ -107,12 +118,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
107118
while ((match = regex.exec(currentText))) {
108119
const matchedText = match[0]; // The full matched text (may include plural suffix).
109120

110-
// Find the base term from the glossary that matches (without plural).
111-
const baseTerm = terms.find(term =>
112-
matchedText.toLowerCase() === term.toLowerCase() ||
113-
matchedText.toLowerCase() === `${term.toLowerCase()}s` ||
114-
matchedText.toLowerCase() === `${term.toLowerCase()}es`
115-
);
121+
// Find the base term from the glossary that matches.
122+
let baseTerm: string | undefined;
123+
124+
if (isJapaneseSite) {
125+
// For Japanese, look for an exact match only.
126+
baseTerm = terms.find(term =>
127+
matchedText.toLowerCase() === term.toLowerCase()
128+
);
129+
} else {
130+
// For English, check both the singular and plural forms.
131+
baseTerm = terms.find(term =>
132+
matchedText.toLowerCase() === term.toLowerCase() ||
133+
matchedText.toLowerCase() === `${term.toLowerCase()}s` ||
134+
matchedText.toLowerCase() === `${term.toLowerCase()}es`
135+
);
136+
}
116137

117138
if (!baseTerm) {
118139
// Skip if no matching base term found.
@@ -138,7 +159,8 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
138159
let textToUnderline = matchedText;
139160
let suffix = '';
140161

141-
if (matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
162+
// Only apply pluralization logic for the English version of the site.
163+
if (!isJapaneseSite && matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
142164
// This is a plural form - only underline the base part.
143165
const baseTermLength = baseTerm.length;
144166
textToUnderline = matchedText.substring(0, baseTermLength);

0 commit comments

Comments
 (0)