Skip to content

Commit fef09c1

Browse files
authored
Fix false-positive matches (#883)
Fixes cases where a glossary term that appears inside a longer word is incorrectly flagged (for example, `ca` showing a tooltip inside the word `Scalar`).
1 parent 2307ea1 commit fef09c1

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

src/components/GlossaryInjector.tsx

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,35 +103,41 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
103103

104104
// For the Japanese version of the site, don't use word boundaries, because they don't work well with Japanese characters.
105105
if (isJapaneseSite) {
106+
// For Japanese text, we can't use word boundaries, so just match the exact term.
106107
return `(${escapedTerm})`;
107108
}
108109

109110
// For the English site, match the exact term, or the term followed by 's' or 'es', delimited by word boundaries.
110-
return `(\\b${escapedTerm}(s|es)?\\b)`;
111+
return `(\\b${escapedTerm}\\b|\\b${escapedTerm}s\\b|\\b${escapedTerm}es\\b)`;
111112
}).join('|');
112113

113-
const regex = new RegExp(regexPattern, 'gi'); // The 'i' flag is for case-insensitive matching.
114+
const regex = new RegExp(regexPattern, 'gi'); // Use case-insensitive matching.
114115

115116
let lastIndex = 0;
116117
let match: RegExpExecArray | null;
117118

118119
while ((match = regex.exec(currentText))) {
119120
const matchedText = match[0]; // The full matched text (may include plural suffix).
120121

122+
// For Japanese, strip any leading or trailing characters that are not letters, digits, or underscores from the text captured by the regex.
123+
const actualMatch = isJapaneseSite
124+
? matchedText.replace(/^[^\p{L}\p{N}_]+|[^\p{L}\p{N}_]+$/gu, '')
125+
: matchedText;
126+
121127
// Find the base term from the glossary that matches.
122128
let baseTerm: string | undefined;
123129

124130
if (isJapaneseSite) {
125131
// For Japanese, look for an exact match only.
126132
baseTerm = terms.find(term =>
127-
matchedText.toLowerCase() === term.toLowerCase()
133+
actualMatch === term
128134
);
129135
} else {
130136
// For English, compare case-insensitively against the term itself and its plural forms (term + 's', term + 'es').
131137
baseTerm = terms.find(term =>
132-
matchedText.toLowerCase() === term.toLowerCase() ||
133-
matchedText.toLowerCase() === `${term.toLowerCase()}s` ||
134-
matchedText.toLowerCase() === `${term.toLowerCase()}es`
138+
actualMatch.toLowerCase() === term.toLowerCase() ||
139+
actualMatch.toLowerCase() === `${term.toLowerCase()}s` ||
140+
actualMatch.toLowerCase() === `${term.toLowerCase()}es`
135141
);
136142
}
137143

0 commit comments

Comments
 (0)