Skip to content

Fix glossary term matching for Japanese docs #882

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions src/components/GlossaryInjector.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
useEffect(() => {
const url = window.location.pathname;
let glossaryPath = '/docs/glossary.json'; // Use the English version as the default glossary.
const isJapaneseSite = url.startsWith('/ja-jp/docs');

if (process.env.NODE_ENV === 'production') { // The glossary tooltip works only in production environments.
glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
} else {
glossaryPath = url.startsWith('/ja-jp/docs') ? '/ja-jp/glossary.json' : '/docs/glossary.json';
glossaryPath = isJapaneseSite ? '/ja-jp/glossary.json' : '/docs/glossary.json';
}

fetch(glossaryPath)
Expand Down Expand Up @@ -50,7 +51,7 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
useEffect(() => {
if (Object.keys(glossary).length === 0 || isVersionIndexPage()) return;

// Sort terms in descending order by length to prioritize multi-word terms.
// Sort terms in descending order to prioritize multi-word terms.
const terms = Object.keys(glossary).sort((a, b) => b.length - a.length);
const processedTerms = new Set<string>(); // Set to track processed terms.

Expand Down Expand Up @@ -93,12 +94,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
const newNodes: Node[] = [];
let hasReplacements = false;

// Create a regex pattern to match both exact terms and their plural forms.
// Check if the visitor is on the Japanese version of the site.
const isJapaneseSite = window.location.pathname.startsWith('/ja-jp/');

// Create a regex pattern based on the language.
const regexPattern = terms.map(term => {
const escapedTerm = term.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
// Match exact term or term followed by 's' or 'es' at word boundary.

// For the Japanese version of the site, don't use word boundaries that don't work well with Japanese characters.
if (isJapaneseSite) {
return `(${escapedTerm})`;
}

// For English site, match exact term or term followed by 's' or 'es' at word boundary.
return `(\\b${escapedTerm}(s|es)?\\b)`;
}).join('|');

const regex = new RegExp(regexPattern, 'gi'); // The 'i' flag is for case-insensitive matching.

let lastIndex = 0;
Expand All @@ -107,12 +118,22 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
while ((match = regex.exec(currentText))) {
const matchedText = match[0]; // The full matched text (may include plural suffix).

// Find the base term from the glossary that matches (without plural).
const baseTerm = terms.find(term =>
matchedText.toLowerCase() === term.toLowerCase() ||
matchedText.toLowerCase() === `${term.toLowerCase()}s` ||
matchedText.toLowerCase() === `${term.toLowerCase()}es`
);
// Find the base term from the glossary that matches.
let baseTerm: string | undefined;

if (isJapaneseSite) {
// For Japanese, look for an exact match only.
baseTerm = terms.find(term =>
matchedText.toLowerCase() === term.toLowerCase()
);
} else {
// For English, check both singular and plural forms too.
baseTerm = terms.find(term =>
matchedText.toLowerCase() === term.toLowerCase() ||
matchedText.toLowerCase() === `${term.toLowerCase()}s` ||
matchedText.toLowerCase() === `${term.toLowerCase()}es`
);
}

if (!baseTerm) {
// Skip if no matching base term found.
Expand All @@ -138,7 +159,8 @@ const GlossaryInjector: React.FC<GlossaryInjectorProps> = ({ children }) => {
let textToUnderline = matchedText;
let suffix = '';

if (matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
// Only apply pluralization logic for the English version of the site.
if (!isJapaneseSite && matchedText.toLowerCase() !== baseTerm.toLowerCase()) {
// This is a plural form - only underline the base part.
const baseTermLength = baseTerm.length;
textToUnderline = matchedText.substring(0, baseTermLength);
Expand Down