Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a better matching algorithm to match languages #4067

Merged
merged 9 commits into from
Mar 27, 2024
52 changes: 52 additions & 0 deletions pkg/util/intl/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,55 @@ func Match(preferredLanguageTags []string, supportedLanguageTags SupportedLangua

return idx, tag
}

// matcher.Match will not return tags with higher confidence
// For example, with supported tags zh-CN, zh-HK
// matcher.Match("zh-Hant") will return zh-CN, which confidence is Low,
// but not zh-HK which confidence is High.
// This function is an implementation of Match trying to return an option with higher confidence.
func BestMatch(preferredLanguageTags []string, supportedLanguageTags SupportedLanguages) (int, language.Tag) {
if len(supportedLanguageTags) <= 0 {
return -1, language.Und
}

supportedTags := toLanguageTags(supportedLanguageTags)
preferredTags := toLanguageTags(preferredLanguageTags)

if len(preferredTags) <= 0 {
return 0, supportedTags[0]
}

var selectedTagIdx int = -1
var selectedTagConfidence language.Confidence
for _, pt := range preferredTags {
preferredTag := pt

for idx, t := range supportedTags {
supportedTag := t
matcher := GetMatcher([]language.Tag{supportedTag})
_, _, confidence := matcher.Match(preferredTag)
louischan-oursky marked this conversation as resolved.
Show resolved Hide resolved

// If exact match, choose this option without considering others
if confidence == language.Exact {
selectedTagIdx = idx
selectedTagConfidence = confidence
break
}

// Else, select the option with highest confidence
if confidence > selectedTagConfidence || selectedTagIdx == -1 {
selectedTagIdx = idx
selectedTagConfidence = confidence
}
}

// If confidence is not No, use this match as the result and do not look at other preferred tags
if selectedTagConfidence != language.No {
break
}
}

tag := supportedTags[selectedTagIdx]

return selectedTagIdx, tag
}
54 changes: 54 additions & 0 deletions pkg/util/intl/match_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,58 @@ func TestMatch(t *testing.T) {
2,
)
})

Convey("BestMatch", t, func() {
test := func(preferred []string, supported []string, expected int) {
actual, _ := BestMatch(preferred, supported)
So(actual, ShouldEqual, expected)
}

// Select default if there is no preferred languages
test(nil, []string{"en", "ja", "zh"}, 0)
test([]string{}, []string{"en", "ja", "zh"}, 0)

// Simply select japanese
test(
[]string{"ja-JP", "en-US", "zh-Hant-HK"},
[]string{"zh", "en", "ja"},
2,
)

// Should select supported tag with higher confidence
test(
[]string{"zh-Hant"},
[]string{"zh-CN", "zh-HK", "en-US"},
1,
)
test(
[]string{"en-UK"},
[]string{"zh-CN", "zh-HK", "zh-TW", "en-US"},
3,
)
test(
[]string{"zh-SG"},
[]string{"en-US", "zh-CN", "zh-HK", "zh-TW"},
1,
)

// Should select supported tag with lower index if confidence are same
test(
[]string{"en"},
[]string{"en-HK", "en-GB"},
0,
)

// Should select zh-TW with exact confidence
test(
[]string{"zh-Hant"},
[]string{"zh-CN", "zh-HK", "zh-TW", "en-US"},
2,
)
test(
[]string{"zh-Hant-HK"},
[]string{"zh-CN", "zh-HK", "zh-TW", "en-US"},
1,
)
})
}