Skip to content

Commit

Permalink
Add display support for more languages in LanguageCtrl
Browse files Browse the repository at this point in the history
Don't rely entirely on ICU's locales database and manually add all
ISO 639 languages (as listed by ICU API) to the list of known languages.

Update display name determination accordingly.

This improves handling for languages like Haitian Creole (ht) that would
previously be recognized if provided as "ht", but couldn't be picked in
the LanguageCtrl list or entered by name rather than by code.
  • Loading branch information
vslavik committed Oct 26, 2023
1 parent 6b3dc6c commit 1a78c3a
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 8 deletions.
16 changes: 11 additions & 5 deletions src/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@
#include <wx/memtext.h>
#include <wx/filename.h>

#include <set>
#include <algorithm>
#include <set>
#include <regex>


// ----------------------------------------------------------------------
Expand All @@ -63,6 +64,9 @@
namespace
{

// Mostly correct regex for removing HTML markup.
//
// Matches a '<', any run of characters other than '>', and the closing '>'.
// It is only approximate: a '>' appearing inside a tag (e.g. within a quoted
// attribute value) ends the match early, and any '<...>' sequence in plain
// text is matched as if it were a tag.
const std::wregex RE_APPROXIMATE_MARKUP(L"<[^>]*>");

// Fixup some common issues with filepaths in PO files, due to old Poedit versions,
// user misunderstanding or Poedit bugs:
wxString FixBrokenSearchPathValue(wxString p)
Expand Down Expand Up @@ -1076,15 +1080,17 @@ void Catalog::PostCreation()
{
// detect source language from the text (ignoring plurals for simplicity,
// as we don't need 100% of the text):
wxString allText;
std::wstring allText;
for (auto& i: items())
{
allText.append(i->GetRawString());
allText.append('\n');
auto withoutMarkup = std::regex_replace(i->GetRawString().ToStdWstring(), RE_APPROXIMATE_MARKUP, L" ");
allText.append(withoutMarkup);
allText += L' ';
}
if (!allText.empty())
{
m_sourceLanguage = Language::TryDetectFromText(allText.utf8_str());
puts(str::to_utf8(allText).c_str());
m_sourceLanguage = Language::TryDetectFromText(str::to_utf8(allText));
wxLogTrace("poedit", "detected source language is '%s'", m_sourceLanguage.Code());
}
}
Expand Down
42 changes: 39 additions & 3 deletions src/language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,12 @@ const DisplayNamesData& GetDisplayNamesData()
std::call_once(of_namesList, [=]{
auto locEng = icu::Locale::getEnglish();
std::vector<icu::UnicodeString> names;
std::set<std::string> foundCodes;

int32_t count;
const icu::Locale *loc = icu::Locale::getAvailableLocales(count);
names.reserve(count);

for (int i = 0; i < count; i++, loc++)
{
auto language = loc->getLanguage();
Expand Down Expand Up @@ -177,7 +179,9 @@ const DisplayNamesData& GetDisplayNamesData()
code += "@cyrillic";
}
}


foundCodes.insert(code);

s.foldCase();
data.names[str::to_wstring(s)] = code;

Expand All @@ -186,6 +190,33 @@ const DisplayNamesData& GetDisplayNamesData()
data.namesEng[str::to_wstring(s)] = code;
}

// add languages that are not listed as locales in ICU:
for (const char * const* i = icu::Locale::getISOLanguages(); *i != nullptr; ++i)
{
const char *code = *i;
if (foundCodes.find(code) != foundCodes.end())
continue;

icu::Locale langLoc(code);
wxASSERT( strcmp(code, langLoc.getLanguage()) == 0 );

icu::UnicodeString name;
if (langLoc.getDisplayName(name).isEmpty())
langLoc.getDisplayLanguage(name);
if (name.isEmpty())
continue;

names.push_back(name);

name.foldCase();
data.names[str::to_wstring(name)] = code;

if (langLoc.getDisplayName(locEng, name).isEmpty())
langLoc.getDisplayLanguage(locEng, name);
name.foldCase();
data.namesEng[str::to_wstring(name)] = code;
}

// sort the names alphabetically for data.sortedNames:
UErrorCode err = U_ZERO_ERROR;
std::unique_ptr<icu::Collator> coll(icu::Collator::createInstance(err));
Expand Down Expand Up @@ -506,8 +537,10 @@ icu::Locale Language::ToIcu() const

// Returns a human-readable name for this language, obtained from ICU via
// the display-name overloads that take no explicit display locale.
//
// The result may still be empty if ICU provides neither a full display name
// nor a language name for this locale.
wxString Language::DisplayName() const
{
    // Build the ICU locale once and reuse it for both lookups below.
    auto loc = ToIcu();
    icu::UnicodeString s;

    // Prefer the full display name; if ICU leaves it empty, fall back to the
    // name of the language alone.
    if (loc.getDisplayName(s).isEmpty())
        loc.getDisplayLanguage(s);

    return str::to_wx(s);
}

Expand All @@ -522,7 +555,10 @@ wxString Language::DisplayNameInItself() const
{
auto loc = ToIcu();
icu::UnicodeString s;
loc.getDisplayName(loc, s);
if (loc.getDisplayName(loc, s).isEmpty())
loc.getDisplayLanguage(loc, s);
if (s.isEmpty())
return DisplayName(); // fall back to current locale's name, better than nothing
return str::to_wx(s);
}

Expand Down

0 comments on commit 1a78c3a

Please sign in to comment.