diff --git a/langtable/data/languages.xml b/langtable/data/languages.xml index 36952fa..518721d 100644 --- a/langtable/data/languages.xml +++ b/langtable/data/languages.xml @@ -1651,7 +1651,7 @@ - ar_EG.UTF-8300 + ar_EG.UTF-8600 ar_SD.UTF-8170 ar_DZ.UTF-8150 ar_MA.UTF-8140 @@ -11297,50 +11297,50 @@ es_ES.UTF-81000 - es_AR.UTF-8900 - es_BO.UTF-8900 - es_CL.UTF-8900 - es_CO.UTF-8900 - es_CR.UTF-8900 - es_CU.UTF-8900 - es_DO.UTF-8900 - es_EC.UTF-8900 - es_GT.UTF-8900 - es_HN.UTF-8900 - es_MX.UTF-8900 - es_NI.UTF-8900 - es_PA.UTF-8900 - es_PE.UTF-8900 - es_PR.UTF-8900 - es_PY.UTF-8900 - es_SV.UTF-8900 - es_US.UTF-8900 - es_UY.UTF-8900 - es_VE.UTF-8900 + es_AR.UTF-8400 + es_BO.UTF-8400 + es_CL.UTF-8400 + es_CO.UTF-8400 + es_CR.UTF-8400 + es_CU.UTF-8400 + es_DO.UTF-8400 + es_EC.UTF-8400 + es_GT.UTF-8400 + es_HN.UTF-8400 + es_MX.UTF-8400 + es_NI.UTF-8400 + es_PA.UTF-8400 + es_PE.UTF-8400 + es_PR.UTF-8400 + es_PY.UTF-8400 + es_SV.UTF-8400 + es_US.UTF-8400 + es_UY.UTF-8400 + es_VE.UTF-8400 ES1000 - 419900 - AR900 - BO900 - CL900 - CO900 - CR900 - CU900 - DO900 - EC900 - GT900 - HN900 - MX900 - NI900 - PA900 - PE900 - PR900 - PY900 - SV900 - US900 - UY900 - VE900 + 419400 + AR400 + BO400 + CL400 + CO400 + CR400 + CU400 + DO400 + EC400 + GT400 + HN400 + MX400 + NI400 + PA400 + PE400 + PR400 + PY400 + SV400 + US400 + UY400 + VE400 es501 diff --git a/langtable/langtable.py b/langtable/langtable.py index f241174..83ff2f7 100644 --- a/langtable/langtable.py +++ b/langtable/langtable.py @@ -22,6 +22,7 @@ # list_locales() # list_keyboards() # list_common_languages() +# list_common_locales() # list_common_keyboards() # list_consolefonts() # list_inputmethods() @@ -118,6 +119,10 @@ # keyboard layout as prevalent _KEYBOARD_LAYOUT_RANK_THRESHOLD = 500 +# Rank threshold to qualify a +# locale as prevalent +_LOCALE_RANK_THRESHOLD = 500 + # For the ICU/CLDR locale pattern see: http://userguide.icu-project.org/locale # (We ignore the variant code here) _cldr_locale_pattern = re.compile( @@ 
def list_common_locales(languageId = None, scriptId = None, territoryId = None):
    '''Returns the most prevalent locale(s)

    Called without any identifier, every language reported by
    list_common_languages() contributes those of its locales whose
    rank is at least _LOCALE_RANK_THRESHOLD, highest rank first.

    Called with at least one identifier, the identifiers are parsed
    with _parse_and_split_languageId() and only the first entry of
    the matching list_locales() result is returned (or an empty list
    if there is no match).

    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :return: list of locales
    :rtype: list of strings

    **Examples:**

    >>> list_common_locales()
    ['ar_EG.UTF-8', 'en_US.UTF-8', 'en_GB.UTF-8', 'fr_FR.UTF-8', 'de_DE.UTF-8', 'ja_JP.UTF-8', 'zh_CN.UTF-8', 'ru_RU.UTF-8', 'es_ES.UTF-8']

    >>> list_common_locales(languageId='fr')
    ['fr_FR.UTF-8']

    >>> list_common_locales(territoryId='CA')
    ['en_CA.UTF-8']

    >>> list_common_locales(territoryId='FR')
    ['fr_FR.UTF-8']

    >>> list_common_locales(languageId='fr', territoryId='CA')
    ['fr_CA.UTF-8']

    >>> list_common_locales(languageId='de', territoryId='FR')
    ['de_DE.UTF-8']

    >>> list_common_locales(languageId='sr', scriptId='Latn')
    ['sr_RS.UTF-8@latin']

    >>> list_common_locales(languageId='sr', scriptId='Cyrl')
    ['sr_RS.UTF-8']

    >>> list_common_locales(languageId='zh', scriptId='Hans')
    ['zh_CN.UTF-8']

    >>> list_common_locales(languageId='zh', scriptId='Hant')
    ['zh_TW.UTF-8']

    >>> list_common_locales(languageId='zh', territoryId='TW')
    ['zh_TW.UTF-8']
    '''
    if not (languageId or scriptId or territoryId):
        # Nothing to narrow the search: walk the common languages and
        # keep every locale that reaches the prevalence threshold.
        prevalent = []
        for lang in list_common_languages():
            by_rank = sorted(
                _languages_db[lang].locales.items(),
                key=lambda entry: -entry[1])
            prevalent.extend(
                name for name, weight in by_rank
                if weight >= _LOCALE_RANK_THRESHOLD)
        return prevalent

    parsed = _parse_and_split_languageId(
        languageId=languageId, scriptId=scriptId, territoryId=territoryId)
    # Forward only the components which are actually set.
    query = {}
    for keyword, value in (('languageId', parsed.language),
                           ('scriptId', parsed.script),
                           ('territoryId', parsed.territory)):
        if value:
            query[keyword] = value
    matches = list_locales(**query)
    # Only the first locale of the result is reported.
    return [matches[0]] if matches else []