From 6d88fbd4946b1bdad6a2e7450e102deef7c59525 Mon Sep 17 00:00:00 2001 From: Nathan Williams Date: Mon, 8 Jul 2024 18:48:01 -0400 Subject: [PATCH 01/39] Update de_Latn.textproto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move `ẞ` to the auxiliary character set. The lowercase `ß` is still part of the base character set. Based on [Wikipedia](https://en.wikipedia.org/wiki/%C3%9F#:~:text=Additionally%2C%20as%20of%202017%2C%20when,with%20%E2%9F%A8SS%E2%9F%A9%20in%20allcaps.), `ẞ` seems to be more of an auxiliary than base character. It is relatively new and being written as `SS` is considered valid. --- Lib/gflanguages/data/languages/de_Latn.textproto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gflanguages/data/languages/de_Latn.textproto b/Lib/gflanguages/data/languages/de_Latn.textproto index 2b4f1a012c8..b1e34729ac4 100644 --- a/Lib/gflanguages/data/languages/de_Latn.textproto +++ b/Lib/gflanguages/data/languages/de_Latn.textproto @@ -32,8 +32,8 @@ region: "SI" region: "SK" region: "US" exemplar_chars { - base: "A Ä B C D E F G H I J K L M N O Ö P Q R S ẞ T U Ü V W X Y Z a ä b c d e f g h i j k l m n o ö p q r s ß t u ü v w x y z" - auxiliary: "Á À Ă Â Å Ã Ā Æ Ç É È Ĕ Ê Ë Ē Ğ Í Ì Ĭ Î Ï İ Ī I Ñ Ó Ò Ŏ Ô Ø Ō Œ Ş S Ú Ù Ŭ Û Ū Ÿ á à ă â å ã ā æ ç é è ĕ ê ë ē ğ í ì ĭ î ï ī ı ñ ó ò ŏ ô ø ō œ ş ſ ú ù ŭ û ū ÿ" + base: "A Ä B C D E F G H I J K L M N O Ö P Q R S T U Ü V W X Y Z a ä b c d e f g h i j k l m n o ö p q r s ß t u ü v w x y z" + auxiliary: "Á À Ă Â Å Ã Ā Æ Ç É È Ĕ Ê Ë Ē Ğ Í Ì Ĭ Î Ï İ Ī I Ñ Ó Ò Ŏ Ô Ø Ō Œ Ş S ẞ Ú Ù Ŭ Û Ū Ÿ á à ă â å ã ā æ ç é è ĕ ê ë ē ğ í ì ĭ î ï ī ı ñ ó ò ŏ ô ø ō œ ş ſ ú ù ŭ û ū ÿ" marks: "◌̈ ◌̀ ◌́" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . 
… \' ‘ ‚ \" “ „ « » ( ) [ ] { } @ * / & #" From c8f64d01bf842a5ee4509f2f03b145eea45165b2 Mon Sep 17 00:00:00 2001 From: Nathan Williams Date: Mon, 15 Jul 2024 10:34:07 -0400 Subject: [PATCH 02/39] Revert "Update de_Latn.textproto" --- Lib/gflanguages/data/languages/de_Latn.textproto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gflanguages/data/languages/de_Latn.textproto b/Lib/gflanguages/data/languages/de_Latn.textproto index b1e34729ac4..2b4f1a012c8 100644 --- a/Lib/gflanguages/data/languages/de_Latn.textproto +++ b/Lib/gflanguages/data/languages/de_Latn.textproto @@ -32,8 +32,8 @@ region: "SI" region: "SK" region: "US" exemplar_chars { - base: "A Ä B C D E F G H I J K L M N O Ö P Q R S T U Ü V W X Y Z a ä b c d e f g h i j k l m n o ö p q r s ß t u ü v w x y z" - auxiliary: "Á À Ă Â Å Ã Ā Æ Ç É È Ĕ Ê Ë Ē Ğ Í Ì Ĭ Î Ï İ Ī I Ñ Ó Ò Ŏ Ô Ø Ō Œ Ş S ẞ Ú Ù Ŭ Û Ū Ÿ á à ă â å ã ā æ ç é è ĕ ê ë ē ğ í ì ĭ î ï ī ı ñ ó ò ŏ ô ø ō œ ş ſ ú ù ŭ û ū ÿ" + base: "A Ä B C D E F G H I J K L M N O Ö P Q R S ẞ T U Ü V W X Y Z a ä b c d e f g h i j k l m n o ö p q r s ß t u ü v w x y z" + auxiliary: "Á À Ă Â Å Ã Ā Æ Ç É È Ĕ Ê Ë Ē Ğ Í Ì Ĭ Î Ï İ Ī I Ñ Ó Ò Ŏ Ô Ø Ō Œ Ş S Ú Ù Ŭ Û Ū Ÿ á à ă â å ã ā æ ç é è ĕ ê ë ē ğ í ì ĭ î ï ī ı ñ ó ò ŏ ô ø ō œ ş ſ ú ù ŭ û ū ÿ" marks: "◌̈ ◌̀ ◌́" numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . 
… \' ‘ ‚ \" “ „ « » ( ) [ ] { } @ * / & #" From cc55af739d121218ca1763c84ec2b18c2f7c6214 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 11:54:03 -0400 Subject: [PATCH 03/39] Enforce uniqueness of language names across all scripts --- tests/test_data_languages.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 25a22dd3c4e..e72bc088f07 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -270,13 +270,10 @@ def test_exemplar_parser(): def test_language_uniqueness(): names = Counter([]) for lang in LANGUAGES.values(): - # We check that names are unique *within a script* since - # when we display them in a menu we segment that menu by - # script and then by language if lang.preferred_name: - names[lang.script + "/" + lang.preferred_name] += 1 + names[lang.preferred_name] += 1 else: - names[lang.name + "/" + lang.preferred_name] += 1 + names[lang.name] += 1 if any(count > 1 for count in names.values()): duplicates = {name: count for name, count in names.items() if count > 1} pytest.fail(f"Duplicate language names: {duplicates}") From 5a57dcaa8a5c159cc026855c6baf69c9ea1c8a20 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 13:19:13 -0400 Subject: [PATCH 04/39] Add test for enforcing language name structure --- tests/test_data_languages.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index e72bc088f07..4c244db4e71 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,10 +83,13 @@ "tlh_Latn": "Klingon is an artifical language.", } +LANGUAGE_NAME_REGEX = "^[A-Za-z-]+(, [A-Za-z-]+)?( [(][A-Za-z-]+[)])?$" + @pytest.mark.parametrize("lang_code", LANGUAGES) @pytest.mark.parametrize( - "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"] + 
"exemplar_name", ["base", "auxiliary", "marks", + "numerals", "punctuation", "index"] ) def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name): lang = LANGUAGES[lang_code] @@ -105,13 +108,15 @@ def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name): @pytest.mark.parametrize("lang_code", LANGUAGES) @pytest.mark.parametrize( - "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"] + "exemplar_name", ["base", "auxiliary", "marks", + "numerals", "punctuation", "index"] ) def test_languages_exemplars_duplicates(lang_code, exemplar_name): lang = LANGUAGES[lang_code] exemplar = getattr(lang.exemplar_chars, exemplar_name).split() counter = Counter(exemplar) - counts = sorted(counter.most_common(), key=lambda pair: exemplar.index(pair[0])) + counts = sorted(counter.most_common(), + key=lambda pair: exemplar.index(pair[0])) assert counts == [(v, 1) for v in exemplar] @@ -211,7 +216,8 @@ def test_sample_texts_are_in_script(lang_code): "idu_Latn", "ban_Bali", ]: - pytest.xfail("These languages have known issues with their sample text") + pytest.xfail( + "These languages have known issues with their sample text") return lang = LANGUAGES[lang_code] script_name = SCRIPTS[lang.script].name @@ -230,7 +236,8 @@ def test_sample_texts_are_in_script(lang_code): chars = set(samples) for char in chars: char_script = ( - youseedee.ucd_data(ord(char)).get("Script", "").replace("_", " ") + youseedee.ucd_data(ord(char)).get( + "Script", "").replace("_", " ") ) if char_script == "Common" or char_script == "Inherited": continue @@ -275,5 +282,18 @@ def test_language_uniqueness(): else: names[lang.name] += 1 if any(count > 1 for count in names.values()): - duplicates = {name: count for name, count in names.items() if count > 1} + duplicates = {name: count for name, + count in names.items() if count > 1} pytest.fail(f"Duplicate language names: {duplicates}") + + +def test_language_name_structure(): + for lang in 
LANGUAGES.values(): + language_name = lang.preferred_name if lang.preferred_name else lang.name + script_name = SCRIPTS[lang.script].name + if not re.match(LANGUAGE_NAME_REGEX, language_name): + pytest.fail( + f"Language name does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {language_name}") + if language_name.endswith(")") and not language_name.endsWith(f"({script_name})"): + pytest.fail( + f"Language name parenthetical should contain script name ({script_name}): {language_name}") From af1807c6ec727c4e5f022e6faa4179fa395186b8 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 13:20:16 -0400 Subject: [PATCH 05/39] Fix duplicate or near malstructured Chinese language names --- Lib/gflanguages/data/languages/yue_Hani.textproto | 4 ++-- Lib/gflanguages/data/languages/yue_Hant.textproto | 2 +- Lib/gflanguages/data/languages/zh_Hans.textproto | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/gflanguages/data/languages/yue_Hani.textproto b/Lib/gflanguages/data/languages/yue_Hani.textproto index 29ab10c28cc..e14343eda3c 100644 --- a/Lib/gflanguages/data/languages/yue_Hani.textproto +++ b/Lib/gflanguages/data/languages/yue_Hani.textproto @@ -2,7 +2,7 @@ id: "yue_Hani" language: "yue" script: "Hani" name: "Yue Chinese" -preferred_name: "Cantonese" +preferred_name: "Cantonese (Han)" autonym: "粵語" region: "CN" region: "HK" @@ -20,4 +20,4 @@ sample_text { specimen_32: "鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護," specimen_21: "鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護,\n鑑於有必要促進各國間友好關係嘅發展,\n鑑於各聯合國國家嘅人民已經喺聯合國憲章中重申佢哋對基本人權、人格尊嚴同價值以及男女平等權利嘅信念,並決心促成較大自由中嘅社會進步同生活水平嘅改善,\n鑑於各會員國業已誓願同聯合國合作以促進對人權同基本自由嘅普遍尊重同遵行,\n鑑於對呢啲權利同自由嘅普遍了解對於呢個誓願嘅充分實現具有好大嘅重要性," specimen_16: 
"鑑於對人類家庭所有成員嘅固有尊嚴及其平等嘅同不移嘅權利嘅承認,乃係世界自由、正義同和平嘅基礎,\n鑑於對人權嘅無視同侮蔑已經發展為野蠻暴行,呢啲暴行玷污咗人類嘅良心,而一個人人享有言論同信仰自由並免予恐懼同匱乏嘅世界嘅來臨,已經被宣布為普通人民嘅最高願望,\n鑑於為咗使人類唔致迫不得已鋌而走險對暴政同壓迫進行反叛,有必要使人權受法治嘅保護,\n鑑於有必要促進各國間友好關係嘅發展,\n鑑於各聯合國國家嘅人民已經喺聯合國憲章中重申佢哋對基本人權、人格尊嚴同價值以及男女平等權利嘅信念,並決心促成較大自由中嘅社會進步同生活水平嘅改善,\n鑑於各會員國業已誓願同聯合國合作以促進對人權同基本自由嘅普遍尊重同遵行,\n鑑於對呢啲權利同自由嘅普遍了解對於呢個誓願嘅充分實現具有好大嘅重要性,\n因此而家,\n大會,\n發布呢一個世界人權宣言,作為所有人民同所有國家努力實現嘅共同標準,以期每一個人同社會機構經常銘念本宣言,努力通過教誨同教育促進對權利同自由嘅尊重,並通過國家嘅和國際嘅漸進措施,令呢啲權利同自由喺各會員國本身人民及喺佢管轄下領土嘅人民中得到普遍同有效嘅承認同遵行;\n 人人生而自由,喺尊嚴同權利上一律平等。佢哋賦有理性同良心,並應以兄弟關係嘅精神相對待。\n人人有資格享有本宣言所載嘅一切權利同自由,唔分種族、膚色、性別、語言、宗教、政治或其他見解、國籍或社會出身、財產、出生或其他身分等任何區別。" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/yue_Hant.textproto b/Lib/gflanguages/data/languages/yue_Hant.textproto index 9f7e95b4b99..c26e7d91f0d 100644 --- a/Lib/gflanguages/data/languages/yue_Hant.textproto +++ b/Lib/gflanguages/data/languages/yue_Hant.textproto @@ -2,7 +2,7 @@ id: "yue_Hant" language: "yue" script: "Hant" name: "Yue Chinese" -preferred_name: "Cantonese" +preferred_name: "Cantonese (Traditional)" autonym: "粵語" population: 6524919 region: "CN" diff --git a/Lib/gflanguages/data/languages/zh_Hans.textproto b/Lib/gflanguages/data/languages/zh_Hans.textproto index 3ff2abdeb7c..e4eb5aa7782 100644 --- a/Lib/gflanguages/data/languages/zh_Hans.textproto +++ b/Lib/gflanguages/data/languages/zh_Hans.textproto @@ -1,7 +1,7 @@ id: "zh_Hans" language: "zh" script: "Hans" -name: "Simplified Chinese" +name: "Chinese (Simplified)" autonym: "中文(简体,中国)" population: 1265387866 region: "CN" From 95e4db291778c9f42fe9d600a1126b253253ba43 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 13:20:40 -0400 Subject: [PATCH 06/39] Update language names for Ancient Greek, there were duplicates --- Lib/gflanguages/data/languages/bal_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bgn_Arab.textproto | 1 - Lib/gflanguages/data/languages/grc_Cprt.textproto | 1 - Lib/gflanguages/data/languages/grc_Grek.textproto | 3 +-- 
Lib/gflanguages/data/languages/grc_Linb.textproto | 3 +-- Lib/gflanguages/data/languages/ii_Latn.textproto | 2 +- 6 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Lib/gflanguages/data/languages/bal_Latn.textproto b/Lib/gflanguages/data/languages/bal_Latn.textproto index 34984a58553..2a441d3f883 100644 --- a/Lib/gflanguages/data/languages/bal_Latn.textproto +++ b/Lib/gflanguages/data/languages/bal_Latn.textproto @@ -2,6 +2,6 @@ id: "bal_Latn" language: "bal" script: "Latn" name: "Baluchi, Latin" -preferred_name: "Balochi" +preferred_name: "Balochi (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/bgn_Arab.textproto b/Lib/gflanguages/data/languages/bgn_Arab.textproto index d42af6e8835..e2ab61434ae 100644 --- a/Lib/gflanguages/data/languages/bgn_Arab.textproto +++ b/Lib/gflanguages/data/languages/bgn_Arab.textproto @@ -2,7 +2,6 @@ id: "bgn_Arab" language: "bgn" script: "Arab" name: "Western Balochi" -#preferred_name: "Balochi" population: 2037382 region: "AF" region: "IR" diff --git a/Lib/gflanguages/data/languages/grc_Cprt.textproto b/Lib/gflanguages/data/languages/grc_Cprt.textproto index de1fc5f6dbd..b88160280cb 100644 --- a/Lib/gflanguages/data/languages/grc_Cprt.textproto +++ b/Lib/gflanguages/data/languages/grc_Cprt.textproto @@ -2,7 +2,6 @@ id: "grc_Cprt" language: "grc" script: "Cprt" name: "Ancient Greek" -preferred_name: "Ancient Greek" population: 0 sample_text { masthead_full: "𐠀𐠜𐠍𐠚" diff --git a/Lib/gflanguages/data/languages/grc_Grek.textproto b/Lib/gflanguages/data/languages/grc_Grek.textproto index 65a80e691d3..ad9a2096ead 100644 --- a/Lib/gflanguages/data/languages/grc_Grek.textproto +++ b/Lib/gflanguages/data/languages/grc_Grek.textproto @@ -1,8 +1,7 @@ id: "grc_Grek" language: "grc" script: "Grek" -name: "Ancient Greek, Greek" -preferred_name: "Ancient Greek" +name: "Ancient Greek (Greek)" autonym: "Ελληνικά" population: 0 exemplar_chars { diff --git a/Lib/gflanguages/data/languages/grc_Linb.textproto 
b/Lib/gflanguages/data/languages/grc_Linb.textproto index 37d77a3587a..df8f02a7764 100644 --- a/Lib/gflanguages/data/languages/grc_Linb.textproto +++ b/Lib/gflanguages/data/languages/grc_Linb.textproto @@ -1,8 +1,7 @@ id: "grc_Linb" language: "grc" script: "Linb" -name: "Ancient Greek, Linear B" -preferred_name: "Ancient Greek" +name: "Ancient Greek (Linear B)" population: 0 sample_text { masthead_full: "𐀐𐀮𐀝𐀹" diff --git a/Lib/gflanguages/data/languages/ii_Latn.textproto b/Lib/gflanguages/data/languages/ii_Latn.textproto index 47bc9e4d220..715f1f9850a 100644 --- a/Lib/gflanguages/data/languages/ii_Latn.textproto +++ b/Lib/gflanguages/data/languages/ii_Latn.textproto @@ -2,6 +2,6 @@ id: "ii_Latn" language: "ii" script: "Latn" name: "Sichuan Yi, Latin" -preferred_name: "Nuosu" +preferred_name: "Nuosu (Latin)" population: 0 historical: true From 6b34df3c0dd827eea3c604d8aa16215906e68642 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 13:24:45 -0400 Subject: [PATCH 07/39] Enforce language name structure for both name and preferred name --- tests/test_data_languages.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 4c244db4e71..3ce6a44f127 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -289,11 +289,13 @@ def test_language_uniqueness(): def test_language_name_structure(): for lang in LANGUAGES.values(): - language_name = lang.preferred_name if lang.preferred_name else lang.name script_name = SCRIPTS[lang.script].name - if not re.match(LANGUAGE_NAME_REGEX, language_name): - pytest.fail( - f"Language name does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {language_name}") - if language_name.endswith(")") and not language_name.endsWith(f"({script_name})"): - pytest.fail( - f"Language name parenthetical should contain script name ({script_name}): {language_name}") + for type, name in [["name", 
lang.name], ["preferred_name", lang.preferred_name]]: + if name is None: + continue + if not re.match(LANGUAGE_NAME_REGEX, name): + pytest.fail( + f"Language {type} does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {name}") + if name.endswith(")") and not name.endsWith(f"({script_name})"): + pytest.fail( + f"Language {type} parenthetical should contain script name ({script_name}): {name}") From bee8c9f22e60fdf0bd49dc586af2ff365a413295 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Thu, 18 Jul 2024 13:24:45 -0400 Subject: [PATCH 08/39] Enforce language name structure for both name and preferred name --- tests/test_data_languages.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 4c244db4e71..3ce6a44f127 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -289,11 +289,13 @@ def test_language_uniqueness(): def test_language_name_structure(): for lang in LANGUAGES.values(): - language_name = lang.preferred_name if lang.preferred_name else lang.name script_name = SCRIPTS[lang.script].name - if not re.match(LANGUAGE_NAME_REGEX, language_name): - pytest.fail( - f"Language name does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {language_name}") - if language_name.endswith(")") and not language_name.endsWith(f"({script_name})"): - pytest.fail( - f"Language name parenthetical should contain script name ({script_name}): {language_name}") + for type, name in [["name", lang.name], ["preferred_name", lang.preferred_name]]: + if name is None: + continue + if not re.match(LANGUAGE_NAME_REGEX, name): + pytest.fail( + f"Language {type} does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {name}") + if name.endswith(")") and not name.endsWith(f"({script_name})"): + pytest.fail( + f"Language {type} parenthetical should contain script name ({script_name}): {name}") From 
149b73435c9b0d2205e54e07a355703bd10f1af5 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Fri, 19 Jul 2024 11:58:10 -0400 Subject: [PATCH 09/39] Update language name structure test to output error once --- tests/test_data_languages.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 3ce6a44f127..f206ef83654 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -288,14 +288,23 @@ def test_language_uniqueness(): def test_language_name_structure(): + languages_with_bad_name_structure = {} for lang in LANGUAGES.values(): script_name = SCRIPTS[lang.script].name - for type, name in [["name", lang.name], ["preferred_name", lang.preferred_name]]: - if name is None: - continue - if not re.match(LANGUAGE_NAME_REGEX, name): - pytest.fail( - f"Language {type} does not have expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {name}") - if name.endswith(")") and not name.endsWith(f"({script_name})"): - pytest.fail( - f"Language {type} parenthetical should contain script name ({script_name}): {name}") + names = [["name", lang.name]] + if lang.preferred_name: + names += [["preferred_name", lang.preferred_name]] + bad_names = [] + for type, name in names: + bad_structure = not re.match(LANGUAGE_NAME_REGEX, name) + bad_script_suffix = name.endswith( + ")") and not name.endsWith(f"({script_name})") + if bad_structure or bad_script_suffix: + bad_names.append(type) + if len(bad_names) > 0: + languages_with_bad_name_structure[lang.id] = bad_names + if len(languages_with_bad_name_structure) > 0: + misstructured_language_names = [f"{language_id}" if len( + types) == 1 else f"{language_id}: {types}" for language_id, types in languages_with_bad_name_structure.items() if len(types) > 0] + pytest.fail( + f"Languages names without expected structure (\"LANGUAGE, MODIFIER (SCRIPT)\"): {misstructured_language_names}") From 
c6fa0d8a8f5e292eed108e49f309801b696331ed Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Fri, 19 Jul 2024 12:07:14 -0400 Subject: [PATCH 10/39] Fix typo: endsWith => endswith --- tests/test_data_languages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index f206ef83654..4e347dd9b47 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -298,7 +298,7 @@ def test_language_name_structure(): for type, name in names: bad_structure = not re.match(LANGUAGE_NAME_REGEX, name) bad_script_suffix = name.endswith( - ")") and not name.endsWith(f"({script_name})") + ")") and not name.endswith(f"({script_name})") if bad_structure or bad_script_suffix: bad_names.append(type) if len(bad_names) > 0: From 3e9373a0f3dfce72e0788374d046b6f0a584250d Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Sun, 21 Jul 2024 19:14:51 -0400 Subject: [PATCH 11/39] Restructure language names to be consistent --- Lib/gflanguages/data/languages/ain_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ar_Syrc.textproto | 2 +- Lib/gflanguages/data/languages/arc_Nbat.textproto | 2 +- Lib/gflanguages/data/languages/arc_Palm.textproto | 2 +- Lib/gflanguages/data/languages/ba_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bal_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ban_Bali.textproto | 2 +- Lib/gflanguages/data/languages/bax_Latn.textproto | 2 +- Lib/gflanguages/data/languages/be_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ber_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ber_Tfng.textproto | 2 +- Lib/gflanguages/data/languages/bft_Tibt.textproto | 2 +- Lib/gflanguages/data/languages/bho_Kthi.textproto | 2 +- Lib/gflanguages/data/languages/bku_Buhd.textproto | 4 ++-- Lib/gflanguages/data/languages/bm_Nkoo.textproto | 2 +- Lib/gflanguages/data/languages/bn_Newa.textproto | 2 +- Lib/gflanguages/data/languages/bo_Marc.textproto | 2 +- 
Lib/gflanguages/data/languages/bo_Zanb.textproto | 2 +- Lib/gflanguages/data/languages/brh_Latn.textproto | 2 +- Lib/gflanguages/data/languages/brx_Beng.textproto | 2 +- Lib/gflanguages/data/languages/brx_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bsq_Bass.textproto | 2 +- Lib/gflanguages/data/languages/bsq_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bug_Bugi.textproto | 2 +- Lib/gflanguages/data/languages/byh_Deva.textproto | 2 +- Lib/gflanguages/data/languages/ccp_Beng.textproto | 2 +- Lib/gflanguages/data/languages/chx_Deva.textproto | 2 +- Lib/gflanguages/data/languages/cop_Copt.textproto | 2 +- Lib/gflanguages/data/languages/cop_Grek.textproto | 2 +- Lib/gflanguages/data/languages/cr_Latn.textproto | 2 +- Lib/gflanguages/data/languages/cyo_Latn.textproto | 2 +- Lib/gflanguages/data/languages/de_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/de_Runr.textproto | 2 +- Lib/gflanguages/data/languages/dhi_Deva.textproto | 2 +- Lib/gflanguages/data/languages/doi_Arab.textproto | 2 +- Lib/gflanguages/data/languages/doi_Dogr.textproto | 2 +- Lib/gflanguages/data/languages/doi_Takr.textproto | 4 ++-- Lib/gflanguages/data/languages/dyu_Nkoo.textproto | 2 +- Lib/gflanguages/data/languages/en_Brai.textproto | 2 +- Lib/gflanguages/data/languages/en_Dsrt.textproto | 2 +- Lib/gflanguages/data/languages/en_Shaw.textproto | 2 +- Lib/gflanguages/data/languages/ett_Latn.textproto | 2 +- Lib/gflanguages/data/languages/evn_Latn.textproto | 2 +- Lib/gflanguages/data/languages/fil_Tglg.textproto | 2 +- Lib/gflanguages/data/languages/fr_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/fuf_Adlm.textproto | 2 +- Lib/gflanguages/data/languages/gag_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/gju_Deva.textproto | 2 +- Lib/gflanguages/data/languages/gon_Telu.textproto | 2 +- Lib/gflanguages/data/languages/got_Runr.textproto | 2 +- Lib/gflanguages/data/languages/hi_Mahj.textproto | 3 +-- Lib/gflanguages/data/languages/hi_Newa.textproto | 2 +- 
Lib/gflanguages/data/languages/hnn_Hano.textproto | 2 +- Lib/gflanguages/data/languages/id_Arab.textproto | 2 +- Lib/gflanguages/data/languages/ie_Latn.textproto | 2 +- Lib/gflanguages/data/languages/inh_Arab.textproto | 2 +- Lib/gflanguages/data/languages/inh_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ja_Hira.textproto | 2 +- Lib/gflanguages/data/languages/ja_Kana.textproto | 2 +- Lib/gflanguages/data/languages/jbo_Latn.textproto | 2 +- Lib/gflanguages/data/languages/jra_Latn.textproto | 4 ++-- Lib/gflanguages/data/languages/jv_Java.textproto | 2 +- Lib/gflanguages/data/languages/kab_Tfng.textproto | 2 +- Lib/gflanguages/data/languages/kha_Beng.textproto | 2 +- Lib/gflanguages/data/languages/khr_Beng.textproto | 2 +- Lib/gflanguages/data/languages/khr_Deva.textproto | 2 +- Lib/gflanguages/data/languages/khr_Orya.textproto | 2 +- Lib/gflanguages/data/languages/khw_Latn.textproto | 2 +- Lib/gflanguages/data/languages/kjg_Latn.textproto | 2 +- Lib/gflanguages/data/languages/kk_Latn.textproto | 2 +- Lib/gflanguages/data/languages/kr_Arab.textproto | 2 +- Lib/gflanguages/data/languages/ks_Deva.textproto | 2 +- Lib/gflanguages/data/languages/ku_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ku_Yezi.textproto | 2 +- Lib/gflanguages/data/languages/kyw_Beng.textproto | 2 +- Lib/gflanguages/data/languages/kyw_Orya.textproto | 2 +- Lib/gflanguages/data/languages/lad_Latn.textproto | 2 +- Lib/gflanguages/data/languages/lhm_Deva.textproto | 2 +- Lib/gflanguages/data/languages/lif_Limb.textproto | 2 +- Lib/gflanguages/data/languages/lus_Latn.textproto | 2 +- Lib/gflanguages/data/languages/lzz_Latn.textproto | 4 ++-- Lib/gflanguages/data/languages/mai_Newa.textproto | 2 +- Lib/gflanguages/data/languages/mai_Tirh.textproto | 2 +- Lib/gflanguages/data/languages/mak_Bugi.textproto | 2 +- Lib/gflanguages/data/languages/man_Nkoo.textproto | 2 +- Lib/gflanguages/data/languages/mdr_Bugi.textproto | 2 +- Lib/gflanguages/data/languages/men_Mend.textproto | 2 +- 
Lib/gflanguages/data/languages/min_Arab.textproto | 2 +- Lib/gflanguages/data/languages/mn_Zanb.textproto | 2 +- Lib/gflanguages/data/languages/mr_Modi.textproto | 2 +- Lib/gflanguages/data/languages/mro_Mroo.textproto | 3 +-- Lib/gflanguages/data/languages/mrw_Arab.textproto | 2 +- Lib/gflanguages/data/languages/mrw_Latn.textproto | 2 +- Lib/gflanguages/data/languages/mui_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ne_Newa.textproto | 2 +- Lib/gflanguages/data/languages/new_Newa.textproto | 2 +- Lib/gflanguages/data/languages/nsk_Latn.textproto | 2 +- Lib/gflanguages/data/languages/oj_Latn.textproto | 2 +- Lib/gflanguages/data/languages/om_Ethi.textproto | 2 +- Lib/gflanguages/data/languages/osa_Latn.textproto | 2 +- Lib/gflanguages/data/languages/osc_Latn.textproto | 2 +- Lib/gflanguages/data/languages/pi_Brah.textproto | 2 +- Lib/gflanguages/data/languages/pi_Sinh.textproto | 2 +- Lib/gflanguages/data/languages/pi_Thai.textproto | 2 +- Lib/gflanguages/data/languages/pnt_Grek.textproto | 2 +- Lib/gflanguages/data/languages/pnt_Latn.textproto | 2 +- Lib/gflanguages/data/languages/rab_Deva.textproto | 2 +- Lib/gflanguages/data/languages/ray_Latn.textproto | 2 +- Lib/gflanguages/data/languages/rej_Rjng.textproto | 2 +- Lib/gflanguages/data/languages/rhg_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ro_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/rom_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/sa_Ahom.textproto | 2 +- Lib/gflanguages/data/languages/sa_Bali.textproto | 2 +- Lib/gflanguages/data/languages/sa_Bhks.textproto | 2 +- Lib/gflanguages/data/languages/sa_Brah.textproto | 2 +- Lib/gflanguages/data/languages/sa_Bugi.textproto | 2 +- Lib/gflanguages/data/languages/sa_Cham.textproto | 2 +- Lib/gflanguages/data/languages/sa_Gran.textproto | 2 +- Lib/gflanguages/data/languages/sa_Khar.textproto | 2 +- Lib/gflanguages/data/languages/sa_Marc.textproto | 2 +- Lib/gflanguages/data/languages/sa_Mong.textproto | 2 +- 
Lib/gflanguages/data/languages/sa_Mroo.textproto | 2 +- Lib/gflanguages/data/languages/sa_Mult.textproto | 2 +- Lib/gflanguages/data/languages/sa_Nand.textproto | 2 +- Lib/gflanguages/data/languages/sa_Newa.textproto | 2 +- Lib/gflanguages/data/languages/sa_Rjng.textproto | 2 +- Lib/gflanguages/data/languages/sa_Shrd.textproto | 2 +- Lib/gflanguages/data/languages/sa_Sidd.textproto | 2 +- Lib/gflanguages/data/languages/sa_Sinh.textproto | 2 +- Lib/gflanguages/data/languages/sa_Soyo.textproto | 2 +- Lib/gflanguages/data/languages/sa_Tagb.textproto | 2 +- Lib/gflanguages/data/languages/sa_Tirh.textproto | 2 +- Lib/gflanguages/data/languages/sa_Wcho.textproto | 2 +- Lib/gflanguages/data/languages/sa_Zanb.textproto | 2 +- Lib/gflanguages/data/languages/sat_Beng.textproto | 2 +- Lib/gflanguages/data/languages/sat_Deva.textproto | 2 +- Lib/gflanguages/data/languages/sat_Latn.textproto | 2 +- Lib/gflanguages/data/languages/sat_Orya.textproto | 2 +- Lib/gflanguages/data/languages/sd_Khoj.textproto | 2 +- Lib/gflanguages/data/languages/sd_Sind.textproto | 4 ++-- Lib/gflanguages/data/languages/shi_Tfng.textproto | 2 +- Lib/gflanguages/data/languages/sja_Latn.textproto | 2 +- Lib/gflanguages/data/languages/skr_Mult.textproto | 4 ++-- Lib/gflanguages/data/languages/so_Arab.textproto | 2 +- Lib/gflanguages/data/languages/so_Osma.textproto | 2 +- Lib/gflanguages/data/languages/sq_Elba.textproto | 2 +- Lib/gflanguages/data/languages/sq_Vith.textproto | 2 +- Lib/gflanguages/data/languages/su_Sund.textproto | 2 +- Lib/gflanguages/data/languages/sus_Arab.textproto | 2 +- Lib/gflanguages/data/languages/taq_Latn.textproto | 2 +- Lib/gflanguages/data/languages/taq_Tfng.textproto | 2 +- Lib/gflanguages/data/languages/tbw_Tagb.textproto | 2 +- Lib/gflanguages/data/languages/tg_Latn.textproto | 2 +- Lib/gflanguages/data/languages/thf_Deva.textproto | 2 +- Lib/gflanguages/data/languages/ths_Deva.textproto | 2 +- Lib/gflanguages/data/languages/tiw_Latn.textproto | 2 +- 
Lib/gflanguages/data/languages/tk_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/tk_Latn.textproto | 2 +- Lib/gflanguages/data/languages/tkr_Latn.textproto | 2 +- Lib/gflanguages/data/languages/tlh_Latn.textproto | 2 +- Lib/gflanguages/data/languages/tly_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/tly_Latn.textproto | 2 +- Lib/gflanguages/data/languages/tr_Arab.textproto | 2 +- Lib/gflanguages/data/languages/tru_Syrc.textproto | 2 +- Lib/gflanguages/data/languages/tt_Arab.textproto | 2 +- Lib/gflanguages/data/languages/tt_Latn.textproto | 2 +- Lib/gflanguages/data/languages/udm_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ug_Latn.textproto | 2 +- Lib/gflanguages/data/languages/unr_Orya.textproto | 2 +- Lib/gflanguages/data/languages/unx_Deva.textproto | 2 +- Lib/gflanguages/data/languages/vi_Hani.textproto | 2 +- Lib/gflanguages/data/languages/wal_Ethi.textproto | 2 +- Lib/gflanguages/data/languages/wal_Latn.textproto | 2 +- Lib/gflanguages/data/languages/wo_Arab.textproto | 2 +- Lib/gflanguages/data/languages/xum_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ybh_Deva.textproto | 2 +- Lib/gflanguages/data/languages/zh_Hebr.textproto | 2 +- Lib/gflanguages/data/languages/zlm_Arab.textproto | 2 +- Lib/gflanguages/data/languages/zlm_Latn.textproto | 2 +- 180 files changed, 186 insertions(+), 188 deletions(-) diff --git a/Lib/gflanguages/data/languages/ain_Latn.textproto b/Lib/gflanguages/data/languages/ain_Latn.textproto index 6b6f9bd9e76..8b1b6ef9876 100644 --- a/Lib/gflanguages/data/languages/ain_Latn.textproto +++ b/Lib/gflanguages/data/languages/ain_Latn.textproto @@ -1,6 +1,6 @@ id: "ain_Latn" language: "ain" script: "Latn" -name: "Ainu, Latin" +name: "Ainu (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ar_Syrc.textproto b/Lib/gflanguages/data/languages/ar_Syrc.textproto index fc8893fa8a7..03d02b16035 100644 --- a/Lib/gflanguages/data/languages/ar_Syrc.textproto +++ 
b/Lib/gflanguages/data/languages/ar_Syrc.textproto @@ -1,6 +1,6 @@ id: "ar_Syrc" language: "ar" script: "Syrc" -name: "Arabic, Syriac" +name: "Arabic (Syriac)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/arc_Nbat.textproto b/Lib/gflanguages/data/languages/arc_Nbat.textproto index bb255e24eef..46d3cede67a 100644 --- a/Lib/gflanguages/data/languages/arc_Nbat.textproto +++ b/Lib/gflanguages/data/languages/arc_Nbat.textproto @@ -1,7 +1,7 @@ id: "arc_Nbat" language: "arc" script: "Nbat" -name: "Aramaic, Nabataean" +name: "Aramaic (Nabataean)" population: 0 sample_text { masthead_full: "𐢅𐢕𐢆𐢚" diff --git a/Lib/gflanguages/data/languages/arc_Palm.textproto b/Lib/gflanguages/data/languages/arc_Palm.textproto index d1af797e8f3..46af68d0a1b 100644 --- a/Lib/gflanguages/data/languages/arc_Palm.textproto +++ b/Lib/gflanguages/data/languages/arc_Palm.textproto @@ -1,7 +1,7 @@ id: "arc_Palm" language: "arc" script: "Palm" -name: "Aramaic, Palmyrene" +name: "Aramaic (Palmyrene)" population: 0 sample_text { masthead_full: "𐡲𐡫𐡬𐡩" diff --git a/Lib/gflanguages/data/languages/ba_Latn.textproto b/Lib/gflanguages/data/languages/ba_Latn.textproto index 8c3533eb1b6..847a5803ba0 100644 --- a/Lib/gflanguages/data/languages/ba_Latn.textproto +++ b/Lib/gflanguages/data/languages/ba_Latn.textproto @@ -1,7 +1,7 @@ id: "ba_Latn" language: "ba" script: "Latn" -name: "Bashkir, Latin" +name: "Bashkir (Latin)" region: "RU" sample_text { masthead_full: "BbAa" diff --git a/Lib/gflanguages/data/languages/bal_Latn.textproto b/Lib/gflanguages/data/languages/bal_Latn.textproto index 2a441d3f883..2f1731af53f 100644 --- a/Lib/gflanguages/data/languages/bal_Latn.textproto +++ b/Lib/gflanguages/data/languages/bal_Latn.textproto @@ -1,7 +1,7 @@ id: "bal_Latn" language: "bal" script: "Latn" -name: "Baluchi, Latin" +name: "Baluchi (Latin)" preferred_name: "Balochi (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ban_Bali.textproto 
b/Lib/gflanguages/data/languages/ban_Bali.textproto index 0b2837a0776..4fc5371ba1d 100644 --- a/Lib/gflanguages/data/languages/ban_Bali.textproto +++ b/Lib/gflanguages/data/languages/ban_Bali.textproto @@ -1,7 +1,7 @@ id: "ban_Bali" language: "ban" script: "Bali" -name: "Balinese, Balinese" +name: "Balinese (Balinese)" autonym: "ᬪᬵᬱᬩᬮᬶ" population: 0 region: "ID" diff --git a/Lib/gflanguages/data/languages/bax_Latn.textproto b/Lib/gflanguages/data/languages/bax_Latn.textproto index 97668ef3d35..bf02317d381 100644 --- a/Lib/gflanguages/data/languages/bax_Latn.textproto +++ b/Lib/gflanguages/data/languages/bax_Latn.textproto @@ -1,7 +1,7 @@ id: "bax_Latn" language: "bax" script: "Latn" -name: "Bamun, Latin" +name: "Bamun (Latin)" region: "CM" population: 673000 exemplar_chars { diff --git a/Lib/gflanguages/data/languages/be_Latn.textproto b/Lib/gflanguages/data/languages/be_Latn.textproto index ad761bcac4d..af6cee903fa 100644 --- a/Lib/gflanguages/data/languages/be_Latn.textproto +++ b/Lib/gflanguages/data/languages/be_Latn.textproto @@ -1,7 +1,7 @@ id: "be_Latn" language: "be" script: "Latn" -name: "Belarusian, Latin" +name: "Belarusian (Latin)" region: "BY" sample_text { masthead_full: "UuSs" diff --git a/Lib/gflanguages/data/languages/ber_Latn.textproto b/Lib/gflanguages/data/languages/ber_Latn.textproto index ba743964aad..0c5c1a7fed1 100644 --- a/Lib/gflanguages/data/languages/ber_Latn.textproto +++ b/Lib/gflanguages/data/languages/ber_Latn.textproto @@ -1,4 +1,4 @@ id: "ber_Latn" language: "ber" script: "Latn" -name: "Berber, Latin" +name: "Berber (Latin)" diff --git a/Lib/gflanguages/data/languages/ber_Tfng.textproto b/Lib/gflanguages/data/languages/ber_Tfng.textproto index 515618f5c1a..5580455a7aa 100644 --- a/Lib/gflanguages/data/languages/ber_Tfng.textproto +++ b/Lib/gflanguages/data/languages/ber_Tfng.textproto @@ -1,5 +1,5 @@ id: "ber_Tfng" language: "ber" script: "Tfng" -name: "Berber, Tifinagh" +name: "Berber (Tifinagh)" region: "MA" diff --git 
a/Lib/gflanguages/data/languages/bft_Tibt.textproto b/Lib/gflanguages/data/languages/bft_Tibt.textproto index 1ba71ab130e..c039afefe42 100644 --- a/Lib/gflanguages/data/languages/bft_Tibt.textproto +++ b/Lib/gflanguages/data/languages/bft_Tibt.textproto @@ -1,6 +1,6 @@ id: "bft_Tibt" language: "bft" script: "Tibt" -name: "Balti, Tibetan" +name: "Balti (Tibetan)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/bho_Kthi.textproto b/Lib/gflanguages/data/languages/bho_Kthi.textproto index 292445e0a1c..afac27894c3 100644 --- a/Lib/gflanguages/data/languages/bho_Kthi.textproto +++ b/Lib/gflanguages/data/languages/bho_Kthi.textproto @@ -1,7 +1,7 @@ id: "bho_Kthi" language: "bho" script: "Kthi" -name: "Bhojpuri, Kaithi" +name: "Bhojpuri (Kaithi)" sample_text { masthead_full: "𑂃𑂮𑂥𑂰𑂥" masthead_partial: "𑂉𑂍" diff --git a/Lib/gflanguages/data/languages/bku_Buhd.textproto b/Lib/gflanguages/data/languages/bku_Buhd.textproto index 1ec4c954dad..3c7d6c41fe4 100644 --- a/Lib/gflanguages/data/languages/bku_Buhd.textproto +++ b/Lib/gflanguages/data/languages/bku_Buhd.textproto @@ -1,7 +1,7 @@ id: "bku_Buhd" language: "bku" script: "Buhd" -name: "Buhid, Buhid" +name: "Buhid (Buhid)" population: 0 historical: true sample_text { @@ -17,4 +17,4 @@ sample_text { specimen_32: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ" specimen_21: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ\nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ" specimen_16: "ᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ\nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ \nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ \nᝃᝑᝓᝃᝑᝓᝃᝓᝋᝎᝄ ᝃᝊᝓᝌᝓᝊᝓᝌᝐᝒᝂᝎᝓ ᝃᝇᝓᝌᝇᝓᝌᝐᝒᝇᝄᝓ \nᝇᝎᝒᝃᝐᝉᝍᝓᝌᝓ ᝐᝒᝑᝋᝓᝃᝐᝊᝎᝊᝃᝓ ᝀᝈᝂᝋᝊᝓᝃᝈᝒᝋᝓ" - } \ No newline at end of file + } diff --git a/Lib/gflanguages/data/languages/bm_Nkoo.textproto 
b/Lib/gflanguages/data/languages/bm_Nkoo.textproto index efc4c9dbcd4..2734cdca766 100644 --- a/Lib/gflanguages/data/languages/bm_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/bm_Nkoo.textproto @@ -1,6 +1,6 @@ id: "bm_Nkoo" language: "bm" script: "Nkoo" -name: "Bambara, Nko" +name: "Bambara (Nko)" population: 16000000 region: "ML" diff --git a/Lib/gflanguages/data/languages/bn_Newa.textproto b/Lib/gflanguages/data/languages/bn_Newa.textproto index e676df2cbde..207b074b2ac 100644 --- a/Lib/gflanguages/data/languages/bn_Newa.textproto +++ b/Lib/gflanguages/data/languages/bn_Newa.textproto @@ -1,6 +1,6 @@ id: "bn_Newa" language: "bn" script: "Newa" -name: "Bengali, Newa" +name: "Bengali (Newa)" population: 6000 region: "NP" diff --git a/Lib/gflanguages/data/languages/bo_Marc.textproto b/Lib/gflanguages/data/languages/bo_Marc.textproto index 3b4afd1a5f3..f508a24b87b 100644 --- a/Lib/gflanguages/data/languages/bo_Marc.textproto +++ b/Lib/gflanguages/data/languages/bo_Marc.textproto @@ -1,5 +1,5 @@ id: "bo_Marc" language: "bo" script: "Marc" -name: "Tibetan, Marchen" +name: "Tibetan (Marchen)" region: "CN" diff --git a/Lib/gflanguages/data/languages/bo_Zanb.textproto b/Lib/gflanguages/data/languages/bo_Zanb.textproto index a45f99bcb75..69b4e76bce1 100644 --- a/Lib/gflanguages/data/languages/bo_Zanb.textproto +++ b/Lib/gflanguages/data/languages/bo_Zanb.textproto @@ -1,4 +1,4 @@ id: "bo_Zanb" language: "bo" script: "Zanb" -name: "Tibetan, Zanabazar" +name: "Tibetan (Zanabazar)" diff --git a/Lib/gflanguages/data/languages/brh_Latn.textproto b/Lib/gflanguages/data/languages/brh_Latn.textproto index a59d7339fd9..19744b5ab4c 100644 --- a/Lib/gflanguages/data/languages/brh_Latn.textproto +++ b/Lib/gflanguages/data/languages/brh_Latn.textproto @@ -1,7 +1,7 @@ id: "brh_Latn" language: "brh" script: "Latn" -name: "Brahui, Latin" +name: "Brahui (Latin)" population: 0 region: "PK" sample_text { diff --git a/Lib/gflanguages/data/languages/brx_Beng.textproto 
b/Lib/gflanguages/data/languages/brx_Beng.textproto index 0d3e5f0f424..2dd936b68a5 100644 --- a/Lib/gflanguages/data/languages/brx_Beng.textproto +++ b/Lib/gflanguages/data/languages/brx_Beng.textproto @@ -1,7 +1,7 @@ id: "brx_Beng" language: "brx" script: "Beng" -name: "Bodo, Bangla" +name: "Bodo (Bangla)" region: "IN" sample_text { masthead_full: "গসবঙ" diff --git a/Lib/gflanguages/data/languages/brx_Latn.textproto b/Lib/gflanguages/data/languages/brx_Latn.textproto index 9db0d9cf617..939e34198ce 100644 --- a/Lib/gflanguages/data/languages/brx_Latn.textproto +++ b/Lib/gflanguages/data/languages/brx_Latn.textproto @@ -1,7 +1,7 @@ id: "brx_Latn" language: "brx" script: "Latn" -name: "Bodo, Latin" +name: "Bodo (Latin)" region: "IN" sample_text { masthead_full: "GgAa" diff --git a/Lib/gflanguages/data/languages/bsq_Bass.textproto b/Lib/gflanguages/data/languages/bsq_Bass.textproto index 8b9b529e938..3d76b53b1da 100644 --- a/Lib/gflanguages/data/languages/bsq_Bass.textproto +++ b/Lib/gflanguages/data/languages/bsq_Bass.textproto @@ -1,7 +1,7 @@ id: "bsq_Bass" language: "bsq" script: "Bass" -name: "Bassa, Vah" +name: "Bassa (Vah)" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/bsq_Latn.textproto b/Lib/gflanguages/data/languages/bsq_Latn.textproto index 7e6d576d0d4..5566ba5e10a 100644 --- a/Lib/gflanguages/data/languages/bsq_Latn.textproto +++ b/Lib/gflanguages/data/languages/bsq_Latn.textproto @@ -1,7 +1,7 @@ id: "bsq_Latn" language: "bsq" script: "Latn" -name: "Bassa, Latin" +name: "Bassa (Latin)" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/bug_Bugi.textproto b/Lib/gflanguages/data/languages/bug_Bugi.textproto index 5f40646882b..e0f5c002402 100644 --- a/Lib/gflanguages/data/languages/bug_Bugi.textproto +++ b/Lib/gflanguages/data/languages/bug_Bugi.textproto @@ -1,7 +1,7 @@ id: "bug_Bugi" language: "bug" script: "Bugi" -name: "Buginese, Buginese" +name: "Buginese (Buginese)" 
population: 0 region: "ID" sample_text { diff --git a/Lib/gflanguages/data/languages/byh_Deva.textproto b/Lib/gflanguages/data/languages/byh_Deva.textproto index 2dc44290e6e..71fa1fd8dc4 100644 --- a/Lib/gflanguages/data/languages/byh_Deva.textproto +++ b/Lib/gflanguages/data/languages/byh_Deva.textproto @@ -1,7 +1,7 @@ id: "byh_Deva" language: "byh" script: "Deva" -name: "Bhujel, Devanagari" +name: "Bhujel (Devanagari)" region: "NP" sample_text { masthead_full: "सकतब" diff --git a/Lib/gflanguages/data/languages/ccp_Beng.textproto b/Lib/gflanguages/data/languages/ccp_Beng.textproto index 14ae5c74f32..01367ef1de3 100644 --- a/Lib/gflanguages/data/languages/ccp_Beng.textproto +++ b/Lib/gflanguages/data/languages/ccp_Beng.textproto @@ -1,7 +1,7 @@ id: "ccp_Beng" language: "ccp" script: "Beng" -name: "Chakma, Bengali" +name: "Chakma (Bengali)" population: 729137 region: "BD" region: "IN" diff --git a/Lib/gflanguages/data/languages/chx_Deva.textproto b/Lib/gflanguages/data/languages/chx_Deva.textproto index 910292ee6c5..f649c1be59d 100644 --- a/Lib/gflanguages/data/languages/chx_Deva.textproto +++ b/Lib/gflanguages/data/languages/chx_Deva.textproto @@ -1,7 +1,7 @@ id: "chx_Deva" language: "chx" script: "Deva" -name: "Chantyal, Devanagari" +name: "Chantyal (Devanagari)" region: "NP" sample_text { masthead_full: "झउनम" diff --git a/Lib/gflanguages/data/languages/cop_Copt.textproto b/Lib/gflanguages/data/languages/cop_Copt.textproto index 98e6f7e7073..6915e17c876 100644 --- a/Lib/gflanguages/data/languages/cop_Copt.textproto +++ b/Lib/gflanguages/data/languages/cop_Copt.textproto @@ -1,7 +1,7 @@ id: "cop_Copt" language: "cop" script: "Copt" -name: "Coptic, Coptic" +name: "Coptic (Coptic)" population: 0 region: "EG" sample_text { diff --git a/Lib/gflanguages/data/languages/cop_Grek.textproto b/Lib/gflanguages/data/languages/cop_Grek.textproto index 3a3b92dd972..25a33fd797d 100644 --- a/Lib/gflanguages/data/languages/cop_Grek.textproto +++ 
b/Lib/gflanguages/data/languages/cop_Grek.textproto @@ -1,6 +1,6 @@ id: "cop_Grek" language: "cop" script: "Grek" -name: "Coptic, Greek" +name: "Coptic (Greek)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/cr_Latn.textproto b/Lib/gflanguages/data/languages/cr_Latn.textproto index 2bc3587d4f1..0c5a670683f 100644 --- a/Lib/gflanguages/data/languages/cr_Latn.textproto +++ b/Lib/gflanguages/data/languages/cr_Latn.textproto @@ -1,7 +1,7 @@ id: "cr_Latn" language: "cr" script: "Latn" -name: "Cree, Latin" +name: "Cree (Latin)" population: 96000 region: "CA" region: "US" diff --git a/Lib/gflanguages/data/languages/cyo_Latn.textproto b/Lib/gflanguages/data/languages/cyo_Latn.textproto index f045397b607..e28eb1126b0 100644 --- a/Lib/gflanguages/data/languages/cyo_Latn.textproto +++ b/Lib/gflanguages/data/languages/cyo_Latn.textproto @@ -1,7 +1,7 @@ id: "cyo_Latn" language: "cyo" script: "Latn" -name: "Cuyonon, Latin" +name: "Cuyonon (Latin)" region: "PH" sample_text { masthead_full: "TtAa" diff --git a/Lib/gflanguages/data/languages/de_Dupl.textproto b/Lib/gflanguages/data/languages/de_Dupl.textproto index ade2f1813fd..718b6fd7479 100644 --- a/Lib/gflanguages/data/languages/de_Dupl.textproto +++ b/Lib/gflanguages/data/languages/de_Dupl.textproto @@ -1,5 +1,5 @@ id: "de_Dupl" language: "de" script: "Dupl" -name: "German, Duployan" +name: "German (Duployan)" region: "DE" diff --git a/Lib/gflanguages/data/languages/de_Runr.textproto b/Lib/gflanguages/data/languages/de_Runr.textproto index d8713bf625f..3f11ca04641 100644 --- a/Lib/gflanguages/data/languages/de_Runr.textproto +++ b/Lib/gflanguages/data/languages/de_Runr.textproto @@ -1,6 +1,6 @@ id: "de_Runr" language: "de" script: "Runr" -name: "German, Runic" +name: "German (Runic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/dhi_Deva.textproto b/Lib/gflanguages/data/languages/dhi_Deva.textproto index 131854e3338..061749c2dde 100644 --- 
a/Lib/gflanguages/data/languages/dhi_Deva.textproto +++ b/Lib/gflanguages/data/languages/dhi_Deva.textproto @@ -1,7 +1,7 @@ id: "dhi_Deva" language: "dhi" script: "Deva" -name: "Dhimal, Devanagari" +name: "Dhimal (Devanagari)" region: "NP" sample_text { masthead_full: "हदबङ" diff --git a/Lib/gflanguages/data/languages/doi_Arab.textproto b/Lib/gflanguages/data/languages/doi_Arab.textproto index d9e8313d013..f60ec63a4ea 100644 --- a/Lib/gflanguages/data/languages/doi_Arab.textproto +++ b/Lib/gflanguages/data/languages/doi_Arab.textproto @@ -1,6 +1,6 @@ id: "doi_Arab" language: "doi" script: "Arab" -name: "Dogri, Arabic" +name: "Dogri (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/doi_Dogr.textproto b/Lib/gflanguages/data/languages/doi_Dogr.textproto index a027786c9c7..9dbd5a82721 100644 --- a/Lib/gflanguages/data/languages/doi_Dogr.textproto +++ b/Lib/gflanguages/data/languages/doi_Dogr.textproto @@ -1,7 +1,7 @@ id: "doi_Dogr" language: "doi" script: "Dogr" -name: "Dogri, Dogra" +name: "Dogri (Dogra)" population: 2600000 region: "IN" sample_text { diff --git a/Lib/gflanguages/data/languages/doi_Takr.textproto b/Lib/gflanguages/data/languages/doi_Takr.textproto index f7776f05d97..e2ba1a6d42c 100644 --- a/Lib/gflanguages/data/languages/doi_Takr.textproto +++ b/Lib/gflanguages/data/languages/doi_Takr.textproto @@ -1,7 +1,7 @@ id: "doi_Takr" language: "doi" script: "Takr" -name: "Dogri, Takri" +name: "Dogri (Takri)" population: 0 historical: true sample_text { @@ -17,4 +17,4 @@ sample_text { specimen_32: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 ।," specimen_21: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚳 𑚚𑚴𑚖𑚷𑚲 𑚛𑚮𑚘𑚲𑚫 𑚞𑚮𑚐𑚴𑚫 𑚝𑚮𑚊𑚖𑚷𑚳 𑚞𑚰𑚙𑚤𑚳𑚝𑚲 𑚨𑚦𑚊𑚮𑚒 𑚊𑚮𑚕𑚭 𑚊𑚤𑚯 𑚛𑚰𑚤 𑚛𑚲𑚨𑚲𑚛𑚭 𑚞𑚳𑚫𑚖𑚭 𑚊𑚯𑚙𑚭 𑚀𑚙𑚳 𑚄𑚚𑚲𑚫 𑚀𑚞𑚝𑚭 𑚢𑚭𑚥 𑚥𑚰𑚏𑚞𑚘𑚲 𑚊𑚝𑚲 𑚄𑚖𑚭𑚃 𑚛𑚮𑚙𑚭 ।," specimen_16: "𑚂𑚊 𑚁𑚛𑚢𑚯𑚛𑚲 𑚛𑚴 𑚞𑚰𑚙𑚕 𑚚𑚲। 𑚄𑚫𑚛𑚲 𑚦𑚮𑚏𑚭 
𑚝𑚮𑚊𑚖𑚷𑚝𑚲 𑚦𑚦𑚲𑚊𑚯 𑚁𑚋𑚮𑚁 𑚑𑚲 𑚒𑚲 𑚦𑚭𑚞𑚱𑚑𑚯 𑚑𑚭𑚆𑚛𑚭𑚙𑚯𑚛 𑚑𑚲 𑚙𑚮𑚨𑚭 𑚢𑚮𑚊𑚯 𑚛𑚲𑚃 𑚛𑚲𑚈 । 𑚙𑚭𑚫 𑚄𑚌𑚝𑚳 𑚢𑚭𑚥 𑚄𑚝𑚲𑚊𑚯 𑚦𑚫𑚙𑚯 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚳 𑚚𑚴𑚖𑚷𑚲 𑚛𑚮𑚘𑚲𑚫 𑚞𑚮𑚐𑚴𑚫 𑚝𑚮𑚊𑚖𑚷𑚳 𑚞𑚰𑚙𑚤𑚳𑚝𑚲 𑚨𑚦𑚊𑚮𑚒 𑚊𑚮𑚕𑚭 𑚊𑚤𑚯 𑚛𑚰𑚤 𑚛𑚲𑚨𑚲𑚛𑚭 𑚞𑚳𑚫𑚖𑚭 𑚊𑚯𑚙𑚭 𑚀𑚙𑚳 𑚄𑚚𑚲𑚫 𑚀𑚞𑚝𑚭 𑚢𑚭𑚥 𑚥𑚰𑚏𑚞𑚘𑚲 𑚊𑚝𑚲 𑚄𑚖𑚭𑚃 𑚛𑚮𑚙𑚭 । \n𑚀𑚙𑚲 𑚑𑚛 𑚨𑚦 𑚋𑚤𑚏 𑚊𑚤𑚯 𑚏𑚰𑚊𑚮𑚁 𑚄𑚨 𑚢𑚰𑚥𑚋𑚳 𑚦𑚮𑚏 𑚦𑚖𑚭 𑚊𑚭𑚥 𑚞𑚃 𑚌𑚮𑚁 𑚀𑚙𑚲 𑚈𑚩 𑚀𑚫𑚌𑚭𑚥 𑚩𑚴𑚝 𑚥𑚌𑚮𑚁 ।," -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto index 9296b6e3ac7..7edc157ecd5 100644 --- a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto @@ -1,5 +1,5 @@ id: "dyu_Nkoo" language: "dyu" script: "Nkoo" -name: "Dyula, Nko" +name: "Dyula (Nko)" region: "CI" diff --git a/Lib/gflanguages/data/languages/en_Brai.textproto b/Lib/gflanguages/data/languages/en_Brai.textproto index dbc32974041..cc294d99a29 100644 --- a/Lib/gflanguages/data/languages/en_Brai.textproto +++ b/Lib/gflanguages/data/languages/en_Brai.textproto @@ -1,7 +1,7 @@ id: "en_Brai" language: "en" script: "Brai" -name: "English, Braille" +name: "English (Braille)" sample_text { masthead_full: "⠉⠽⠑⠝" masthead_partial: "⠉⠽" diff --git a/Lib/gflanguages/data/languages/en_Dsrt.textproto b/Lib/gflanguages/data/languages/en_Dsrt.textproto index 425586c1df8..a46af4f16b6 100644 --- a/Lib/gflanguages/data/languages/en_Dsrt.textproto +++ b/Lib/gflanguages/data/languages/en_Dsrt.textproto @@ -1,7 +1,7 @@ id: "en_Dsrt" language: "en" script: "Dsrt" -name: "English, Deseret" +name: "English (Deseret)" population: 0 sample_text { masthead_full: "𐐃𐐫𐐢𐑊" diff --git a/Lib/gflanguages/data/languages/en_Shaw.textproto b/Lib/gflanguages/data/languages/en_Shaw.textproto index bda053096be..6492541ea7b 100644 --- a/Lib/gflanguages/data/languages/en_Shaw.textproto +++ b/Lib/gflanguages/data/languages/en_Shaw.textproto @@ -1,7 +1,7 @@ id: "en_Shaw" language: "en" script: "Shaw" -name: "English, Shavian" +name: "English (Shavian)" population: 0 region: "US" sample_text { diff --git 
a/Lib/gflanguages/data/languages/ett_Latn.textproto b/Lib/gflanguages/data/languages/ett_Latn.textproto index 4b8e1e9546b..b8d0b43c9aa 100644 --- a/Lib/gflanguages/data/languages/ett_Latn.textproto +++ b/Lib/gflanguages/data/languages/ett_Latn.textproto @@ -1,6 +1,6 @@ id: "ett_Latn" language: "ett" script: "Latn" -name: "Etruscan, Latin" +name: "Etruscan (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/evn_Latn.textproto b/Lib/gflanguages/data/languages/evn_Latn.textproto index 937e9a9bbca..4261d60a204 100644 --- a/Lib/gflanguages/data/languages/evn_Latn.textproto +++ b/Lib/gflanguages/data/languages/evn_Latn.textproto @@ -1,7 +1,7 @@ id: "evn_Latn" language: "evn" script: "Latn" -name: "Evenki, Latin" +name: "Evenki (Latin)" population: 16000 region: "RU" region: "CN" diff --git a/Lib/gflanguages/data/languages/fil_Tglg.textproto b/Lib/gflanguages/data/languages/fil_Tglg.textproto index 0d253e0fa9a..f46a534991b 100644 --- a/Lib/gflanguages/data/languages/fil_Tglg.textproto +++ b/Lib/gflanguages/data/languages/fil_Tglg.textproto @@ -1,7 +1,7 @@ id: "fil_Tglg" language: "fil" script: "Tglg" -name: "Filipino, Tagalog" +name: "Filipino (Tagalog)" population: 0 region: "PH" sample_text { diff --git a/Lib/gflanguages/data/languages/fr_Dupl.textproto b/Lib/gflanguages/data/languages/fr_Dupl.textproto index a4f70b46e34..0093d282a83 100644 --- a/Lib/gflanguages/data/languages/fr_Dupl.textproto +++ b/Lib/gflanguages/data/languages/fr_Dupl.textproto @@ -1,5 +1,5 @@ id: "fr_Dupl" language: "fr" script: "Dupl" -name: "French, Duployan" +name: "French (Duployan)" historical: true diff --git a/Lib/gflanguages/data/languages/fuf_Adlm.textproto b/Lib/gflanguages/data/languages/fuf_Adlm.textproto index b1a89a29251..ce927cb1462 100644 --- a/Lib/gflanguages/data/languages/fuf_Adlm.textproto +++ b/Lib/gflanguages/data/languages/fuf_Adlm.textproto @@ -1,7 +1,7 @@ id: "fuf_Adlm" language: "fuf" script: "Adlm" -name: "Pular, Adlam" +name: "Pular 
(Adlam)" region: "GN" sample_text { masthead_full: "𞤋𞤭𞤐𞤲" diff --git a/Lib/gflanguages/data/languages/gag_Cyrl.textproto b/Lib/gflanguages/data/languages/gag_Cyrl.textproto index abd299f5c8b..399cd50a4a0 100644 --- a/Lib/gflanguages/data/languages/gag_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/gag_Cyrl.textproto @@ -1,7 +1,7 @@ id: "gag_Cyrl" language: "gag" script: "Cyrl" -name: "Gagauz, Cyrillic" +name: "Gagauz (Cyrillic)" population: 0 region: "MD" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/gju_Deva.textproto b/Lib/gflanguages/data/languages/gju_Deva.textproto index 5a71192a98f..74f83e59b6d 100644 --- a/Lib/gflanguages/data/languages/gju_Deva.textproto +++ b/Lib/gflanguages/data/languages/gju_Deva.textproto @@ -1,7 +1,7 @@ id: "gju_Deva" language: "gju" script: "Deva" -name: "Gujari, Devanagari" +name: "Gujari (Devanagari)" region: "IN" sample_text { masthead_full: "सगलम" diff --git a/Lib/gflanguages/data/languages/gon_Telu.textproto b/Lib/gflanguages/data/languages/gon_Telu.textproto index 4f6c18dce55..5aee55608d3 100644 --- a/Lib/gflanguages/data/languages/gon_Telu.textproto +++ b/Lib/gflanguages/data/languages/gon_Telu.textproto @@ -1,6 +1,6 @@ id: "gon_Telu" language: "gon" script: "Telu" -name: "Gondi, Telugu" +name: "Gondi (Telugu)" population: 0 region: "IN" diff --git a/Lib/gflanguages/data/languages/got_Runr.textproto b/Lib/gflanguages/data/languages/got_Runr.textproto index 95ab8e43b74..c6b996e8707 100644 --- a/Lib/gflanguages/data/languages/got_Runr.textproto +++ b/Lib/gflanguages/data/languages/got_Runr.textproto @@ -1,7 +1,7 @@ id: "got_Runr" language: "got" script: "Runr" -name: "Gothic, Runic" +name: "Gothic (Runic)" region: "UA" sample_text { masthead_full: "ᚨᛚᛁᛗ" diff --git a/Lib/gflanguages/data/languages/hi_Mahj.textproto b/Lib/gflanguages/data/languages/hi_Mahj.textproto index 4f27f550278..5c6325565ce 100644 --- a/Lib/gflanguages/data/languages/hi_Mahj.textproto +++ b/Lib/gflanguages/data/languages/hi_Mahj.textproto 
@@ -1,7 +1,7 @@ id: "hi_Mahj" language: "hi" script: "Mahj" -name: "Hindi, Mahajani" +name: "Hindi (Mahajani)" population: 0 historical: true @@ -19,4 +19,3 @@ sample_text { specimen_21: "𑅕𑅑𑅰𑅑 𑅯𑅛𑅕𑅣𑅑 𑅕𑅑 𑅓𑅕𑅐𑅧𑅣𑅣𑅐, 𑅨𑅭𑅑𑅯𑅐𑅭, 𑅘𑅭 𑅛𑅐 𑅨𑅣𑅭𑅯𑅛𑅯𑅱𑅐𑅭 𑅕𑅓 𑅨𑅭𑅣𑅑 𑅕𑅔𑅑 𑅬𑅧𑅬𑅐𑅧𑅐 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅧 𑅕𑅑𑅛𑅐 𑅛𑅐𑅓𑅗𑅐, 𑅧 𑅕𑅑𑅰𑅑 𑅕𑅓 𑅰𑅬𑅬𑅐𑅧 𑅒𑅭 𑅖𑅛𑅐𑅣𑅑 𑅨𑅭 𑅕𑅔𑅑 𑅐𑅕𑅖𑅳𑅓𑅨 𑅱𑅔 𑅰𑅕𑅓𑅗𑅐 । 𑅑𑅰𑅓 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅛𑅐 𑅐𑅦𑅓𑅨𑅔𑅧 𑅕𑅓 𑅯𑅑𑅭𑅒𑅥𑅦 𑅨𑅭𑅣𑅛𑅓𑅕 𑅕𑅔 𑅕𑅳𑅐𑅧𑅒𑅧𑅑 𑅭𑅕𑅖𑅳𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅨𑅭𑅐𑅨𑅣 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅙𑅐𑅭, 𑅐𑅧𑅣𑅭𑅐𑅣𑅬𑅐 𑅒𑅭 𑅦𑅭𑅬 𑅕𑅑 𑅐𑅛𑅳𑅐𑅥𑅑 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰 𑅐𑅦𑅑𑅕𑅐𑅭 𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅐𑅨𑅧𑅐 𑅦𑅭𑅬 𑅛𑅐 𑅯𑅑𑅰𑅳𑅯𑅐𑅰 𑅪𑅥𑅮𑅧𑅓 𑅒𑅭 𑅐𑅕𑅓𑅮𑅓 𑅛𑅐 𑅥𑅒𑅰𑅭𑅔𑅧 𑅕𑅓 𑅰𑅐𑅤 𑅬𑅑𑅮𑅕𑅭 𑅣𑅤𑅐 𑅰𑅐𑅭𑅯𑅛𑅧𑅑𑅕 𑅭𑅒𑅨 𑅬𑅓𑅧 𑅐𑅤𑅯𑅐 𑅧𑅑𑅛𑅑 𑅣𑅔𑅭 𑅨𑅭 𑅐𑅨𑅧𑅓 𑅦𑅭𑅬 𑅛𑅐 𑅯𑅑𑅰𑅳𑅯𑅐𑅰 𑅕𑅔 𑅰𑅳𑅑𑅕𑅖𑅳𑅐, 𑅕𑅭𑅑𑅛𑅐, 𑅒𑅨𑅐𑅰𑅧𑅐, 𑅣𑅤𑅐 𑅯𑅛𑅯𑅱𑅐𑅭 𑅕𑅓 𑅥𑅯𑅐𑅭𑅐 𑅨𑅭𑅕𑅞 𑅕𑅭𑅧𑅓 𑅕𑅑 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐 𑅱𑅑 ।" specimen_16: "𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅙𑅐𑅭 𑅒𑅭 𑅒𑅰𑅕𑅑 𑅐𑅫𑅑𑅯𑅛𑅕𑅣𑅑 𑅕𑅑 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅪𑅑𑅧𑅐 𑅱𑅰𑅣𑅕𑅖𑅳𑅓𑅨 𑅕𑅓 𑅕𑅔𑅑 𑅭𑅐𑅛 𑅭𑅖𑅧𑅐 𑅒𑅭 𑅕𑅑𑅰𑅑 𑅫𑅑 𑅬𑅐𑅦𑅛𑅬 𑅕𑅓 𑅛𑅳𑅭𑅑𑅓 𑅰𑅓 𑅣𑅤𑅐 𑅰𑅑𑅬𑅐𑅔𑅧 𑅕𑅑 𑅨𑅭𑅯𑅐𑅱 𑅧 𑅕𑅭 𑅕𑅓 𑅕𑅑𑅰𑅑 𑅕𑅑 𑅬𑅒𑅙𑅧𑅐 𑅒𑅭 𑅦𑅐𑅭𑅢𑅐 𑅕𑅐 𑅐𑅧𑅯𑅓𑅖𑅳𑅢, 𑅨𑅭𑅱𑅢 𑅣𑅤𑅐 𑅨𑅭𑅥𑅐𑅧 𑅰𑅬𑅬𑅑𑅮𑅑𑅣 𑅱𑅑 ।\n𑅰𑅬𑅐𑅛 𑅕𑅓 𑅓𑅕 𑅰𑅥𑅰𑅛 𑅕𑅓 𑅭𑅒𑅨 𑅬𑅓𑅧 𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅰𑅐𑅬𑅐𑅛𑅑𑅕 𑅰𑅒𑅭𑅕𑅖𑅳𑅐 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 𑅒𑅭 𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅐𑅨𑅧𑅓 𑅯𑅛𑅕𑅣𑅑𑅣𑅯 𑅕𑅓 𑅒𑅰 𑅰𑅯𑅣𑅧𑅣𑅭 𑅯𑅑𑅕𑅐𑅰 𑅣𑅤𑅐 𑅗𑅔𑅭𑅯 𑅕𑅓 𑅮𑅑𑅓—𑅛𑅔 𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅨𑅭𑅛𑅣𑅧 𑅛𑅐 𑅐𑅧𑅣𑅭𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅰𑅱𑅛𑅔𑅗 𑅣𑅤𑅐 𑅨𑅭𑅣𑅛𑅓𑅕 𑅭𑅐𑅛𑅛 𑅕𑅓 𑅰𑅧𑅗𑅟𑅧 𑅓𑅯𑅧 𑅰𑅐𑅦𑅧𑅔𑅧 𑅕𑅓 𑅐𑅧𑅒𑅕𑅒𑅮 𑅱𑅔—𑅐𑅧𑅑𑅕𑅐𑅭𑅛𑅣𑅱 𑅐𑅯𑅰𑅳𑅛𑅕 𑅐𑅭𑅤𑅑𑅕, 𑅰𑅐𑅬𑅐𑅛𑅑𑅕, 𑅒𑅭 𑅰𑅐𑅧𑅰𑅕𑅭𑅒𑅣𑅑𑅕 𑅐𑅦𑅑𑅕𑅐𑅭𑅔𑅧 𑅕𑅑 𑅨𑅭𑅐𑅨𑅣𑅑 𑅕𑅐 𑅱𑅕𑅳 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅯𑅑𑅰𑅳𑅭𑅐𑅬 𑅒𑅭 𑅐𑅯𑅕𑅐𑅰𑅳 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 । 𑅑𑅰𑅕𑅓 𑅐𑅧𑅣𑅭𑅗𑅣 𑅕𑅐𑅬 𑅕𑅓 𑅘𑅧𑅞𑅔𑅧 𑅕𑅑 𑅒𑅙𑅑𑅣 𑅱𑅥𑅪𑅧𑅥𑅑 𑅒𑅭 𑅰𑅬𑅛𑅰𑅬𑅛 𑅨𑅭 𑅬𑅛𑅳𑅥𑅒𑅭𑅑 𑅰𑅱𑅑𑅣 𑅚𑅒𑅞𑅞𑅑𑅛𑅐𑅧 𑅰𑅬𑅬𑅑𑅮𑅑𑅣 𑅱𑅑 ।\n𑅨𑅭𑅣𑅛𑅓𑅕 𑅯𑅛𑅕𑅣𑅑 𑅕𑅔 𑅑𑅰𑅑 𑅰𑅐𑅬𑅐𑅛𑅑𑅕 𑅒𑅭 𑅐𑅧𑅣𑅭𑅭𑅐𑅖𑅳𑅞𑅭𑅑𑅛 𑅯𑅛𑅯𑅰𑅤𑅐 𑅕𑅑 𑅨𑅭𑅐𑅨𑅣𑅑 𑅕𑅐 𑅐𑅦𑅑𑅕𑅐𑅭 𑅱𑅑 𑅛𑅑𑅰𑅬𑅓𑅧 𑅑𑅰 𑅘𑅔𑅖𑅳𑅢𑅐 𑅬𑅓𑅧 𑅒𑅮𑅮𑅑𑅖𑅑𑅣 𑅐𑅦𑅑𑅕𑅐𑅭𑅔𑅧 𑅒𑅭 𑅰𑅯𑅣𑅧𑅣𑅭𑅣𑅐𑅔𑅧 𑅕𑅔 𑅨𑅒𑅭𑅢𑅣𑅱 𑅨𑅭𑅐𑅨𑅣 𑅕𑅑𑅛𑅐 𑅛𑅐 𑅰𑅕𑅓 ।" } - \ No newline at end of file diff --git a/Lib/gflanguages/data/languages/hi_Newa.textproto b/Lib/gflanguages/data/languages/hi_Newa.textproto index 18435e951f4..edd75fa0191 100644 --- a/Lib/gflanguages/data/languages/hi_Newa.textproto +++ b/Lib/gflanguages/data/languages/hi_Newa.textproto @@ -1,5 +1,5 @@ id: "hi_Newa" language: "hi" script: "Newa" -name: "Hindi, Newa" +name: "Hindi (Newa)" region: "IN" diff --git a/Lib/gflanguages/data/languages/hnn_Hano.textproto 
b/Lib/gflanguages/data/languages/hnn_Hano.textproto index bab528babf0..3e7f7615ab8 100644 --- a/Lib/gflanguages/data/languages/hnn_Hano.textproto +++ b/Lib/gflanguages/data/languages/hnn_Hano.textproto @@ -1,7 +1,7 @@ id: "hnn_Hano" language: "hnn" script: "Hano" -name: "Hanunoo, Hanunoo" +name: "Hanunoo (Hanunoo)" autonym: "ᜱᜨᜳᜨᜳᜢ" population: 13000 exemplar_chars { diff --git a/Lib/gflanguages/data/languages/id_Arab.textproto b/Lib/gflanguages/data/languages/id_Arab.textproto index d5025732efb..e25427ea574 100644 --- a/Lib/gflanguages/data/languages/id_Arab.textproto +++ b/Lib/gflanguages/data/languages/id_Arab.textproto @@ -1,6 +1,6 @@ id: "id_Arab" language: "id" script: "Arab" -name: "Indonesian, Arabic" +name: "Indonesian (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ie_Latn.textproto b/Lib/gflanguages/data/languages/ie_Latn.textproto index 0df6a02cc91..ed6ae1d601e 100644 --- a/Lib/gflanguages/data/languages/ie_Latn.textproto +++ b/Lib/gflanguages/data/languages/ie_Latn.textproto @@ -1,7 +1,7 @@ id: "ie_Latn" language: "ie" script: "Latn" -name: "Interlingue, Latin" +name: "Interlingue (Latin)" sample_text { masthead_full: "OoMm" masthead_partial: "Nn" diff --git a/Lib/gflanguages/data/languages/inh_Arab.textproto b/Lib/gflanguages/data/languages/inh_Arab.textproto index b736d376a15..205f94c9d23 100644 --- a/Lib/gflanguages/data/languages/inh_Arab.textproto +++ b/Lib/gflanguages/data/languages/inh_Arab.textproto @@ -1,6 +1,6 @@ id: "inh_Arab" language: "inh" script: "Arab" -name: "Ingush, Arabic" +name: "Ingush (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/inh_Latn.textproto b/Lib/gflanguages/data/languages/inh_Latn.textproto index d990fc6ecc0..2158ad0eb35 100644 --- a/Lib/gflanguages/data/languages/inh_Latn.textproto +++ b/Lib/gflanguages/data/languages/inh_Latn.textproto @@ -1,6 +1,6 @@ id: "inh_Latn" language: "inh" script: "Latn" -name: "Ingush, Latin" +name: "Ingush (Latin)" 
population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ja_Hira.textproto b/Lib/gflanguages/data/languages/ja_Hira.textproto index 751c56653f4..d69bafa7032 100644 --- a/Lib/gflanguages/data/languages/ja_Hira.textproto +++ b/Lib/gflanguages/data/languages/ja_Hira.textproto @@ -1,7 +1,7 @@ id: "ja_Hira" language: "ja" script: "Hira" -name: "Japanese, Hiragana" +name: "Japanese (Hiragana)" autonym: "日本語" region: "BR" region: "JP" diff --git a/Lib/gflanguages/data/languages/ja_Kana.textproto b/Lib/gflanguages/data/languages/ja_Kana.textproto index b15c926409b..ca624bf102f 100644 --- a/Lib/gflanguages/data/languages/ja_Kana.textproto +++ b/Lib/gflanguages/data/languages/ja_Kana.textproto @@ -1,7 +1,7 @@ id: "ja_Kana" language: "ja" script: "Kana" -name: "Japanese, Katakana" +name: "Japanese (Katakana)" autonym: "日本語" region: "BR" region: "JP" diff --git a/Lib/gflanguages/data/languages/jbo_Latn.textproto b/Lib/gflanguages/data/languages/jbo_Latn.textproto index 282f6ca2e09..1db47d711f5 100644 --- a/Lib/gflanguages/data/languages/jbo_Latn.textproto +++ b/Lib/gflanguages/data/languages/jbo_Latn.textproto @@ -1,7 +1,7 @@ id: "jbo_Latn" language: "jbo" script: "Latn" -name: "Lojban, Latin" +name: "Lojban (Latin)" autonym: "lojban (Latin)" sample_text { masthead_full: "RrOo" diff --git a/Lib/gflanguages/data/languages/jra_Latn.textproto b/Lib/gflanguages/data/languages/jra_Latn.textproto index 3713ea61848..eb0154a059b 100644 --- a/Lib/gflanguages/data/languages/jra_Latn.textproto +++ b/Lib/gflanguages/data/languages/jra_Latn.textproto @@ -1,7 +1,7 @@ id: "jra_Latn" language: "jra" script: "Latn" -name: "Jarai, Latin" +name: "Jarai (Latin)" population: 530000 region: "VN" exemplar_chars { @@ -22,4 +22,4 @@ sample_text { specimen_21: "Abih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. 
Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi." specimen_16: "Abih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi.\nAbih bang mơnuih-mơnam tơkeng rai rơngai laih anŭn mơdơ̆-mơđơr amăng tơlơi pơpŭ-pơyôm hăng tơlơi dưi. Ƀing gơñu tŭ hơmâo tơlơi pơmĭn hăng tơlơi thâo djơ̆-glaĭ laih anŭn brơi ngă kơ tơdruă amăng tơlơi khăp ayŏng adơi." } -source: "Lap Minh Siu, Developing the First Preliminary Dictionary of North American Jarai, Texas Tech University, 2009" \ No newline at end of file +source: "Lap Minh Siu, Developing the First Preliminary Dictionary of North American Jarai, Texas Tech University, 2009" diff --git a/Lib/gflanguages/data/languages/jv_Java.textproto b/Lib/gflanguages/data/languages/jv_Java.textproto index 1f89595da1c..5dc5242b3a0 100644 --- a/Lib/gflanguages/data/languages/jv_Java.textproto +++ b/Lib/gflanguages/data/languages/jv_Java.textproto @@ -1,7 +1,7 @@ id: "jv_Java" language: "jv" script: "Java" -name: "Javanese, Javanese" +name: "Javanese (Javanese)" autonym: "ꦧꦱꦗꦮ" population: 0 region: "ID" diff --git a/Lib/gflanguages/data/languages/kab_Tfng.textproto b/Lib/gflanguages/data/languages/kab_Tfng.textproto index fdb7dd3c830..011d356bcdc 100644 --- a/Lib/gflanguages/data/languages/kab_Tfng.textproto +++ b/Lib/gflanguages/data/languages/kab_Tfng.textproto @@ -1,7 +1,7 @@ id: "kab_Tfng" 
language: "kab" script: "Tfng" -name: "Kabyle, Tifinagh" +name: "Kabyle (Tifinagh)" region: "DZ" sample_text { masthead_full: "ⵉⵎⴷⴰ" diff --git a/Lib/gflanguages/data/languages/kha_Beng.textproto b/Lib/gflanguages/data/languages/kha_Beng.textproto index f43a4c9fec4..286ed155d83 100644 --- a/Lib/gflanguages/data/languages/kha_Beng.textproto +++ b/Lib/gflanguages/data/languages/kha_Beng.textproto @@ -1,6 +1,6 @@ id: "kha_Beng" language: "kha" script: "Beng" -name: "Khasi, Bengali" +name: "Khasi (Bengali)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/khr_Beng.textproto b/Lib/gflanguages/data/languages/khr_Beng.textproto index 0124d01fbdc..ae571a7eced 100644 --- a/Lib/gflanguages/data/languages/khr_Beng.textproto +++ b/Lib/gflanguages/data/languages/khr_Beng.textproto @@ -1,7 +1,7 @@ id: "khr_Beng" language: "khr" script: "Beng" -name: "Kharia, Bangla" +name: "Kharia (Bangla)" region: "IN" sample_text { masthead_full: "সউবম" diff --git a/Lib/gflanguages/data/languages/khr_Deva.textproto b/Lib/gflanguages/data/languages/khr_Deva.textproto index 4e7a197360d..7f180c67f98 100644 --- a/Lib/gflanguages/data/languages/khr_Deva.textproto +++ b/Lib/gflanguages/data/languages/khr_Deva.textproto @@ -1,7 +1,7 @@ id: "khr_Deva" language: "khr" script: "Deva" -name: "Kharia, Devanagari" +name: "Kharia (Devanagari)" region: "IN" sample_text { masthead_full: "सउबम" diff --git a/Lib/gflanguages/data/languages/khr_Orya.textproto b/Lib/gflanguages/data/languages/khr_Orya.textproto index 024330cbc9b..e5667d66f6e 100644 --- a/Lib/gflanguages/data/languages/khr_Orya.textproto +++ b/Lib/gflanguages/data/languages/khr_Orya.textproto @@ -1,7 +1,7 @@ id: "khr_Orya" language: "khr" script: "Orya" -name: "Kharia, Odia" +name: "Kharia (Odia)" region: "IN" sample_text { masthead_full: "ସଉବମ" diff --git a/Lib/gflanguages/data/languages/khw_Latn.textproto b/Lib/gflanguages/data/languages/khw_Latn.textproto index a6a07aedbe3..46a142e2d0c 100644 --- 
a/Lib/gflanguages/data/languages/khw_Latn.textproto +++ b/Lib/gflanguages/data/languages/khw_Latn.textproto @@ -1,7 +1,7 @@ id: "khw_Latn" language: "khw" script: "Latn" -name: "Khowar, Latin" +name: "Khowar (Latin)" region: "PK" sample_text { masthead_full: "SsAa" diff --git a/Lib/gflanguages/data/languages/kjg_Latn.textproto b/Lib/gflanguages/data/languages/kjg_Latn.textproto index db01f97b71c..07e7e2f7eb1 100644 --- a/Lib/gflanguages/data/languages/kjg_Latn.textproto +++ b/Lib/gflanguages/data/languages/kjg_Latn.textproto @@ -1,6 +1,6 @@ id: "kjg_Latn" language: "kjg" script: "Latn" -name: "Khmu, Latin" +name: "Khmu (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/kk_Latn.textproto b/Lib/gflanguages/data/languages/kk_Latn.textproto index 6e8516885f7..63e5431be6d 100644 --- a/Lib/gflanguages/data/languages/kk_Latn.textproto +++ b/Lib/gflanguages/data/languages/kk_Latn.textproto @@ -1,7 +1,7 @@ id: "kk_Latn" language: "kk" script: "Latn" -name: "Kazakh, Latin" +name: "Kazakh (Latin)" region: "TR" sample_text { masthead_full: "BbAa" diff --git a/Lib/gflanguages/data/languages/kr_Arab.textproto b/Lib/gflanguages/data/languages/kr_Arab.textproto index 4621a6326b9..e215b349e00 100644 --- a/Lib/gflanguages/data/languages/kr_Arab.textproto +++ b/Lib/gflanguages/data/languages/kr_Arab.textproto @@ -1,4 +1,4 @@ id: "kr_Arab" language: "kr" script: "Arab" -name: "Kanuri, Arabic" +name: "Kanuri (Arabic)" diff --git a/Lib/gflanguages/data/languages/ks_Deva.textproto b/Lib/gflanguages/data/languages/ks_Deva.textproto index 5e3ce1c2791..e835062ce90 100644 --- a/Lib/gflanguages/data/languages/ks_Deva.textproto +++ b/Lib/gflanguages/data/languages/ks_Deva.textproto @@ -1,7 +1,7 @@ id: "ks_Deva" language: "ks" script: "Deva" -name: "Kashmiri, Devanagari" +name: "Kashmiri (Devanagari)" population: 0 region: "BT" region: "FJ" diff --git a/Lib/gflanguages/data/languages/ku_Latn.textproto b/Lib/gflanguages/data/languages/ku_Latn.textproto index 
ee8c5562d0e..cfc38aabaef 100644 --- a/Lib/gflanguages/data/languages/ku_Latn.textproto +++ b/Lib/gflanguages/data/languages/ku_Latn.textproto @@ -1,7 +1,7 @@ id: "ku_Latn" language: "ku" script: "Latn" -name: "Kurdish, Latin" +name: "Kurdish (Latin)" autonym: "Kurmancî" population: 25000000 region: "TR" diff --git a/Lib/gflanguages/data/languages/ku_Yezi.textproto b/Lib/gflanguages/data/languages/ku_Yezi.textproto index f62e70e2b45..cf327d77999 100644 --- a/Lib/gflanguages/data/languages/ku_Yezi.textproto +++ b/Lib/gflanguages/data/languages/ku_Yezi.textproto @@ -1,7 +1,7 @@ id: "ku_Yezi" language: "ku" script: "Yezi" -name: "Kurdish, Yezidi" +name: "Kurdish (Yezidi)" region: "GE" sample_text { masthead_full: "𐺍𐺁𐺄𐺀" diff --git a/Lib/gflanguages/data/languages/kyw_Beng.textproto b/Lib/gflanguages/data/languages/kyw_Beng.textproto index 0d61819bfcd..4569efcca4a 100644 --- a/Lib/gflanguages/data/languages/kyw_Beng.textproto +++ b/Lib/gflanguages/data/languages/kyw_Beng.textproto @@ -1,7 +1,7 @@ id: "kyw_Beng" language: "kyw" script: "Beng" -name: "Kudmali, Bangla" +name: "Kudmali (Bangla)" region: "IN" sample_text { masthead_full: "সভমন" diff --git a/Lib/gflanguages/data/languages/kyw_Orya.textproto b/Lib/gflanguages/data/languages/kyw_Orya.textproto index 06bac434fcc..a72fa031313 100644 --- a/Lib/gflanguages/data/languages/kyw_Orya.textproto +++ b/Lib/gflanguages/data/languages/kyw_Orya.textproto @@ -1,7 +1,7 @@ id: "kyw_Orya" language: "kyw" script: "Orya" -name: "Kudmali, Odia" +name: "Kudmali (Odia)" region: "IN" sample_text { masthead_full: "ସଭମନ" diff --git a/Lib/gflanguages/data/languages/lad_Latn.textproto b/Lib/gflanguages/data/languages/lad_Latn.textproto index 8a674c43377..76b3cd8fad0 100644 --- a/Lib/gflanguages/data/languages/lad_Latn.textproto +++ b/Lib/gflanguages/data/languages/lad_Latn.textproto @@ -1,7 +1,7 @@ id: "lad_Latn" language: "lad" script: "Latn" -name: "Ladino, Latin" +name: "Ladino (Latin)" region: "IL" sample_text { masthead_full: "TtOo" 
diff --git a/Lib/gflanguages/data/languages/lhm_Deva.textproto b/Lib/gflanguages/data/languages/lhm_Deva.textproto index b46ea5928c6..0dcb3e6ae33 100644 --- a/Lib/gflanguages/data/languages/lhm_Deva.textproto +++ b/Lib/gflanguages/data/languages/lhm_Deva.textproto @@ -1,7 +1,7 @@ id: "lhm_Deva" language: "lhm" script: "Deva" -name: "Lhomi, Devanagari" +name: "Lhomi (Devanagari)" region: "NP" sample_text { masthead_full: "वङजय" diff --git a/Lib/gflanguages/data/languages/lif_Limb.textproto b/Lib/gflanguages/data/languages/lif_Limb.textproto index ad2b6d45bef..85d2a24a575 100644 --- a/Lib/gflanguages/data/languages/lif_Limb.textproto +++ b/Lib/gflanguages/data/languages/lif_Limb.textproto @@ -1,7 +1,7 @@ id: "lif_Limb" language: "lif" script: "Limb" -name: "Limbu, Limbu" +name: "Limbu (Limbu)" population: 0 # This sample text was taken from diff --git a/Lib/gflanguages/data/languages/lus_Latn.textproto b/Lib/gflanguages/data/languages/lus_Latn.textproto index a1cb1aea5e2..8861da0d9a2 100644 --- a/Lib/gflanguages/data/languages/lus_Latn.textproto +++ b/Lib/gflanguages/data/languages/lus_Latn.textproto @@ -1,7 +1,7 @@ id: "lus_Latn" language: "lus" script: "Latn" -name: "Mizo, Latin" +name: "Mizo (Latin)" region: "IN" sample_text { masthead_full: "MmIi" diff --git a/Lib/gflanguages/data/languages/lzz_Latn.textproto b/Lib/gflanguages/data/languages/lzz_Latn.textproto index 59e83e0edef..22061146033 100644 --- a/Lib/gflanguages/data/languages/lzz_Latn.textproto +++ b/Lib/gflanguages/data/languages/lzz_Latn.textproto @@ -1,7 +1,7 @@ id: "lzz_Latn" language: "lzz" script: "Latn" -name: "Laz, Latin" +name: "Laz (Latin)" population: 22000 region: "GE" region: "TR" @@ -11,4 +11,4 @@ exemplar_chars { auxiliary: "" } source: "İsmail Avci, Lazuri 5: doguroni materyali – lazca öğretim matryali, Ankara, Milli Eğitim Bakanlığı Yayınları, 2019" -source: "K’lemurişi Ramazan Kosanoğlu, “Lazla ve Lazca”, Kiana, April 2016" \ No newline at end of file +source: "K’lemurişi Ramazan 
Kosanoğlu, “Lazla ve Lazca”, Kiana, April 2016" diff --git a/Lib/gflanguages/data/languages/mai_Newa.textproto b/Lib/gflanguages/data/languages/mai_Newa.textproto index ade356a3d72..e2faa241c72 100644 --- a/Lib/gflanguages/data/languages/mai_Newa.textproto +++ b/Lib/gflanguages/data/languages/mai_Newa.textproto @@ -1,5 +1,5 @@ id: "mai_Newa" language: "mai" script: "Newa" -name: "Maithili, Newa" +name: "Maithili (Newa)" region: "IN" diff --git a/Lib/gflanguages/data/languages/mai_Tirh.textproto b/Lib/gflanguages/data/languages/mai_Tirh.textproto index 8b78f353881..f93ed4131ca 100644 --- a/Lib/gflanguages/data/languages/mai_Tirh.textproto +++ b/Lib/gflanguages/data/languages/mai_Tirh.textproto @@ -1,6 +1,6 @@ id: "mai_Tirh" language: "mai" script: "Tirh" -name: "Maithili, Tirhuta" +name: "Maithili (Tirhuta)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/mak_Bugi.textproto b/Lib/gflanguages/data/languages/mak_Bugi.textproto index bf60e212176..dd9d40b90a5 100644 --- a/Lib/gflanguages/data/languages/mak_Bugi.textproto +++ b/Lib/gflanguages/data/languages/mak_Bugi.textproto @@ -1,6 +1,6 @@ id: "mak_Bugi" language: "mak" script: "Bugi" -name: "Makasar, Buginese" +name: "Makasar (Buginese)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/man_Nkoo.textproto b/Lib/gflanguages/data/languages/man_Nkoo.textproto index 78e2f2acd89..2ba2349b183 100644 --- a/Lib/gflanguages/data/languages/man_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/man_Nkoo.textproto @@ -1,5 +1,5 @@ id: "man_Nkoo" language: "man" script: "Nkoo" -name: "Mandingo, Nko" +name: "Mandingo (Nko)" region: "GN" diff --git a/Lib/gflanguages/data/languages/mdr_Bugi.textproto b/Lib/gflanguages/data/languages/mdr_Bugi.textproto index 4e5b8bdb284..c8056ee4a58 100644 --- a/Lib/gflanguages/data/languages/mdr_Bugi.textproto +++ b/Lib/gflanguages/data/languages/mdr_Bugi.textproto @@ -1,6 +1,6 @@ id: "mdr_Bugi" language: "mdr" script: "Bugi" -name: "Mandar, 
Buginese" +name: "Mandar (Buginese)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/men_Mend.textproto b/Lib/gflanguages/data/languages/men_Mend.textproto index b728142d0ea..541465dbb99 100644 --- a/Lib/gflanguages/data/languages/men_Mend.textproto +++ b/Lib/gflanguages/data/languages/men_Mend.textproto @@ -1,7 +1,7 @@ id: "men_Mend" language: "men" script: "Mend" -name: "Mende, Mende" +name: "Mende (Mende)" population: 0 sample_text { masthead_full: "𞡥𞠖𞢻𞠢" diff --git a/Lib/gflanguages/data/languages/min_Arab.textproto b/Lib/gflanguages/data/languages/min_Arab.textproto index 06d62b51c0e..09ba49ce8d0 100644 --- a/Lib/gflanguages/data/languages/min_Arab.textproto +++ b/Lib/gflanguages/data/languages/min_Arab.textproto @@ -1,7 +1,7 @@ id: "min_Arab" language: "min" script: "Arab" -name: "Minangkabau, Arabic" +name: "Minangkabau (Arabic)" region: "ID" sample_text { masthead_full: "سادو" diff --git a/Lib/gflanguages/data/languages/mn_Zanb.textproto b/Lib/gflanguages/data/languages/mn_Zanb.textproto index 5b81efdeba0..d1493ecd39f 100644 --- a/Lib/gflanguages/data/languages/mn_Zanb.textproto +++ b/Lib/gflanguages/data/languages/mn_Zanb.textproto @@ -1,4 +1,4 @@ id: "mn_Zanb" language: "mn" script: "Zanb" -name: "Mongolian, Zanabazar" +name: "Mongolian (Zanabazar)" diff --git a/Lib/gflanguages/data/languages/mr_Modi.textproto b/Lib/gflanguages/data/languages/mr_Modi.textproto index c8d4e6b6447..684ccec309f 100644 --- a/Lib/gflanguages/data/languages/mr_Modi.textproto +++ b/Lib/gflanguages/data/languages/mr_Modi.textproto @@ -1,7 +1,7 @@ id: "mr_Modi" language: "mr" script: "Modi" -name: "Marathi, Modi" +name: "Marathi (Modi)" population: 0 region: "IN" sample_text { diff --git a/Lib/gflanguages/data/languages/mro_Mroo.textproto b/Lib/gflanguages/data/languages/mro_Mroo.textproto index 047bc8463ed..8cf41962609 100644 --- a/Lib/gflanguages/data/languages/mro_Mroo.textproto +++ b/Lib/gflanguages/data/languages/mro_Mroo.textproto @@ -1,7 +1,7 @@ 
id: "mro_Mroo" language: "mro" script: "Mroo" -name: "Mru, Mro" +name: "Mru (Mro)" population: 0 historical: true sample_text { @@ -18,4 +18,3 @@ sample_text { specimen_21: "𖩏𖩖𖩔𖩆𖩊 𖩗𖩖𖩊 𖩍𖩖𖩌 𖩎𖩆𖩁 𖩋𖩖 𖩍𖩖𖩌𖩯 𖩏𖩖𖩎𖩊 𖩏𖩖𖩔𖩆𖩊 𖩌𖩖 𖩐𖩓𖩆𖩎 𖩖𖩂𖩑𖩌 𖩎𖩖𖩯 𖩌𖩍𖩖𖩁𖩐𖩖 𖩂𖩑𖩌 𖩎𖩖 𖩖𖩎𖩆𖩁 𖩀𖩑𖩖𖩏 𖩈𖩝𖩐 𖩐𖩖𖩮 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩌𖩓𖩑𖩖𖩗 𖩌𖩖 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩈𖩝𖩆 𖩀𖩐𖩘𖩅 𖩐𖩓𖩆𖩁𖩮\n𖩍𖩖𖩁𖩔𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩗𖩖𖩊 𖩀𖩔𖩆𖩎 𖩈𖩘𖩒 𖩌𖩖 𖩖𖩌𖩆𖩓 𖩎𖩊 𖩌𖩆𖩓 𖩅𖩖𖩌 𖩖𖩊 𖩌𖩆𖩓 𖩔𖩘 𖩍𖩖𖩎𖩊 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩆𖩁𖩊𖩁 𖩌𖩖𖩁 𖩈𖩖𖩄𖩖𖩅𖩯 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩘 𖩗𖩆𖩁 𖩍𖩝𖩁 𖩄𖩑𖩖𖩗 𖩅𖩊𖩂𖩯 𖩎𖩊𖩂𖩐𖩖 𖩌𖩖 𖩍𖩝𖩁 𖩌𖩖 𖩈𖩖𖩁 𖩖𖩌𖩖𖩎 𖩎𖩊 𖩗𖩜 𖩅𖩖 𖩌𖩄𖩑𖩖𖩗 𖩌𖩖𖩯 𖩈𖩖𖩅𖩏𖩖 𖩐𖩆𖩗 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩀𖩑𖩅 𖩘 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩌𖩖 𖩌𖩖𖩊𖩯" specimen_16: "𖩏𖩖𖩔𖩆𖩊 𖩗𖩖𖩊 𖩍𖩖𖩌 𖩎𖩆𖩁 𖩋𖩖 𖩍𖩖𖩌𖩯 𖩏𖩖𖩎𖩊 𖩏𖩖𖩔𖩆𖩊 𖩌𖩖 𖩐𖩓𖩆𖩎 𖩖𖩂𖩑𖩌 𖩎𖩖𖩯 𖩌𖩍𖩖𖩁𖩐𖩖 𖩂𖩑𖩌 𖩎𖩖 𖩖𖩎𖩆𖩁 𖩀𖩑𖩖𖩏 𖩈𖩝𖩐 𖩐𖩖𖩮 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩌𖩓𖩑𖩖𖩗 𖩌𖩖 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩈𖩝𖩆 𖩀𖩐𖩘𖩅 𖩐𖩓𖩆𖩁𖩮\n𖩍𖩖𖩁𖩔𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩏𖩖𖩔𖩆𖩊 𖩈𖩝𖩌𖩇𖩆 𖩗𖩖𖩊 𖩀𖩔𖩆𖩎 𖩈𖩘𖩒 𖩌𖩖 𖩖𖩌𖩆𖩓 𖩎𖩊 𖩌𖩆𖩓 𖩅𖩖𖩌 𖩖𖩊 𖩌𖩆𖩓 𖩔𖩘 𖩍𖩖𖩎𖩊 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩆𖩁𖩊𖩁 𖩌𖩖𖩁 𖩈𖩖𖩄𖩖𖩅𖩯 𖩆𖩁 𖩊𖩁 𖩌𖩖𖩁 𖩘 𖩗𖩆𖩁 𖩍𖩝𖩁 𖩄𖩑𖩖𖩗 𖩅𖩊𖩂𖩯 𖩎𖩊𖩂𖩐𖩖 𖩌𖩖 𖩍𖩝𖩁 𖩌𖩖 𖩈𖩖𖩁 𖩖𖩌𖩖𖩎 𖩎𖩊 𖩗𖩜 𖩅𖩖 𖩌𖩄𖩑𖩖𖩗 𖩌𖩖𖩯 𖩈𖩖𖩅𖩏𖩖 𖩐𖩆𖩗 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩀𖩑𖩅 𖩘 𖩍𖩖𖩁 𖩔𖩓𖩊𖩏 𖩌𖩆𖩎𖩄𖩝𖩓 𖩌𖩖 𖩌𖩖𖩊𖩯\n𖩄𖩖𖩌 𖩄𖩖𖩌 𖩍𖩆𖩊 𖩌𖩍𖩖 𖩀𖩆 𖩀𖩖𖩏𖩖 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩏𖩖𖩎 𖩏𖩖𖩎 𖩀𖩗𖩆𖩌 𖩌𖩓𖩆𖩅𖩯 𖩐𖩆𖩗 𖩎𖩍𖩆 𖩍𖩖𖩁 𖩌𖩖 𖩉𖩝 𖩖 𖩏𖩖 𖩀𖩘𖩌 𖩐𖩘𖩊 𖩎𖩊 𖩀𖩆𖩊𖩏𖩆𖩗 𖩍𖩖𖩁 𖩖𖩒 𖩘𖩏 𖩀𖩝𖩁 𖩐𖩖 𖩉𖩆𖩁 𖩌𖩖 𖩋𖩆𖩁𖩯 𖩍𖩖𖩁 𖩎𖩊 𖩒𖩝𖩕 𖩓𖩝𖩕 𖩌𖩖 𖩓𖩘𖩏 𖩓𖩘𖩏 𖩔𖩘 𖩀𖩘𖩌 𖩍𖩖𖩕𖩊, 𖩆𖩁 𖩖𖩌𖩖 𖩌𖩖𖩊 𖩖𖩄𖩖𖩅 𖩏𖩖𖩔𖩊 𖩀𖩑𖩅 𖩘 𖩈𖩖𖩅𖩏𖩖 𖩍𖩖𖩁 𖩎𖩊 𖩔𖩆𖩔𖩆 𖩋𖩖 𖩌𖩖𖩊 𖩖𖩏𖩖 𖩏𖩖𖩔𖩆𖩊 𖩐𖩓𖩆𖩎 𖩍𖩆𖩌 𖩌𖩖𖩌𖩖 𖩌𖩑𖩐 𖩏𖩖𖩔𖩆𖩊 𖩀𖩘𖩌 𖩕𖩊, 𖩕𖩊𖩂𖩋𖩖? 
𖩘𖩏 𖩆𖩁 𖩐𖩓𖩆𖩎 𖩌𖩖 𖩗𖩆𖩁 𖩐𖩍𖩆𖩏 𖩌𖩑𖩐 𖩀𖩆𖩌𖩯" } - \ No newline at end of file diff --git a/Lib/gflanguages/data/languages/mrw_Arab.textproto b/Lib/gflanguages/data/languages/mrw_Arab.textproto index 8e659ee8f69..70c1fc2ae8b 100644 --- a/Lib/gflanguages/data/languages/mrw_Arab.textproto +++ b/Lib/gflanguages/data/languages/mrw_Arab.textproto @@ -1,7 +1,7 @@ id: "mrw_Arab" language: "mrw" script: "Arab" -name: "Maranao, Arabic" +name: "Maranao (Arabic)" region: "PH" sample_text { masthead_full: "لاڠو" diff --git a/Lib/gflanguages/data/languages/mrw_Latn.textproto b/Lib/gflanguages/data/languages/mrw_Latn.textproto index de311094372..583ddc7f50d 100644 --- a/Lib/gflanguages/data/languages/mrw_Latn.textproto +++ b/Lib/gflanguages/data/languages/mrw_Latn.textproto @@ -1,7 +1,7 @@ id: "mrw_Latn" language: "mrw" script: "Latn" -name: "Maranao, Latin" +name: "Maranao (Latin)" region: "PH" sample_text { masthead_full: "LlAa" diff --git a/Lib/gflanguages/data/languages/mui_Latn.textproto b/Lib/gflanguages/data/languages/mui_Latn.textproto index f23461ef931..6f972d3401f 100644 --- a/Lib/gflanguages/data/languages/mui_Latn.textproto +++ b/Lib/gflanguages/data/languages/mui_Latn.textproto @@ -1,7 +1,7 @@ id: "mui_Latn" language: "mui" script: "Latn" -name: "Musi, Latin" +name: "Musi (Latin)" region: "ID" sample_text { masthead_full: "GgAa" diff --git a/Lib/gflanguages/data/languages/ne_Newa.textproto b/Lib/gflanguages/data/languages/ne_Newa.textproto index ad0104dbd97..ddcfa55c410 100644 --- a/Lib/gflanguages/data/languages/ne_Newa.textproto +++ b/Lib/gflanguages/data/languages/ne_Newa.textproto @@ -1,5 +1,5 @@ id: "ne_Newa" language: "ne" script: "Newa" -name: "Nepali, Newa" +name: "Nepali (Newa)" region: "NP" diff --git a/Lib/gflanguages/data/languages/new_Newa.textproto b/Lib/gflanguages/data/languages/new_Newa.textproto index f8d652edcbe..374886c8e32 100644 --- a/Lib/gflanguages/data/languages/new_Newa.textproto +++ b/Lib/gflanguages/data/languages/new_Newa.textproto @@ -1,7 +1,7 @@ id: 
"new_Newa" language: "new" script: "Newa" -name: "Newari, Newa" +name: "Newari (Newa)" region: "NP" sample_text { masthead_full: "𑐳𑐎𑐮𑐩" diff --git a/Lib/gflanguages/data/languages/nsk_Latn.textproto b/Lib/gflanguages/data/languages/nsk_Latn.textproto index 732daaa64ac..3dd1c549990 100644 --- a/Lib/gflanguages/data/languages/nsk_Latn.textproto +++ b/Lib/gflanguages/data/languages/nsk_Latn.textproto @@ -1,6 +1,6 @@ id: "nsk_Latn" language: "nsk" script: "Latn" -name: "Naskapi, Latin" +name: "Naskapi (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/oj_Latn.textproto b/Lib/gflanguages/data/languages/oj_Latn.textproto index 23632e98b0e..7e2887724db 100644 --- a/Lib/gflanguages/data/languages/oj_Latn.textproto +++ b/Lib/gflanguages/data/languages/oj_Latn.textproto @@ -1,6 +1,6 @@ id: "oj_Latn" language: "oj" script: "Latn" -name: "Ojibwa, Latin" +name: "Ojibwa (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/om_Ethi.textproto b/Lib/gflanguages/data/languages/om_Ethi.textproto index 32f01fecd27..239a2db5fc5 100644 --- a/Lib/gflanguages/data/languages/om_Ethi.textproto +++ b/Lib/gflanguages/data/languages/om_Ethi.textproto @@ -1,6 +1,6 @@ id: "om_Ethi" language: "om" script: "Ethi" -name: "Oromo, Ethiopic" +name: "Oromo (Ethiopic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/osa_Latn.textproto b/Lib/gflanguages/data/languages/osa_Latn.textproto index 4d2e5a0444d..85a0c322e24 100644 --- a/Lib/gflanguages/data/languages/osa_Latn.textproto +++ b/Lib/gflanguages/data/languages/osa_Latn.textproto @@ -1,6 +1,6 @@ id: "osa_Latn" language: "osa" script: "Latn" -name: "Osage, Latin" +name: "Osage (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/osc_Latn.textproto b/Lib/gflanguages/data/languages/osc_Latn.textproto index 62f4efb911b..a2aff475d4c 100644 --- a/Lib/gflanguages/data/languages/osc_Latn.textproto +++ 
b/Lib/gflanguages/data/languages/osc_Latn.textproto @@ -1,6 +1,6 @@ id: "osc_Latn" language: "osc" script: "Latn" -name: "Oscan, Latin" +name: "Oscan (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/pi_Brah.textproto b/Lib/gflanguages/data/languages/pi_Brah.textproto index c18ff1aa574..a63a5aa2884 100644 --- a/Lib/gflanguages/data/languages/pi_Brah.textproto +++ b/Lib/gflanguages/data/languages/pi_Brah.textproto @@ -1,5 +1,5 @@ id: "pi_Brah" language: "pi" script: "Brah" -name: "Pali, Brahmi" +name: "Pali (Brahmi)" historical: true diff --git a/Lib/gflanguages/data/languages/pi_Sinh.textproto b/Lib/gflanguages/data/languages/pi_Sinh.textproto index d4290d3716c..cd39c845fa9 100644 --- a/Lib/gflanguages/data/languages/pi_Sinh.textproto +++ b/Lib/gflanguages/data/languages/pi_Sinh.textproto @@ -1,6 +1,6 @@ id: "pi_Sinh" language: "pi" script: "Sinh" -name: "Pali, Sinhala" +name: "Pali (Sinhala)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/pi_Thai.textproto b/Lib/gflanguages/data/languages/pi_Thai.textproto index dcf2349625f..0d3a7b33482 100644 --- a/Lib/gflanguages/data/languages/pi_Thai.textproto +++ b/Lib/gflanguages/data/languages/pi_Thai.textproto @@ -1,6 +1,6 @@ id: "pi_Thai" language: "pi" script: "Thai" -name: "Pali, Thai" +name: "Pali (Thai)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/pnt_Grek.textproto b/Lib/gflanguages/data/languages/pnt_Grek.textproto index 1d4cda8f5df..4df93e5354a 100644 --- a/Lib/gflanguages/data/languages/pnt_Grek.textproto +++ b/Lib/gflanguages/data/languages/pnt_Grek.textproto @@ -1,5 +1,5 @@ id: "pnt_Grek" language: "pnt" script: "Grek" -name: "Pontic, Greek" +name: "Pontic (Greek)" population: 0 diff --git a/Lib/gflanguages/data/languages/pnt_Latn.textproto b/Lib/gflanguages/data/languages/pnt_Latn.textproto index e51bbe06707..7e59f079404 100644 --- a/Lib/gflanguages/data/languages/pnt_Latn.textproto +++ 
b/Lib/gflanguages/data/languages/pnt_Latn.textproto @@ -1,5 +1,5 @@ id: "pnt_Latn" language: "pnt" script: "Latn" -name: "Pontic, Latin" +name: "Pontic (Latin)" population: 0 diff --git a/Lib/gflanguages/data/languages/rab_Deva.textproto b/Lib/gflanguages/data/languages/rab_Deva.textproto index 9248f4e31f1..f313f8f62ff 100644 --- a/Lib/gflanguages/data/languages/rab_Deva.textproto +++ b/Lib/gflanguages/data/languages/rab_Deva.textproto @@ -1,7 +1,7 @@ id: "rab_Deva" language: "rab" script: "Deva" -name: "Camling, Devanagari" +name: "Camling (Devanagari)" region: "NP" sample_text { masthead_full: "झरमन" diff --git a/Lib/gflanguages/data/languages/ray_Latn.textproto b/Lib/gflanguages/data/languages/ray_Latn.textproto index b1cd4f3e857..e9d39122b8d 100644 --- a/Lib/gflanguages/data/languages/ray_Latn.textproto +++ b/Lib/gflanguages/data/languages/ray_Latn.textproto @@ -1,7 +1,7 @@ id: "ray_Latn" language: "ray" script: "Latn" -name: "Rapa, Latin" +name: "Rapa (Latin)" region: "PF" sample_text { masthead_full: "TtEe" diff --git a/Lib/gflanguages/data/languages/rej_Rjng.textproto b/Lib/gflanguages/data/languages/rej_Rjng.textproto index 27613142890..fefb0346bf1 100644 --- a/Lib/gflanguages/data/languages/rej_Rjng.textproto +++ b/Lib/gflanguages/data/languages/rej_Rjng.textproto @@ -1,7 +1,7 @@ id: "rej_Rjng" language: "rej" script: "Rjng" -name: "Rejang, Rejang" +name: "Rejang (Rejang)" population: 0 sample_text { masthead_full: "ꤰꤳꤾꥁ" diff --git a/Lib/gflanguages/data/languages/rhg_Latn.textproto b/Lib/gflanguages/data/languages/rhg_Latn.textproto index fff46030445..46c8c55a638 100644 --- a/Lib/gflanguages/data/languages/rhg_Latn.textproto +++ b/Lib/gflanguages/data/languages/rhg_Latn.textproto @@ -1,7 +1,7 @@ id: "rhg_Latn" language: "rhg" script: "Latn" -name: "Rohingya, Latin" +name: "Rohingya (Latin)" region: "MM" sample_text { masthead_full: "MmAa" diff --git a/Lib/gflanguages/data/languages/ro_Cyrl.textproto b/Lib/gflanguages/data/languages/ro_Cyrl.textproto 
index 7129f41b2ff..130ebfa50c7 100644 --- a/Lib/gflanguages/data/languages/ro_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/ro_Cyrl.textproto @@ -1,7 +1,7 @@ id: "ro_Cyrl" language: "ro" script: "Cyrl" -name: "Romanian, Cyrillic" +name: "Romanian (Cyrillic)" autonym: "Молдовеняскэ" population: 0 region: "MD" diff --git a/Lib/gflanguages/data/languages/rom_Cyrl.textproto b/Lib/gflanguages/data/languages/rom_Cyrl.textproto index 34e34f0ddef..ec35933b310 100644 --- a/Lib/gflanguages/data/languages/rom_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/rom_Cyrl.textproto @@ -1,6 +1,6 @@ id: "rom_Cyrl" language: "rom" script: "Cyrl" -name: "Romany, Cyrillic" +name: "Romany (Cyrillic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/sa_Ahom.textproto b/Lib/gflanguages/data/languages/sa_Ahom.textproto index 58bec4ac6d8..c30a1e56d28 100644 --- a/Lib/gflanguages/data/languages/sa_Ahom.textproto +++ b/Lib/gflanguages/data/languages/sa_Ahom.textproto @@ -1,7 +1,7 @@ id: "sa_Ahom" language: "sa" script: "Ahom" -name: "Sanskrit, Ahom" +name: "Sanskrit (Ahom)" region: "IN" sample_text { masthead_full: "𑜏𑜍𑜈𑜉" diff --git a/Lib/gflanguages/data/languages/sa_Bali.textproto b/Lib/gflanguages/data/languages/sa_Bali.textproto index 961f157b18c..cbf2fb501e3 100644 --- a/Lib/gflanguages/data/languages/sa_Bali.textproto +++ b/Lib/gflanguages/data/languages/sa_Bali.textproto @@ -1,7 +1,7 @@ id: "sa_Bali" language: "sa" script: "Bali" -name: "Sanskrit, Balinese" +name: "Sanskrit (Balinese)" region: "IN" sample_text { masthead_full: "ᬲᬯᬫᬦ" diff --git a/Lib/gflanguages/data/languages/sa_Bhks.textproto b/Lib/gflanguages/data/languages/sa_Bhks.textproto index b5267e11f17..c963ff2331f 100644 --- a/Lib/gflanguages/data/languages/sa_Bhks.textproto +++ b/Lib/gflanguages/data/languages/sa_Bhks.textproto @@ -1,7 +1,7 @@ id: "sa_Bhks" language: "sa" script: "Bhks" -name: "Sanskrit, Bhaiksuki" +name: "Sanskrit (Bhaiksuki)" region: "IN" sample_text { masthead_full: 
"𑰭𑰨𑰪𑰦" diff --git a/Lib/gflanguages/data/languages/sa_Brah.textproto b/Lib/gflanguages/data/languages/sa_Brah.textproto index 6a9d65fb72d..4704273e973 100644 --- a/Lib/gflanguages/data/languages/sa_Brah.textproto +++ b/Lib/gflanguages/data/languages/sa_Brah.textproto @@ -1,7 +1,7 @@ id: "sa_Brah" language: "sa" script: "Brah" -name: "Sanskrit, Brahmi" +name: "Sanskrit (Brahmi)" region: "IN" sample_text { masthead_full: "𑀲𑀭𑀯𑀫" diff --git a/Lib/gflanguages/data/languages/sa_Bugi.textproto b/Lib/gflanguages/data/languages/sa_Bugi.textproto index a380b68f297..638df4fe9fd 100644 --- a/Lib/gflanguages/data/languages/sa_Bugi.textproto +++ b/Lib/gflanguages/data/languages/sa_Bugi.textproto @@ -1,7 +1,7 @@ id: "sa_Bugi" language: "sa" script: "Bugi" -name: "Sanskrit, Buginese" +name: "Sanskrit (Buginese)" region: "IN" sample_text { masthead_full: "ᨔᨑᨓᨆ" diff --git a/Lib/gflanguages/data/languages/sa_Cham.textproto b/Lib/gflanguages/data/languages/sa_Cham.textproto index 83d78875e32..5190e005eae 100644 --- a/Lib/gflanguages/data/languages/sa_Cham.textproto +++ b/Lib/gflanguages/data/languages/sa_Cham.textproto @@ -1,7 +1,7 @@ id: "sa_Cham" language: "sa" script: "Cham" -name: "Sanskrit, Cham" +name: "Sanskrit (Cham)" region: "IN" sample_text { masthead_full: "ꨧꨣꨠꨘ" diff --git a/Lib/gflanguages/data/languages/sa_Gran.textproto b/Lib/gflanguages/data/languages/sa_Gran.textproto index 06cebf56d58..6c413fa6561 100644 --- a/Lib/gflanguages/data/languages/sa_Gran.textproto +++ b/Lib/gflanguages/data/languages/sa_Gran.textproto @@ -1,7 +1,7 @@ id: "sa_Gran" language: "sa" script: "Gran" -name: "Sanskrit, Grantha" +name: "Sanskrit (Grantha)" population: 0 region: "IN" sample_text { diff --git a/Lib/gflanguages/data/languages/sa_Khar.textproto b/Lib/gflanguages/data/languages/sa_Khar.textproto index 017c6695dfd..372998484bc 100644 --- a/Lib/gflanguages/data/languages/sa_Khar.textproto +++ b/Lib/gflanguages/data/languages/sa_Khar.textproto @@ -1,7 +1,7 @@ id: "sa_Khar" language: "sa" 
script: "Khar" -name: "Sanskrit, Kharoshthi" +name: "Sanskrit (Kharoshthi)" region: "IN" sample_text { masthead_full: "𐨯𐨪𐨬𐨨" diff --git a/Lib/gflanguages/data/languages/sa_Marc.textproto b/Lib/gflanguages/data/languages/sa_Marc.textproto index cab5b5261b7..a7ebcf07a69 100644 --- a/Lib/gflanguages/data/languages/sa_Marc.textproto +++ b/Lib/gflanguages/data/languages/sa_Marc.textproto @@ -1,7 +1,7 @@ id: "sa_Marc" language: "sa" script: "Marc" -name: "Sanskrit, Marchen" +name: "Sanskrit (Marchen)" region: "IN" sample_text { masthead_full: "𑲍𑲊𑲁𑱽" diff --git a/Lib/gflanguages/data/languages/sa_Mong.textproto b/Lib/gflanguages/data/languages/sa_Mong.textproto index 8dcebeb4348..faa6db2bef6 100644 --- a/Lib/gflanguages/data/languages/sa_Mong.textproto +++ b/Lib/gflanguages/data/languages/sa_Mong.textproto @@ -1,7 +1,7 @@ id: "sa_Mong" language: "sa" script: "Mong" -name: "Sanskrit, Mongolian" +name: "Sanskrit (Mongolian)" sample_text { masthead_full: "ᠰᠠᠷᠸ" masthead_partial: "ᠧᠮ" diff --git a/Lib/gflanguages/data/languages/sa_Mroo.textproto b/Lib/gflanguages/data/languages/sa_Mroo.textproto index bf37cbfd0d0..b4d0b485723 100644 --- a/Lib/gflanguages/data/languages/sa_Mroo.textproto +++ b/Lib/gflanguages/data/languages/sa_Mroo.textproto @@ -1,7 +1,7 @@ id: "sa_Mroo" language: "sa" script: "Mroo" -name: "Sanskrit, Mro" +name: "Sanskrit (Mro)" region: "IN" sample_text { masthead_full: "𖩔𖩒𖩓𖩗" diff --git a/Lib/gflanguages/data/languages/sa_Mult.textproto b/Lib/gflanguages/data/languages/sa_Mult.textproto index a0d6077e913..330a308f115 100644 --- a/Lib/gflanguages/data/languages/sa_Mult.textproto +++ b/Lib/gflanguages/data/languages/sa_Mult.textproto @@ -1,7 +1,7 @@ id: "sa_Mult" language: "sa" script: "Mult" -name: "Sanskrit, Multani" +name: "Sanskrit (Multani)" region: "IN" sample_text { masthead_full: "𑊥𑊢𑊤𑊠" diff --git a/Lib/gflanguages/data/languages/sa_Nand.textproto b/Lib/gflanguages/data/languages/sa_Nand.textproto index 80b4c563c93..83658a0419b 100644 --- 
a/Lib/gflanguages/data/languages/sa_Nand.textproto +++ b/Lib/gflanguages/data/languages/sa_Nand.textproto @@ -2,7 +2,7 @@ id: "sa_Nand" language: "sa" script: "Nand" -name: "Sanskrit, Nandinagari" +name: "Sanskrit (Nandinagari)" autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠" region: "IN" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/sa_Newa.textproto b/Lib/gflanguages/data/languages/sa_Newa.textproto index 29fffca8636..0ef77aa756d 100644 --- a/Lib/gflanguages/data/languages/sa_Newa.textproto +++ b/Lib/gflanguages/data/languages/sa_Newa.textproto @@ -1,7 +1,7 @@ id: "sa_Newa" language: "sa" script: "Newa" -name: "Sanskrit, Newa" +name: "Sanskrit (Newa)" region: "IN" sample_text { masthead_full: "𑐳𑐬𑐰𑐩" diff --git a/Lib/gflanguages/data/languages/sa_Rjng.textproto b/Lib/gflanguages/data/languages/sa_Rjng.textproto index a30680d56c7..75ef85e98d4 100644 --- a/Lib/gflanguages/data/languages/sa_Rjng.textproto +++ b/Lib/gflanguages/data/languages/sa_Rjng.textproto @@ -1,7 +1,7 @@ id: "sa_Rjng" language: "sa" script: "Rjng" -name: "Sanskrit, Rejang" +name: "Sanskrit (Rejang)" region: "IN" sample_text { masthead_full: "ꤼꥀꤸꤵ" diff --git a/Lib/gflanguages/data/languages/sa_Shrd.textproto b/Lib/gflanguages/data/languages/sa_Shrd.textproto index fb5a400e53d..41619bcb29f 100644 --- a/Lib/gflanguages/data/languages/sa_Shrd.textproto +++ b/Lib/gflanguages/data/languages/sa_Shrd.textproto @@ -1,7 +1,7 @@ id: "sa_Shrd" language: "sa" script: "Shrd" -name: "Sanskrit, Sharada" +name: "Sanskrit (Sharada)" population: 0 region: "IN" sample_text { diff --git a/Lib/gflanguages/data/languages/sa_Sidd.textproto b/Lib/gflanguages/data/languages/sa_Sidd.textproto index 1d7409792ff..d851f84e631 100644 --- a/Lib/gflanguages/data/languages/sa_Sidd.textproto +++ b/Lib/gflanguages/data/languages/sa_Sidd.textproto @@ -1,7 +1,7 @@ id: "sa_Sidd" language: "sa" script: "Sidd" -name: "Sanskrit, Siddham" +name: "Sanskrit (Siddham)" population: 0 region: "IN" sample_text { diff --git 
a/Lib/gflanguages/data/languages/sa_Sinh.textproto b/Lib/gflanguages/data/languages/sa_Sinh.textproto index 0b10acb78fb..6dff2d2841e 100644 --- a/Lib/gflanguages/data/languages/sa_Sinh.textproto +++ b/Lib/gflanguages/data/languages/sa_Sinh.textproto @@ -1,7 +1,7 @@ id: "sa_Sinh" language: "sa" script: "Sinh" -name: "Sanskrit, Sinhala" +name: "Sanskrit (Sinhala)" population: 0 sample_text { masthead_full: "සරවම" diff --git a/Lib/gflanguages/data/languages/sa_Soyo.textproto b/Lib/gflanguages/data/languages/sa_Soyo.textproto index afafc0c3fa9..8245c9c9a18 100644 --- a/Lib/gflanguages/data/languages/sa_Soyo.textproto +++ b/Lib/gflanguages/data/languages/sa_Soyo.textproto @@ -1,7 +1,7 @@ id: "sa_Soyo" language: "sa" script: "Soyo" -name: "Sanskrit, Soyombo" +name: "Sanskrit (Soyombo)" region: "IN" sample_text { masthead_full: "𑪁𑩖𑩥𑪖" diff --git a/Lib/gflanguages/data/languages/sa_Tagb.textproto b/Lib/gflanguages/data/languages/sa_Tagb.textproto index 2e041e4d883..b1fe8477da1 100644 --- a/Lib/gflanguages/data/languages/sa_Tagb.textproto +++ b/Lib/gflanguages/data/languages/sa_Tagb.textproto @@ -1,7 +1,7 @@ id: "sa_Tagb" language: "sa" script: "Tagb" -name: "Sanskrit, Tagbanwa" +name: "Sanskrit (Tagbanwa)" region: "IN" sample_text { masthead_full: "ᝰᝮᝯᝫ" diff --git a/Lib/gflanguages/data/languages/sa_Tirh.textproto b/Lib/gflanguages/data/languages/sa_Tirh.textproto index 9d4c704d4c4..f423205b9bf 100644 --- a/Lib/gflanguages/data/languages/sa_Tirh.textproto +++ b/Lib/gflanguages/data/languages/sa_Tirh.textproto @@ -1,7 +1,7 @@ id: "sa_Tirh" language: "sa" script: "Tirh" -name: "Sanskrit, Tirhuta" +name: "Sanskrit (Tirhuta)" region: "IN" sample_text { masthead_full: "𑒮𑒩𑒫𑒧" diff --git a/Lib/gflanguages/data/languages/sa_Wcho.textproto b/Lib/gflanguages/data/languages/sa_Wcho.textproto index 776b7cae6e5..ab19e53c01b 100644 --- a/Lib/gflanguages/data/languages/sa_Wcho.textproto +++ b/Lib/gflanguages/data/languages/sa_Wcho.textproto @@ -1,7 +1,7 @@ id: "sa_Wcho" language: "sa" 
script: "Wcho" -name: "Sanskrit, Wancho" +name: "Sanskrit (Wancho)" region: "IN" sample_text { masthead_full: "𞋃𞋁𞋆𞋀" diff --git a/Lib/gflanguages/data/languages/sa_Zanb.textproto b/Lib/gflanguages/data/languages/sa_Zanb.textproto index 53d3cbfa683..28e16d1d81f 100644 --- a/Lib/gflanguages/data/languages/sa_Zanb.textproto +++ b/Lib/gflanguages/data/languages/sa_Zanb.textproto @@ -1,7 +1,7 @@ id: "sa_Zanb" language: "sa" script: "Zanb" -name: "Sanskrit, Zanabazar" +name: "Sanskrit (Zanabazar)" region: "IN" sample_text { masthead_full: "𑨒𑨍𑨙𑨁" diff --git a/Lib/gflanguages/data/languages/sat_Beng.textproto b/Lib/gflanguages/data/languages/sat_Beng.textproto index 353c72e28d8..bd3d01689d0 100644 --- a/Lib/gflanguages/data/languages/sat_Beng.textproto +++ b/Lib/gflanguages/data/languages/sat_Beng.textproto @@ -1,6 +1,6 @@ id: "sat_Beng" language: "sat" script: "Beng" -name: "Santali, Bengali" +name: "Santali (Bengali)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/sat_Deva.textproto b/Lib/gflanguages/data/languages/sat_Deva.textproto index 2b48dce7d6e..f4a35543845 100644 --- a/Lib/gflanguages/data/languages/sat_Deva.textproto +++ b/Lib/gflanguages/data/languages/sat_Deva.textproto @@ -1,7 +1,7 @@ id: "sat_Deva" language: "sat" script: "Deva" -name: "Santali, Devanagari" +name: "Santali (Devanagari)" population: 0 region: "NP" sample_text { diff --git a/Lib/gflanguages/data/languages/sat_Latn.textproto b/Lib/gflanguages/data/languages/sat_Latn.textproto index cea3459b601..019c68b35ce 100644 --- a/Lib/gflanguages/data/languages/sat_Latn.textproto +++ b/Lib/gflanguages/data/languages/sat_Latn.textproto @@ -1,6 +1,6 @@ id: "sat_Latn" language: "sat" script: "Latn" -name: "Santali, Latin" +name: "Santali (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/sat_Orya.textproto b/Lib/gflanguages/data/languages/sat_Orya.textproto index 1046da3d0ff..c7ddce905ea 100644 --- a/Lib/gflanguages/data/languages/sat_Orya.textproto 
+++ b/Lib/gflanguages/data/languages/sat_Orya.textproto @@ -1,6 +1,6 @@ id: "sat_Orya" language: "sat" script: "Orya" -name: "Santali, Odia" +name: "Santali (Odia)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/sd_Khoj.textproto b/Lib/gflanguages/data/languages/sd_Khoj.textproto index a2f995b2934..a1b813ac455 100644 --- a/Lib/gflanguages/data/languages/sd_Khoj.textproto +++ b/Lib/gflanguages/data/languages/sd_Khoj.textproto @@ -1,7 +1,7 @@ id: "sd_Khoj" language: "sd" script: "Khoj" -name: "Sindhi, Khojki" +name: "Sindhi (Khojki)" historical: true population: 0 region: "IN" diff --git a/Lib/gflanguages/data/languages/sd_Sind.textproto b/Lib/gflanguages/data/languages/sd_Sind.textproto index 6cde7580dac..1d0a22d55dd 100644 --- a/Lib/gflanguages/data/languages/sd_Sind.textproto +++ b/Lib/gflanguages/data/languages/sd_Sind.textproto @@ -1,7 +1,7 @@ id: "sd_Sind" language: "sd" script: "Sind" -name: "Sindhi, Khudawadi" +name: "Sindhi (Khudawadi)" population: 0 region: "IN" region: "PK" @@ -21,4 +21,4 @@ sample_text { specimen_21: "𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘." 
specimen_16: "𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।\n𑋝𑋗𑋛𑋙𑋠 𑊰𑋑𑋝𑋠𑋑 𑊱𑋂𑋩𑋠𑋏 𑊲 𑊰𑋂𑋩𑋍 𑊲 𑋞𑊺𑋩𑋑 𑋄𑋘 𑋞𑋛𑋠𑋚𑋘 𑊺𑊰𑋑 𑋑𑋙𑋠𑋑𑋙 𑋒𑋘𑋏𑋠𑋆𑋘𑋠 𑊱𑋞𑋑। 𑊰𑋑𑋞𑋑 𑊺𑋘 𑊰𑊺𑋩𑋚 𑊲 𑋂𑋩𑋗𑋘𑋙 𑋞𑋠𑋝𑋚 𑋆𑋘𑋛 𑊱𑋞𑋘, 𑊺𑋙𑋘 𑊰𑋑𑋞𑋑 𑊺𑋘 𑋞𑊺 𑋕𑋘𑋘 𑋝𑋚𑋑 𑋖𑋚𑋘𑋘𑋁𑊰𑋙 𑋘 𑋛𑋠𑋙𑋛 𑋝𑋠𑋛𑊺 𑊰𑊻𑋩𑋍𑋘𑋠𑋙 𑊺𑋙𑋌 𑊼𑋞𑋙 𑋂𑋘।" } - + diff --git a/Lib/gflanguages/data/languages/shi_Tfng.textproto b/Lib/gflanguages/data/languages/shi_Tfng.textproto index 9043e804f26..e6e70c3b223 100644 --- a/Lib/gflanguages/data/languages/shi_Tfng.textproto +++ b/Lib/gflanguages/data/languages/shi_Tfng.textproto @@ -1,7 +1,7 @@ id: "shi_Tfng" language: "shi" script: "Tfng" -name: "Tachelhit, Tifinagh" +name: "Tachelhit (Tifinagh)" population: 0 region: "MA" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/sja_Latn.textproto b/Lib/gflanguages/data/languages/sja_Latn.textproto index e5861e92ae1..a22a76703e7 100644 --- a/Lib/gflanguages/data/languages/sja_Latn.textproto +++ b/Lib/gflanguages/data/languages/sja_Latn.textproto @@ -1,7 +1,7 @@ id: "sja_Latn" language: "sja" script: "Latn" -name: "Epena, Latin" +name: "Epena (Latin)" region: "CO" sample_text { masthead_full: "ŨũMm" diff --git a/Lib/gflanguages/data/languages/skr_Mult.textproto b/Lib/gflanguages/data/languages/skr_Mult.textproto index 2e27d32d6de..ad30b8805f9 100644 --- a/Lib/gflanguages/data/languages/skr_Mult.textproto +++ b/Lib/gflanguages/data/languages/skr_Mult.textproto @@ -1,7 +1,7 @@ id: "skr_Mult" language: "skr" script: "Mult" -name: "Saraiki, Multani" +name: "Saraiki (Multani)" historical: true sample_text { masthead_full: "𑊦𑊄𑊙𑊀" @@ -16,4 +16,4 @@ sample_text { 
specimen_32: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩" specimen_21: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 \n𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩 𑊟𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊒𑊀 𑊄𑊀𑊣𑊛𑊀 𑊀𑊟𑊀 𑊦𑊂𑊕 𑊠𑊂𑊗𑊌 𑊗𑊁𑊤𑊕 𑊣𑊆𑊀𑊩 𑊟𑊊𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊙𑊀 𑊦𑊄 𑊢𑊁𑊥 𑊙𑊀 𑊄𑊂𑊣 𑊤𑊏 𑊐𑊄𑊀 𑊌𑊀 𑊀𑊛𑊕𑊁𑊀 𑊢𑊦𑊢𑊁 𑊤𑊊 𑊂𑊄𑊂 𑊥𑊂𑊦𑊢 𑊊𑊢𑊣𑊕 𑊌𑊐𑊀𑊩" specimen_16: "𑊦𑊄 𑊥𑊅𑊥 𑊙𑊀 𑊔𑊂 𑊛𑊂𑊟𑊢𑊦𑊚𑊩 𑊂𑊚𑊦 𑊤𑊊𑊂 𑊚𑊔𑊕𑊛𑊂 𑊄𑊂 𑊀𑊅𑊀𑊩 𑊕𑊂 𑊦𑊀 𑊛𑊂 𑊠𑊄𑊂 𑊊𑊀 𑊌𑊟𑊁 𑊦𑊥𑊀 𑊠𑊀𑊣 𑊙𑊀 𑊠𑊄𑊂 𑊀𑊙𑊀 𑊦𑊀𑊩 𑊀𑊟𑊀 𑊂 𑊀𑊛𑊕𑊁 𑊌𑊀𑊙𑊙 𑊂𑊚𑊦 𑊄𑊂 𑊤𑊔𑊊𑊟𑊁 𑊀𑊟𑊀 𑊗𑊂𑊣𑊀 𑊊𑊦𑊢𑊀 𑊄𑊕𑊂 𑊛𑊋𑊀 𑊚𑊕𑊀 𑊛𑊂𑊖𑊢 𑊥𑊛 𑊄𑊂𑊌 𑊀𑊄𑊐 𑊄𑊢𑊄𑊀 𑊦𑊄 𑊛𑊢𑊀𑊀 𑊙𑊔 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊏 𑊣𑊦𑊥𑊩 \n𑊌𑊘𑊀 𑊀𑊛𑊕 𑊠𑊀𑊥 𑊟𑊙𑊊𑊥𑊁 𑊤𑊊 𑊢𑊒𑊀𑊂𑊥𑊩 𑊟𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊤𑊊 𑊤𑊒𑊀 𑊄𑊀𑊣𑊛𑊀 𑊀𑊟𑊀 𑊦𑊂𑊕 𑊠𑊂𑊗𑊌 𑊗𑊁𑊤𑊕 𑊣𑊆𑊀𑊩 𑊟𑊊𑊀 𑊂 𑊠𑊂𑊣𑊄 𑊙𑊀 𑊦𑊄 𑊢𑊁𑊥 𑊙𑊀 𑊄𑊂𑊣 𑊤𑊏 𑊐𑊄𑊀 𑊌𑊀 𑊀𑊛𑊕𑊁𑊀 𑊢𑊦𑊢𑊁 𑊤𑊊 𑊂𑊄𑊂 𑊥𑊂𑊦𑊢 𑊊𑊢𑊣𑊕 𑊌𑊐𑊀𑊩 𑊀𑊟𑊀 𑊂𑊥𑊙𑊁 𑊠𑊢𑊌𑊁 𑊦𑊁𑊩 𑊌𑊂 𑊂𑊚𑊦𑊀 𑊋𑊣𑊢𑊀 𑊚𑊀𑊣 𑊌𑊂 𑊥𑊂𑊦𑊢 𑊅𑊙𑊀 𑊦𑊚 𑊀𑊛𑊕 𑊔𑊔 𑊟𑊢𑊀𑊩 \n𑊛𑊢 𑊄𑊦𑊁 𑊂𑊄𑊂 𑊒𑊖𑊁𑊩 𑊖𑊒𑊀 𑊦𑊂𑊥 𑊤𑊊 𑊀 𑊄𑊀 𑊀𑊅𑊂𑊥 𑊠𑊒𑊀 𑊛𑊂 𑊙𑊀 𑊄𑊖𑊣𑊀 𑊄𑊠𑊁𑊀 𑊄𑊂 𑊢𑊌 𑊢𑊂𑊐𑊁 𑊛𑊁 𑊠𑊣𑊙𑊁 𑊦𑊀𑊩" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/so_Arab.textproto b/Lib/gflanguages/data/languages/so_Arab.textproto index 411d157935f..1d275729970 100644 --- a/Lib/gflanguages/data/languages/so_Arab.textproto +++ b/Lib/gflanguages/data/languages/so_Arab.textproto @@ -1,6 +1,6 @@ id: "so_Arab" language: "so" script: "Arab" -name: "Somali, Arabic" +name: "Somali (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/so_Osma.textproto b/Lib/gflanguages/data/languages/so_Osma.textproto index 4f89ae15fd3..79f8b38d26d 100644 --- a/Lib/gflanguages/data/languages/so_Osma.textproto +++ b/Lib/gflanguages/data/languages/so_Osma.textproto @@ -1,7 +1,7 @@ id: "so_Osma" language: "so" script: "Osma" -name: "Somali, Osmanya" +name: "Somali (Osmanya)" population: 0 region: "SO" sample_text { diff --git a/Lib/gflanguages/data/languages/sq_Elba.textproto 
b/Lib/gflanguages/data/languages/sq_Elba.textproto index a5cb460b1c7..3bc22975716 100644 --- a/Lib/gflanguages/data/languages/sq_Elba.textproto +++ b/Lib/gflanguages/data/languages/sq_Elba.textproto @@ -1,7 +1,7 @@ id: "sq_Elba" language: "sq" script: "Elba" -name: "Albanian, Elbasan" +name: "Albanian (Elbasan)" population: 0 sample_text { masthead_full: "𐔟𐔁𐔀𐔒" diff --git a/Lib/gflanguages/data/languages/sq_Vith.textproto b/Lib/gflanguages/data/languages/sq_Vith.textproto index 3bbd4f03026..4bd4aa07ac0 100644 --- a/Lib/gflanguages/data/languages/sq_Vith.textproto +++ b/Lib/gflanguages/data/languages/sq_Vith.textproto @@ -1,7 +1,7 @@ id: "sq_Vith" language: "sq" script: "Vith" -name: "Albanian, Vithkuqi" +name: "Albanian (Vithkuqi)" population: 0 sample_text { masthead_full: "𐖎𐖵𐕸𐖟" diff --git a/Lib/gflanguages/data/languages/su_Sund.textproto b/Lib/gflanguages/data/languages/su_Sund.textproto index 33662a321a6..28129bf0a51 100644 --- a/Lib/gflanguages/data/languages/su_Sund.textproto +++ b/Lib/gflanguages/data/languages/su_Sund.textproto @@ -1,7 +1,7 @@ id: "su_Sund" language: "su" script: "Sund" -name: "Sundanese, Sundanese" +name: "Sundanese (Sundanese)" autonym: "ᮘᮞ ᮞᮥᮔ᮪ᮓ" population: 0 region: "ID" diff --git a/Lib/gflanguages/data/languages/sus_Arab.textproto b/Lib/gflanguages/data/languages/sus_Arab.textproto index eb6bc782b90..ec167cb04e6 100644 --- a/Lib/gflanguages/data/languages/sus_Arab.textproto +++ b/Lib/gflanguages/data/languages/sus_Arab.textproto @@ -1,6 +1,6 @@ id: "sus_Arab" language: "sus" script: "Arab" -name: "Susu, Arabic" +name: "Susu (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/taq_Latn.textproto b/Lib/gflanguages/data/languages/taq_Latn.textproto index 6d65e5a4afd..2d2feb1e402 100644 --- a/Lib/gflanguages/data/languages/taq_Latn.textproto +++ b/Lib/gflanguages/data/languages/taq_Latn.textproto @@ -1,7 +1,7 @@ id: "taq_Latn" language: "taq" script: "Latn" -name: "Tamasheq, Latin" +name: "Tamasheq 
(Latin)" region: "ML" population: 914000 exemplar_chars { diff --git a/Lib/gflanguages/data/languages/taq_Tfng.textproto b/Lib/gflanguages/data/languages/taq_Tfng.textproto index 70357fd6225..d3c380e62bc 100644 --- a/Lib/gflanguages/data/languages/taq_Tfng.textproto +++ b/Lib/gflanguages/data/languages/taq_Tfng.textproto @@ -1,7 +1,7 @@ id: "taq_Tfng" language: "taq" script: "Tfng" -name: "Tamasheq, Tifinagh" +name: "Tamasheq (Tifinagh)" region: "ML" sample_text { masthead_full: "ⵎⴸⴰⵏ" diff --git a/Lib/gflanguages/data/languages/tbw_Tagb.textproto b/Lib/gflanguages/data/languages/tbw_Tagb.textproto index 9b1ffa60ce8..acd1c3d7ce5 100644 --- a/Lib/gflanguages/data/languages/tbw_Tagb.textproto +++ b/Lib/gflanguages/data/languages/tbw_Tagb.textproto @@ -1,7 +1,7 @@ id: "tbw_Tagb" language: "tbw" script: "Tagb" -name: "Tagbanwa, Tagbanwa" +name: "Tagbanwa (Tagbanwa)" population: 0 sample_text { masthead_full: "ᝣᝧᝮᝤ" diff --git a/Lib/gflanguages/data/languages/tg_Latn.textproto b/Lib/gflanguages/data/languages/tg_Latn.textproto index c394b4a6cd3..31a633119cf 100644 --- a/Lib/gflanguages/data/languages/tg_Latn.textproto +++ b/Lib/gflanguages/data/languages/tg_Latn.textproto @@ -1,7 +1,7 @@ id: "tg_Latn" language: "tg" script: "Latn" -name: "Tajik, Latin" +name: "Tajik (Latin)" population: 0 region: "TJ" sample_text { diff --git a/Lib/gflanguages/data/languages/thf_Deva.textproto b/Lib/gflanguages/data/languages/thf_Deva.textproto index ada51c773f7..72676d42d3b 100644 --- a/Lib/gflanguages/data/languages/thf_Deva.textproto +++ b/Lib/gflanguages/data/languages/thf_Deva.textproto @@ -1,7 +1,7 @@ id: "thf_Deva" language: "thf" script: "Deva" -name: "Thangmi, Devanagari" +name: "Thangmi (Devanagari)" region: "NP" sample_text { masthead_full: "सकखम" diff --git a/Lib/gflanguages/data/languages/ths_Deva.textproto b/Lib/gflanguages/data/languages/ths_Deva.textproto index 08fef6fece3..ed60147918e 100644 --- a/Lib/gflanguages/data/languages/ths_Deva.textproto +++ 
b/Lib/gflanguages/data/languages/ths_Deva.textproto @@ -1,7 +1,7 @@ id: "ths_Deva" language: "ths" script: "Deva" -name: "Thakali, Devanagari" +name: "Thakali (Devanagari)" region: "NP" sample_text { masthead_full: "हयमच" diff --git a/Lib/gflanguages/data/languages/tiw_Latn.textproto b/Lib/gflanguages/data/languages/tiw_Latn.textproto index a1323262752..6c1e6ca1770 100644 --- a/Lib/gflanguages/data/languages/tiw_Latn.textproto +++ b/Lib/gflanguages/data/languages/tiw_Latn.textproto @@ -1,7 +1,7 @@ id: "tiw_Latn" language: "tiw" script: "Latn" -name: "Tiwi, Latin" +name: "Tiwi (Latin)" region: "AU" sample_text { masthead_full: "TtAa" diff --git a/Lib/gflanguages/data/languages/tk_Cyrl.textproto b/Lib/gflanguages/data/languages/tk_Cyrl.textproto index 6dbe35c70a8..2145205275b 100644 --- a/Lib/gflanguages/data/languages/tk_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/tk_Cyrl.textproto @@ -1,7 +1,7 @@ id: "tk_Cyrl" language: "tk" script: "Cyrl" -name: "Turkmen, Cyrillic" +name: "Turkmen (Cyrillic)" autonym: "Түркменче" population: 0 region: "TM" diff --git a/Lib/gflanguages/data/languages/tk_Latn.textproto b/Lib/gflanguages/data/languages/tk_Latn.textproto index 45ca7ac2ed7..9b1f509b190 100644 --- a/Lib/gflanguages/data/languages/tk_Latn.textproto +++ b/Lib/gflanguages/data/languages/tk_Latn.textproto @@ -1,7 +1,7 @@ id: "tk_Latn" language: "tk" script: "Latn" -name: "Turkmen, Latin" +name: "Turkmen (Latin)" autonym: "Türkmençe" population: 11000000 region: "TM" diff --git a/Lib/gflanguages/data/languages/tkr_Latn.textproto b/Lib/gflanguages/data/languages/tkr_Latn.textproto index 34803fd382a..95b160f68b3 100644 --- a/Lib/gflanguages/data/languages/tkr_Latn.textproto +++ b/Lib/gflanguages/data/languages/tkr_Latn.textproto @@ -1,7 +1,7 @@ id: "tkr_Latn" language: "tkr" script: "Latn" -name: "Tsakhur, Latin" +name: "Tsakhur (Latin)" autonym: "Ts‘əxna miz" population: 22300 region: "AZ" diff --git a/Lib/gflanguages/data/languages/tlh_Latn.textproto 
b/Lib/gflanguages/data/languages/tlh_Latn.textproto index e90caa26ebf..b5e9381f17e 100644 --- a/Lib/gflanguages/data/languages/tlh_Latn.textproto +++ b/Lib/gflanguages/data/languages/tlh_Latn.textproto @@ -1,4 +1,4 @@ id: "tlh_Latn" language: "tlh" script: "Latn" -name: "Klingon, Latin" +name: "Klingon (Latin)" diff --git a/Lib/gflanguages/data/languages/tly_Cyrl.textproto b/Lib/gflanguages/data/languages/tly_Cyrl.textproto index cee27378eeb..4eb5bfe7155 100644 --- a/Lib/gflanguages/data/languages/tly_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/tly_Cyrl.textproto @@ -1,7 +1,7 @@ id: "tly_Cyrl" language: "tly" script: "Cyrl" -name: "Talysh, Cyrillic" +name: "Talysh (Cyrillic)" autonym: "Толыши" population: 0 exemplar_chars { diff --git a/Lib/gflanguages/data/languages/tly_Latn.textproto b/Lib/gflanguages/data/languages/tly_Latn.textproto index 27b43019ad1..4cb944c57f7 100644 --- a/Lib/gflanguages/data/languages/tly_Latn.textproto +++ b/Lib/gflanguages/data/languages/tly_Latn.textproto @@ -1,7 +1,7 @@ id: "tly_Latn" language: "tly" script: "Latn" -name: "Talysh, Latin" +name: "Talysh (Latin)" autonym: "Tolışi" population: 229590 region: "AZ" diff --git a/Lib/gflanguages/data/languages/tr_Arab.textproto b/Lib/gflanguages/data/languages/tr_Arab.textproto index cf8805c517d..1864bdba044 100644 --- a/Lib/gflanguages/data/languages/tr_Arab.textproto +++ b/Lib/gflanguages/data/languages/tr_Arab.textproto @@ -1,6 +1,6 @@ id: "tr_Arab" language: "tr" script: "Arab" -name: "Turkish, Arabic" +name: "Turkish (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/tru_Syrc.textproto b/Lib/gflanguages/data/languages/tru_Syrc.textproto index d82a6271de1..40e9ef1a45a 100644 --- a/Lib/gflanguages/data/languages/tru_Syrc.textproto +++ b/Lib/gflanguages/data/languages/tru_Syrc.textproto @@ -1,7 +1,7 @@ id: "tru_Syrc" language: "tru" script: "Syrc" -name: "Turoyo, Syriac" +name: "Turoyo (Syriac)" population: 0 sample_text { masthead_full: "ܐܘܢܫ" diff 
--git a/Lib/gflanguages/data/languages/tt_Arab.textproto b/Lib/gflanguages/data/languages/tt_Arab.textproto index 518e1ebff7a..f42de4ac58c 100644 --- a/Lib/gflanguages/data/languages/tt_Arab.textproto +++ b/Lib/gflanguages/data/languages/tt_Arab.textproto @@ -1,7 +1,7 @@ id: "tt_Arab" language: "tt" script: "Arab" -name: "Tatar, Arabic" +name: "Tatar (Arabic)" region: "RU" sample_text { masthead_full: "نارل" diff --git a/Lib/gflanguages/data/languages/tt_Latn.textproto b/Lib/gflanguages/data/languages/tt_Latn.textproto index 8fda62cdb65..5eae47021d6 100644 --- a/Lib/gflanguages/data/languages/tt_Latn.textproto +++ b/Lib/gflanguages/data/languages/tt_Latn.textproto @@ -1,7 +1,7 @@ id: "tt_Latn" language: "tt" script: "Latn" -name: "Tatar, Latin" +name: "Tatar (Latin)" region: "RU" sample_text { masthead_full: "BbAa" diff --git a/Lib/gflanguages/data/languages/udm_Latn.textproto b/Lib/gflanguages/data/languages/udm_Latn.textproto index a32f45a2cb5..0fdc8bb2836 100644 --- a/Lib/gflanguages/data/languages/udm_Latn.textproto +++ b/Lib/gflanguages/data/languages/udm_Latn.textproto @@ -1,6 +1,6 @@ id: "udm_Latn" language: "udm" script: "Latn" -name: "Udmurt, Latin" +name: "Udmurt (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ug_Latn.textproto b/Lib/gflanguages/data/languages/ug_Latn.textproto index ed215f2d9ef..018ee07c3b4 100644 --- a/Lib/gflanguages/data/languages/ug_Latn.textproto +++ b/Lib/gflanguages/data/languages/ug_Latn.textproto @@ -1,7 +1,7 @@ id: "ug_Latn" language: "ug" script: "Latn" -name: "Uyghur, Latin" +name: "Uyghur (Latin)" autonym: "Uighur" population: 0 region: "CN" diff --git a/Lib/gflanguages/data/languages/unr_Orya.textproto b/Lib/gflanguages/data/languages/unr_Orya.textproto index 18e7a979043..9f90d2e48d3 100644 --- a/Lib/gflanguages/data/languages/unr_Orya.textproto +++ b/Lib/gflanguages/data/languages/unr_Orya.textproto @@ -1,7 +1,7 @@ id: "unr_Orya" language: "unr" script: "Orya" -name: "Mundari, Odia" 
+name: "Mundari (Odia)" region: "IN" sample_text { masthead_full: "ସବନହ" diff --git a/Lib/gflanguages/data/languages/unx_Deva.textproto b/Lib/gflanguages/data/languages/unx_Deva.textproto index 6f79435eb3d..a73cc7fcb60 100644 --- a/Lib/gflanguages/data/languages/unx_Deva.textproto +++ b/Lib/gflanguages/data/languages/unx_Deva.textproto @@ -1,7 +1,7 @@ id: "unx_Deva" language: "unx" script: "Deva" -name: "Munda, Devanagari" +name: "Munda (Devanagari)" population: 0 region: "BT" region: "FJ" diff --git a/Lib/gflanguages/data/languages/vi_Hani.textproto b/Lib/gflanguages/data/languages/vi_Hani.textproto index 83ab7450494..fa7958b4a49 100644 --- a/Lib/gflanguages/data/languages/vi_Hani.textproto +++ b/Lib/gflanguages/data/languages/vi_Hani.textproto @@ -1,7 +1,7 @@ id: "vi_Hani" language: "vi" script: "Hani" -name: "Vietnamese, Han" +name: "Vietnamese (Han)" population: 0 region: "VN" sample_text { diff --git a/Lib/gflanguages/data/languages/wal_Ethi.textproto b/Lib/gflanguages/data/languages/wal_Ethi.textproto index b5c7fbe6d3f..41d9f77dfb2 100644 --- a/Lib/gflanguages/data/languages/wal_Ethi.textproto +++ b/Lib/gflanguages/data/languages/wal_Ethi.textproto @@ -1,6 +1,6 @@ id: "wal_Ethi" language: "wal" script: "Ethi" -name: "Wolaytta, Ethiopic" +name: "Wolaytta (Ethiopic)" population: 1946034 region: "ET" diff --git a/Lib/gflanguages/data/languages/wal_Latn.textproto b/Lib/gflanguages/data/languages/wal_Latn.textproto index c6676a56833..f5c53476312 100644 --- a/Lib/gflanguages/data/languages/wal_Latn.textproto +++ b/Lib/gflanguages/data/languages/wal_Latn.textproto @@ -1,7 +1,7 @@ id: "wal_Latn" language: "wal" script: "Latn" -name: "Wolaytta, Latin" +name: "Wolaytta (Latin)" population: 7000000 region: "ET" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/wo_Arab.textproto b/Lib/gflanguages/data/languages/wo_Arab.textproto index 53ad7ce8d00..a30c05047c1 100644 --- a/Lib/gflanguages/data/languages/wo_Arab.textproto +++ 
b/Lib/gflanguages/data/languages/wo_Arab.textproto @@ -1,6 +1,6 @@ id: "wo_Arab" language: "wo" script: "Arab" -name: "Wolof, Arabic" +name: "Wolof (Arabic)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/xum_Latn.textproto b/Lib/gflanguages/data/languages/xum_Latn.textproto index 13b9abbc11c..12c70d1a797 100644 --- a/Lib/gflanguages/data/languages/xum_Latn.textproto +++ b/Lib/gflanguages/data/languages/xum_Latn.textproto @@ -1,6 +1,6 @@ id: "xum_Latn" language: "xum" script: "Latn" -name: "Umbrian, Latin" +name: "Umbrian (Latin)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/ybh_Deva.textproto b/Lib/gflanguages/data/languages/ybh_Deva.textproto index c4ae24a8e8b..d75164b3d50 100644 --- a/Lib/gflanguages/data/languages/ybh_Deva.textproto +++ b/Lib/gflanguages/data/languages/ybh_Deva.textproto @@ -1,7 +1,7 @@ id: "ybh_Deva" language: "ybh" script: "Deva" -name: "Yakha, Devanagari" +name: "Yakha (Devanagari)" region: "NP" sample_text { masthead_full: "घकओथ" diff --git a/Lib/gflanguages/data/languages/zh_Hebr.textproto b/Lib/gflanguages/data/languages/zh_Hebr.textproto index 708d21ee59c..80a53f87a82 100644 --- a/Lib/gflanguages/data/languages/zh_Hebr.textproto +++ b/Lib/gflanguages/data/languages/zh_Hebr.textproto @@ -1,7 +1,7 @@ id: "zh_Hebr" language: "zh" script: "Hebr" -name: "Chinese, Hebrew" +name: "Chinese (Hebrew)" region: "CN" sample_text { masthead_full: "עיןז" diff --git a/Lib/gflanguages/data/languages/zlm_Arab.textproto b/Lib/gflanguages/data/languages/zlm_Arab.textproto index d8d01d290b2..a8241da7f09 100644 --- a/Lib/gflanguages/data/languages/zlm_Arab.textproto +++ b/Lib/gflanguages/data/languages/zlm_Arab.textproto @@ -1,7 +1,7 @@ id: "zlm_Arab" language: "zlm" script: "Arab" -name: "Malay, Arabic" +name: "Malay (Arabic)" region: "BN" region: "ID" region: "MY" diff --git a/Lib/gflanguages/data/languages/zlm_Latn.textproto b/Lib/gflanguages/data/languages/zlm_Latn.textproto index 
0907c3b22a1..30cd71f2639 100644 --- a/Lib/gflanguages/data/languages/zlm_Latn.textproto +++ b/Lib/gflanguages/data/languages/zlm_Latn.textproto @@ -1,7 +1,7 @@ id: "zlm_Latn" language: "zlm" script: "Latn" -name: "Malay, Latin" +name: "Malay (Latin)" region: "BN" region: "ID" region: "MY" From b84e90ebe51f8ebda84ca9e2e079a51215849875 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Sun, 21 Jul 2024 19:15:24 -0400 Subject: [PATCH 12/39] Fix language name regex to allow spaces and a couple other edge cases --- tests/test_data_languages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 4e347dd9b47..62d668f1594 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,7 +83,7 @@ "tlh_Latn": "Klingon is an artifical language.", } -LANGUAGE_NAME_REGEX = "^[A-Za-z-]+(, [A-Za-z-]+)?( [(][A-Za-z-]+[)])?$" +LANGUAGE_NAME_REGEX = "^[A-Za-z'- ]+[ʼ]?(, [A-Za-z- ]+)?( [(][A-Za-z- ]+[)])?$" @pytest.mark.parametrize("lang_code", LANGUAGES) From 3a48ff209d6058dc2f18d83078ebcb0d2d60ce0a Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Sun, 21 Jul 2024 20:01:48 -0400 Subject: [PATCH 13/39] Remove duplicate entries for Malay --- .../data/languages/ms_Arab.textproto | 23 ------------------- .../data/languages/ms_Latn.textproto | 16 ------------- 2 files changed, 39 deletions(-) delete mode 100644 Lib/gflanguages/data/languages/ms_Arab.textproto delete mode 100644 Lib/gflanguages/data/languages/ms_Latn.textproto diff --git a/Lib/gflanguages/data/languages/ms_Arab.textproto b/Lib/gflanguages/data/languages/ms_Arab.textproto deleted file mode 100644 index 6be5681c798..00000000000 --- a/Lib/gflanguages/data/languages/ms_Arab.textproto +++ /dev/null @@ -1,23 +0,0 @@ -id: "ms_Arab" -language: "ms" -script: "Arab" -name: "Malay (Arabic)" -population: 3228030 -region: "BN" -region: "CC" -region: "ID" -region: "MY" -sample_text { - masthead_full: "سموا" - masthead_partial: "أن" 
- styles: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي" - tester: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك" - poster_sm: "سموا مأنسي" - poster_md: "سموا مأنسي" - poster_lg: "سموا" - specimen_48: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن" - specimen_36: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن." - specimen_32: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن." - specimen_21: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن." - specimen_16: "سموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن.\nسموا مأنسي دلاهيركن بيبس دان سامرات دري سڬي كموليأن دان حق. مريك ممڤوڽاي ڤميكيرن دان ڤراسأن هاتي دان هندقله برتيندق د انتارا ساتو سام لائن دڠن سماڠت ڤرساودارأن." 
-} diff --git a/Lib/gflanguages/data/languages/ms_Latn.textproto b/Lib/gflanguages/data/languages/ms_Latn.textproto deleted file mode 100644 index 2a838d7e9b0..00000000000 --- a/Lib/gflanguages/data/languages/ms_Latn.textproto +++ /dev/null @@ -1,16 +0,0 @@ -id: "ms_Latn" -language: "ms" -script: "Latn" -name: "Malay" -autonym: "Malaysia" -population: 34869275 -region: "BN" -region: "ID" -region: "MY" -region: "SG" -exemplar_chars { - base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z" - numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" - punctuation: "- – — , ; : ! ? . … \' ‘ ’ \" “ ” ( ) [ ] @ * / & #" - index: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z" -} From 0db6f9a30733fb8df0c6722d93047afd67669415 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Sun, 21 Jul 2024 20:03:31 -0400 Subject: [PATCH 14/39] Fix language name regex --- tests/test_data_languages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 62d668f1594..9d06abdd0bb 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,7 +83,7 @@ "tlh_Latn": "Klingon is an artifical language.", } -LANGUAGE_NAME_REGEX = "^[A-Za-z'- ]+[ʼ]?(, [A-Za-z- ]+)?( [(][A-Za-z- ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[A-Za-z'- ]+(ʼ)?(, [A-Za-z- ]+)?( [(][A-Za-z- ]+[)])?$" @pytest.mark.parametrize("lang_code", LANGUAGES) From 17b4bdd2ef28ec90afa3d9028c02aa530b70a83e Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Sun, 21 Jul 2024 22:49:26 -0400 Subject: [PATCH 15/39] Fix dash character in character class --- tests/test_data_languages.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 9d06abdd0bb..72bbd32883a 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,7 +83,8 @@ "tlh_Latn": "Klingon is an artifical language.", } 
-LANGUAGE_NAME_REGEX = "^[A-Za-z'- ]+(ʼ)?(, [A-Za-z- ]+)?( [(][A-Za-z- ]+[)])?$" +# "ʼ" allowed as last character in language name for Metaʼ +LANGUAGE_NAME_REGEX = "^[-A-Za-z ]+(ʼ)?(, [-A-Za-z ]+)?( [(][-A-Za-z ]+[)])?$" @pytest.mark.parametrize("lang_code", LANGUAGES) From d7416e26d08206b908d9fe3d1230f7bcf6d9bee0 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 00:17:18 -0400 Subject: [PATCH 16/39] Update language name character class to allow accent marks --- Lib/gflanguages/data/languages/art_Latn.textproto | 2 +- tests/test_data_languages.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gflanguages/data/languages/art_Latn.textproto b/Lib/gflanguages/data/languages/art_Latn.textproto index a7c38ea65ba..09c660b688b 100644 --- a/Lib/gflanguages/data/languages/art_Latn.textproto +++ b/Lib/gflanguages/data/languages/art_Latn.textproto @@ -1,7 +1,7 @@ id: "art_Latn" language: "art" script: "Latn" -name: "Artificial languages, Latin, World" +name: "Artificial languages, World (Latin)" sample_text { masthead_full: "JjAa" masthead_partial: "Nn" diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 72bbd32883a..c05b7d8a574 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -84,7 +84,7 @@ } # "ʼ" allowed as last character in language name for Metaʼ -LANGUAGE_NAME_REGEX = "^[-A-Za-z ]+(ʼ)?(, [-A-Za-z ]+)?( [(][-A-Za-z ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$" @pytest.mark.parametrize("lang_code", LANGUAGES) From 5dc2b3ba0107d90189fde0fe5434086a838c542d Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 00:21:25 -0400 Subject: [PATCH 17/39] Re-structure several language names --- Lib/gflanguages/data/languages/azj_Cyrl.textproto | 2 +- Lib/gflanguages/data/languages/chn_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/de_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/dhw_Deva.textproto | 2 
+- Lib/gflanguages/data/languages/fr_Dupl.textproto | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/gflanguages/data/languages/azj_Cyrl.textproto b/Lib/gflanguages/data/languages/azj_Cyrl.textproto index 7cb43424efe..2af9ae3a581 100644 --- a/Lib/gflanguages/data/languages/azj_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/azj_Cyrl.textproto @@ -1,7 +1,7 @@ id: "azj_Cyrl" language: "azj" script: "Cyrl" -name: "Azerbaijani, North, Cyrillic" +name: "Azerbaijani, North (Cyrillic)" autonym: "азәрбајҹан (Кирил, Азәрбајҹан)" region: "AZ" sample_text { diff --git a/Lib/gflanguages/data/languages/chn_Dupl.textproto b/Lib/gflanguages/data/languages/chn_Dupl.textproto index d3f4689566e..d402556fcbe 100644 --- a/Lib/gflanguages/data/languages/chn_Dupl.textproto +++ b/Lib/gflanguages/data/languages/chn_Dupl.textproto @@ -1,6 +1,6 @@ id: "chn_Dupl" language: "chn" script: "Dupl" -name: "Chinook Jargon, Duployan" +name: "Chinook Jargon (Duployan shorthand)" region: "US" region: "CA" diff --git a/Lib/gflanguages/data/languages/de_Dupl.textproto b/Lib/gflanguages/data/languages/de_Dupl.textproto index 718b6fd7479..7910e1de427 100644 --- a/Lib/gflanguages/data/languages/de_Dupl.textproto +++ b/Lib/gflanguages/data/languages/de_Dupl.textproto @@ -1,5 +1,5 @@ id: "de_Dupl" language: "de" script: "Dupl" -name: "German (Duployan)" +name: "German (Duployan shorthand)" region: "DE" diff --git a/Lib/gflanguages/data/languages/dhw_Deva.textproto b/Lib/gflanguages/data/languages/dhw_Deva.textproto index 9bbe57b8a38..3ddb7774ba3 100644 --- a/Lib/gflanguages/data/languages/dhw_Deva.textproto +++ b/Lib/gflanguages/data/languages/dhw_Deva.textproto @@ -1,7 +1,7 @@ id: "dhw_Deva" language: "dhw" script: "Deva" -name: "Dhanwar, Nepal, Devanagari" +name: "Dhanwar, Nepal (Devanagari)" region: "NP" sample_text { masthead_full: "सभमन" diff --git a/Lib/gflanguages/data/languages/fr_Dupl.textproto b/Lib/gflanguages/data/languages/fr_Dupl.textproto index 
0093d282a83..584d4cbda76 100644 --- a/Lib/gflanguages/data/languages/fr_Dupl.textproto +++ b/Lib/gflanguages/data/languages/fr_Dupl.textproto @@ -1,5 +1,5 @@ id: "fr_Dupl" language: "fr" script: "Dupl" -name: "French (Duployan)" +name: "French (Duployan shorthand)" historical: true From 640c050f19b10245ef27ebea400f4bf445843c02 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 00:43:01 -0400 Subject: [PATCH 18/39] Expand character class for language names --- Lib/gflanguages/data/languages/bdh_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bkc_Latn.textproto | 2 +- Lib/gflanguages/data/languages/bm_Nkoo.textproto | 2 +- Lib/gflanguages/data/languages/bsq_Bass.textproto | 2 +- Lib/gflanguages/data/languages/cbk_Latn.textproto | 2 +- Lib/gflanguages/data/languages/chn_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/de_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/dyu_Nkoo.textproto | 2 +- Lib/gflanguages/data/languages/eto_Latn.textproto | 2 +- Lib/gflanguages/data/languages/fr_Dupl.textproto | 2 +- Lib/gflanguages/data/languages/gcf_Latn.textproto | 2 +- Lib/gflanguages/data/languages/man_Nkoo.textproto | 2 +- Lib/gflanguages/data/scripts/Beng.textproto | 3 +-- tests/test_data_languages.py | 12 ++++++++++-- 14 files changed, 23 insertions(+), 16 deletions(-) diff --git a/Lib/gflanguages/data/languages/bdh_Latn.textproto b/Lib/gflanguages/data/languages/bdh_Latn.textproto index 7e7ccf6dda5..ecbbb850b38 100644 --- a/Lib/gflanguages/data/languages/bdh_Latn.textproto +++ b/Lib/gflanguages/data/languages/bdh_Latn.textproto @@ -1,7 +1,7 @@ id: "bdh_Latn" language: "bdh" script: "Latn" -name: "Baka (DRC/South Sudan)" +name: "Baka, DRC/South Sudan" autonym: "Tara Baká" population: 60000 region: "CD" diff --git a/Lib/gflanguages/data/languages/bkc_Latn.textproto b/Lib/gflanguages/data/languages/bkc_Latn.textproto index 02b8bca30d3..08da4d26dac 100644 --- a/Lib/gflanguages/data/languages/bkc_Latn.textproto +++ 
b/Lib/gflanguages/data/languages/bkc_Latn.textproto @@ -1,7 +1,7 @@ id: "bkc_Latn" language: "bkc" script: "Latn" -name: "Baka (Cameroon/Gabon)" +name: "Baka, Cameroon/Gabon" population: 71000 region: "CM" region: "GA" diff --git a/Lib/gflanguages/data/languages/bm_Nkoo.textproto b/Lib/gflanguages/data/languages/bm_Nkoo.textproto index 2734cdca766..1136be516c1 100644 --- a/Lib/gflanguages/data/languages/bm_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/bm_Nkoo.textproto @@ -1,6 +1,6 @@ id: "bm_Nkoo" language: "bm" script: "Nkoo" -name: "Bambara (Nko)" +name: "Bambara (N’Ko)" population: 16000000 region: "ML" diff --git a/Lib/gflanguages/data/languages/bsq_Bass.textproto b/Lib/gflanguages/data/languages/bsq_Bass.textproto index 3d76b53b1da..e2984fefbbe 100644 --- a/Lib/gflanguages/data/languages/bsq_Bass.textproto +++ b/Lib/gflanguages/data/languages/bsq_Bass.textproto @@ -1,7 +1,7 @@ id: "bsq_Bass" language: "bsq" script: "Bass" -name: "Bassa (Vah)" +name: "Bassa (Bassa Vah)" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/cbk_Latn.textproto b/Lib/gflanguages/data/languages/cbk_Latn.textproto index 7053e530f99..748d555e183 100644 --- a/Lib/gflanguages/data/languages/cbk_Latn.textproto +++ b/Lib/gflanguages/data/languages/cbk_Latn.textproto @@ -1,7 +1,7 @@ id: "cbk_Latn" language: "cbk" script: "Latn" -name: "Chavacano, Latin, Philippines" +name: "Chavacano, Philippines (Latin)" region: "PH" sample_text { masthead_full: "TtOo" diff --git a/Lib/gflanguages/data/languages/chn_Dupl.textproto b/Lib/gflanguages/data/languages/chn_Dupl.textproto index d402556fcbe..2e8e8213cb7 100644 --- a/Lib/gflanguages/data/languages/chn_Dupl.textproto +++ b/Lib/gflanguages/data/languages/chn_Dupl.textproto @@ -1,6 +1,6 @@ id: "chn_Dupl" language: "chn" script: "Dupl" -name: "Chinook Jargon (Duployan shorthand)" +name: "Chinook Jargon (Duployan)" region: "US" region: "CA" diff --git a/Lib/gflanguages/data/languages/de_Dupl.textproto 
b/Lib/gflanguages/data/languages/de_Dupl.textproto index 7910e1de427..718b6fd7479 100644 --- a/Lib/gflanguages/data/languages/de_Dupl.textproto +++ b/Lib/gflanguages/data/languages/de_Dupl.textproto @@ -1,5 +1,5 @@ id: "de_Dupl" language: "de" script: "Dupl" -name: "German (Duployan shorthand)" +name: "German (Duployan)" region: "DE" diff --git a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto index 7edc157ecd5..eb891abd2d9 100644 --- a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto @@ -1,5 +1,5 @@ id: "dyu_Nkoo" language: "dyu" script: "Nkoo" -name: "Dyula (Nko)" +name: "Dyula (N’Ko)" region: "CI" diff --git a/Lib/gflanguages/data/languages/eto_Latn.textproto b/Lib/gflanguages/data/languages/eto_Latn.textproto index d016fd39f97..2da83f6b078 100644 --- a/Lib/gflanguages/data/languages/eto_Latn.textproto +++ b/Lib/gflanguages/data/languages/eto_Latn.textproto @@ -1,7 +1,7 @@ id: "eto_Latn" language: "eto" script: "Latn" -name: "Eton (Cameroon)" +name: "Eton, Cameroon" population: 400000 region: "CM" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/fr_Dupl.textproto b/Lib/gflanguages/data/languages/fr_Dupl.textproto index 584d4cbda76..0093d282a83 100644 --- a/Lib/gflanguages/data/languages/fr_Dupl.textproto +++ b/Lib/gflanguages/data/languages/fr_Dupl.textproto @@ -1,5 +1,5 @@ id: "fr_Dupl" language: "fr" script: "Dupl" -name: "French (Duployan shorthand)" +name: "French (Duployan)" historical: true diff --git a/Lib/gflanguages/data/languages/gcf_Latn.textproto b/Lib/gflanguages/data/languages/gcf_Latn.textproto index 56401d24d83..b36c6e60449 100644 --- a/Lib/gflanguages/data/languages/gcf_Latn.textproto +++ b/Lib/gflanguages/data/languages/gcf_Latn.textproto @@ -1,7 +1,7 @@ id: "gcf_Latn" language: "gcf" script: "Latn" -name: "Guadeloupean Creole French, Latin, Martinique" +name: "Guadeloupean Creole French, Martinique (Latin)" region: "GP" 
region: "MQ" sample_text { diff --git a/Lib/gflanguages/data/languages/man_Nkoo.textproto b/Lib/gflanguages/data/languages/man_Nkoo.textproto index 2ba2349b183..4e9166d0201 100644 --- a/Lib/gflanguages/data/languages/man_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/man_Nkoo.textproto @@ -1,5 +1,5 @@ id: "man_Nkoo" language: "man" script: "Nkoo" -name: "Mandingo (Nko)" +name: "Mandingo (N’Ko)" region: "GN" diff --git a/Lib/gflanguages/data/scripts/Beng.textproto b/Lib/gflanguages/data/scripts/Beng.textproto index d583c7ead81..d68be9e4d79 100644 --- a/Lib/gflanguages/data/scripts/Beng.textproto +++ b/Lib/gflanguages/data/scripts/Beng.textproto @@ -1,3 +1,2 @@ id: "Beng" -name: "Bangla" - +name: "Bengali" diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index c05b7d8a574..a859cf78fe4 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -84,7 +84,15 @@ } # "ʼ" allowed as last character in language name for Metaʼ -LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$" +# Some scripts have abbreviated names for reference in language names that are +# sufficient in context. If an alternate is listed here, it should be used +# universally and consistently across all language names. 
+ALTERNATE_SCRIPT_NAMES = { + "Dupl": "Duployan", + "Hans": "Simplified", + "Hant": "Traditional", +} @pytest.mark.parametrize("lang_code", LANGUAGES) @@ -291,7 +299,7 @@ def test_language_uniqueness(): def test_language_name_structure(): languages_with_bad_name_structure = {} for lang in LANGUAGES.values(): - script_name = SCRIPTS[lang.script].name + script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script] names = [["name", lang.name]] if lang.preferred_name: names += [["preferred_name", lang.preferred_name]] From 7d5fe21c45ec7944f09fd43825a693938fa677b8 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:06:51 -0400 Subject: [PATCH 19/39] Correct a batch of language names --- Lib/gflanguages/data/languages/brx_Beng.textproto | 2 +- Lib/gflanguages/data/languages/jiv_Latn.textproto | 2 +- Lib/gflanguages/data/languages/kek_Latn.textproto | 2 +- Lib/gflanguages/data/languages/khr_Beng.textproto | 2 +- Lib/gflanguages/data/languages/ksw_Mymr.textproto | 2 +- Lib/gflanguages/data/languages/kyw_Beng.textproto | 2 +- Lib/gflanguages/data/languages/laj_Latn.textproto | 2 +- Lib/gflanguages/data/languages/las_Latn.textproto | 4 ++-- Lib/gflanguages/data/languages/mcf_Latn.textproto | 2 +- Lib/gflanguages/data/languages/mic_Latn.textproto | 2 +- Lib/gflanguages/data/languages/non_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ojb_Cans.textproto | 2 +- Lib/gflanguages/data/languages/pnz_Latn.textproto | 2 +- Lib/gflanguages/data/languages/qud_Latn.textproto | 2 +- Lib/gflanguages/data/languages/ria_Latn.textproto | 2 +- Lib/gflanguages/data/languages/rob_Latn.textproto | 2 +- 16 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Lib/gflanguages/data/languages/brx_Beng.textproto b/Lib/gflanguages/data/languages/brx_Beng.textproto index 2dd936b68a5..5e8f30cc06a 100644 --- a/Lib/gflanguages/data/languages/brx_Beng.textproto +++ 
b/Lib/gflanguages/data/languages/brx_Beng.textproto @@ -1,7 +1,7 @@ id: "brx_Beng" language: "brx" script: "Beng" -name: "Bodo (Bangla)" +name: "Bodo (Bengali)" region: "IN" sample_text { masthead_full: "গসবঙ" diff --git a/Lib/gflanguages/data/languages/jiv_Latn.textproto b/Lib/gflanguages/data/languages/jiv_Latn.textproto index b2e53d7d4b0..eee5d00a911 100644 --- a/Lib/gflanguages/data/languages/jiv_Latn.textproto +++ b/Lib/gflanguages/data/languages/jiv_Latn.textproto @@ -1,7 +1,7 @@ id: "jiv_Latn" language: "jiv" script: "Latn" -name: "Shuar, Latin, Ecuador" +name: "Shuar, Ecuador (Latin)" region: "EC" sample_text { masthead_full: "PpEe" diff --git a/Lib/gflanguages/data/languages/kek_Latn.textproto b/Lib/gflanguages/data/languages/kek_Latn.textproto index 0b03bd01d04..23920251eab 100644 --- a/Lib/gflanguages/data/languages/kek_Latn.textproto +++ b/Lib/gflanguages/data/languages/kek_Latn.textproto @@ -1,7 +1,7 @@ id: "kek_Latn" language: "kek" script: "Latn" -name: "Q\'eqchi\'" +name: "Qʼeqchiʼ" region: "GT" region: "BZ" sample_text { diff --git a/Lib/gflanguages/data/languages/khr_Beng.textproto b/Lib/gflanguages/data/languages/khr_Beng.textproto index ae571a7eced..b56586c7630 100644 --- a/Lib/gflanguages/data/languages/khr_Beng.textproto +++ b/Lib/gflanguages/data/languages/khr_Beng.textproto @@ -1,7 +1,7 @@ id: "khr_Beng" language: "khr" script: "Beng" -name: "Kharia (Bangla)" +name: "Kharia (Bengali)" region: "IN" sample_text { masthead_full: "সউবম" diff --git a/Lib/gflanguages/data/languages/ksw_Mymr.textproto b/Lib/gflanguages/data/languages/ksw_Mymr.textproto index 82d1ba04bd3..430b234e872 100644 --- a/Lib/gflanguages/data/languages/ksw_Mymr.textproto +++ b/Lib/gflanguages/data/languages/ksw_Mymr.textproto @@ -1,7 +1,7 @@ id: "ksw_Mymr" language: "ksw" script: "Mymr" -name: "S\'gaw Karen, Myanmar" +name: "S’gaw Karen, Myanmar" region: "MM" sample_text { masthead_full: "ဟခပက" diff --git a/Lib/gflanguages/data/languages/kyw_Beng.textproto 
b/Lib/gflanguages/data/languages/kyw_Beng.textproto index 4569efcca4a..6825755de62 100644 --- a/Lib/gflanguages/data/languages/kyw_Beng.textproto +++ b/Lib/gflanguages/data/languages/kyw_Beng.textproto @@ -1,7 +1,7 @@ id: "kyw_Beng" language: "kyw" script: "Beng" -name: "Kudmali (Bangla)" +name: "Kudmali (Bengali)" region: "IN" sample_text { masthead_full: "সভমন" diff --git a/Lib/gflanguages/data/languages/laj_Latn.textproto b/Lib/gflanguages/data/languages/laj_Latn.textproto index 5f88f2f6fb2..23b39e9e110 100644 --- a/Lib/gflanguages/data/languages/laj_Latn.textproto +++ b/Lib/gflanguages/data/languages/laj_Latn.textproto @@ -1,7 +1,7 @@ id: "laj_Latn" language: "laj" script: "Latn" -name: "Lango [Uganda]" +name: "Lango, Uganda" population: 1643614 region: "UG" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/las_Latn.textproto b/Lib/gflanguages/data/languages/las_Latn.textproto index 6827456ffec..c4f58d485bd 100644 --- a/Lib/gflanguages/data/languages/las_Latn.textproto +++ b/Lib/gflanguages/data/languages/las_Latn.textproto @@ -1,7 +1,7 @@ id: "las_Latn" language: "las" script: "Latn" -name: "Lama (Togo)" +name: "Lama, Togo" autonym: "lamʋ" population: 260000 region: "TG" @@ -11,4 +11,4 @@ exemplar_chars { marks: "◌̃" } source: "Wikipedia" -source: "https://www.webonary.org/lama" \ No newline at end of file +source: "https://www.webonary.org/lama" diff --git a/Lib/gflanguages/data/languages/mcf_Latn.textproto b/Lib/gflanguages/data/languages/mcf_Latn.textproto index 683ca93bd89..32c10bf3da8 100644 --- a/Lib/gflanguages/data/languages/mcf_Latn.textproto +++ b/Lib/gflanguages/data/languages/mcf_Latn.textproto @@ -1,7 +1,7 @@ id: "mcf_Latn" language: "mcf" script: "Latn" -name: "Matsés, Latin, Peru" +name: "Matsés (Latin)" region: "PE" region: "BR" sample_text { diff --git a/Lib/gflanguages/data/languages/mic_Latn.textproto b/Lib/gflanguages/data/languages/mic_Latn.textproto index 8bb5bd8ee86..62251f04157 100644 --- 
a/Lib/gflanguages/data/languages/mic_Latn.textproto +++ b/Lib/gflanguages/data/languages/mic_Latn.textproto @@ -1,7 +1,7 @@ id: "mic_Latn" language: "mic" script: "Latn" -name: "Mi\'kmaq" +name: "Mi'kmaq" autonym: "Míkmawísimk" population: 7140 region: "CA" diff --git a/Lib/gflanguages/data/languages/non_Latn.textproto b/Lib/gflanguages/data/languages/non_Latn.textproto index 33368e12fed..d895a3a5a4a 100644 --- a/Lib/gflanguages/data/languages/non_Latn.textproto +++ b/Lib/gflanguages/data/languages/non_Latn.textproto @@ -1,7 +1,7 @@ id: "non_Latn" language: "non" script: "Latn" -name: "Old Norse, Latin, Sweden" +name: "Old Norse, Sweden (Latin)" region: "SE" sample_text { masthead_full: "AaLl" diff --git a/Lib/gflanguages/data/languages/ojb_Cans.textproto b/Lib/gflanguages/data/languages/ojb_Cans.textproto index 824475aeca8..60c47d6fa25 100644 --- a/Lib/gflanguages/data/languages/ojb_Cans.textproto +++ b/Lib/gflanguages/data/languages/ojb_Cans.textproto @@ -1,7 +1,7 @@ id: "ojb_Cans" language: "ojb" script: "Cans" -name: "Northwestern Ojibwa, Unified Canadian Aboriginal Syllabics, Canada" +name: "Northwestern Ojibwa, Canada (Unified Canadian Aboriginal Syllabics)" region: "CA" sample_text { masthead_full: "ᑲᐦᑭᓇ" diff --git a/Lib/gflanguages/data/languages/pnz_Latn.textproto b/Lib/gflanguages/data/languages/pnz_Latn.textproto index 6472940fe8d..91c5707976f 100644 --- a/Lib/gflanguages/data/languages/pnz_Latn.textproto +++ b/Lib/gflanguages/data/languages/pnz_Latn.textproto @@ -1,7 +1,7 @@ id: "pnz_Latn" language: "pnz" script: "Latn" -name: "Pana (Central African Republic)" +name: "Pana, Central African Republic" population: 153000 region: "CF" region: "CM" diff --git a/Lib/gflanguages/data/languages/qud_Latn.textproto b/Lib/gflanguages/data/languages/qud_Latn.textproto index 1079620abba..6bd4faa201a 100644 --- a/Lib/gflanguages/data/languages/qud_Latn.textproto +++ b/Lib/gflanguages/data/languages/qud_Latn.textproto @@ -1,7 +1,7 @@ id: "qud_Latn" language: "qud" 
script: "Latn" -name: "Quechua, Unified Quichua, old Hispanic orthography" +name: "Kichwa, Unified Quichua" region: "PE" sample_text { masthead_full: "TtUu" diff --git a/Lib/gflanguages/data/languages/ria_Latn.textproto b/Lib/gflanguages/data/languages/ria_Latn.textproto index 934bfa896c5..4a07108a8e8 100644 --- a/Lib/gflanguages/data/languages/ria_Latn.textproto +++ b/Lib/gflanguages/data/languages/ria_Latn.textproto @@ -1,6 +1,6 @@ id: "ria_Latn" language: "ria" script: "Latn" -name: "Riang [India]" +name: "Riang, India" population: 172391 region: "IN" diff --git a/Lib/gflanguages/data/languages/rob_Latn.textproto b/Lib/gflanguages/data/languages/rob_Latn.textproto index 79cc564ac5d..3c771dbbf3c 100644 --- a/Lib/gflanguages/data/languages/rob_Latn.textproto +++ b/Lib/gflanguages/data/languages/rob_Latn.textproto @@ -1,6 +1,6 @@ id: "rob_Latn" language: "rob" script: "Latn" -name: "Tae\'" +name: "Taeʼ" population: 293728 region: "ID" From 91119daf006c5f45b8903c8874dc776a9e72c251 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:07:12 -0400 Subject: [PATCH 20/39] Update character class for language names to use Unicode class --- tests/test_data_languages.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index a859cf78fe4..252d081bd84 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,8 +83,7 @@ "tlh_Latn": "Klingon is an artifical language.", } -# "ʼ" allowed as last character in language name for Metaʼ -LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$" # Some scripts have abbreviated names for reference in language names that are # sufficient in context. If an alternate is listed here, it should be used # universally and consistently across all language names. 
From 1303c7060e80f2356205321ca76ebbcf82144f8f Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:11:13 -0400 Subject: [PATCH 21/39] Fix unicode character class in language name regex --- tests/test_data_languages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 252d081bd84..56c218ac683 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -83,7 +83,7 @@ "tlh_Latn": "Klingon is an artifical language.", } -LANGUAGE_NAME_REGEX = "^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$" +LANGUAGE_NAME_REGEX = r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$" # Some scripts have abbreviated names for reference in language names that are # sufficient in context. If an alternate is listed here, it should be used # universally and consistently across all language names. From fc1dc973b3700a426e078e396879daf8f94fecfe Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:19:51 -0400 Subject: [PATCH 22/39] Use regex instead of re for access to unicode character classes --- tests/test_data_languages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index 56c218ac683..823a5791dd6 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -15,7 +15,7 @@ # limitations under the License. # from collections import defaultdict, Counter -import re +import regex import unicodedata from gflanguages import ( @@ -83,7 +83,7 @@ "tlh_Latn": "Klingon is an artifical language.", } -LANGUAGE_NAME_REGEX = r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$" +LANGUAGE_NAME_REGEX = regex.compile(r"^[-'’ʼ\p{L} ]+(, [-'’ʼ\p{L}/ ]+)?( [(][-'’ʼ\p{L} ]+[)])?$") # Some scripts have abbreviated names for reference in language names that are # sufficient in context. 
If an alternate is listed here, it should be used # universally and consistently across all language names. @@ -197,7 +197,7 @@ def test_exemplars_are_in_script(lang_code): if field.name == "auxiliary" or field.name == "index": continue exemplars = getattr(lang.exemplar_chars, field.name) - group_of_chars = re.findall(r"(\{[^}]+\}|\S+)", exemplars) + group_of_chars = regex.findall(r"(\{[^}]+\}|\S+)", exemplars) for chars in group_of_chars: for char in chars: char_script = youseedee.ucd_data(ord(char)).get("Script") @@ -304,7 +304,7 @@ def test_language_name_structure(): names += [["preferred_name", lang.preferred_name]] bad_names = [] for type, name in names: - bad_structure = not re.match(LANGUAGE_NAME_REGEX, name) + bad_structure = not regex.match(LANGUAGE_NAME_REGEX, name) bad_script_suffix = name.endswith( ")") and not name.endswith(f"({script_name})") if bad_structure or bad_script_suffix: From 83eb95f990328d3a6b78fa8aa20c6f0036867e4f Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:34:24 -0400 Subject: [PATCH 23/39] Add regex package to dev reqs --- dev-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 3bc644ab88c..99fd9c801d9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,4 +2,5 @@ uharfbuzz youseedee black isort -pytest \ No newline at end of file +pytest +regex From a661bc2979d6ed2cec3a429a16c2afecf20525c7 Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:53:41 -0400 Subject: [PATCH 24/39] Add regex to deps in TOML config --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eb559c54727..61546649abb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dynamic = ["version"] name = "gflanguages" description = "A python API for evaluating language support in the Google Fonts collection." 
readme = "README.md" -authors = [ +authors = [ { name = "Simon Cozens", email = "simon@simon-cozens.org" } ] @@ -29,5 +29,6 @@ dependencies = [ dev = [ "uharfbuzz", "youseedee", - "pytest" + "pytest", + "regex" ] From 3cecbbbcb95ebf027eaf8c86ccaa0596845d275c Mon Sep 17 00:00:00 2001 From: nathan-williams Date: Mon, 22 Jul 2024 01:57:14 -0400 Subject: [PATCH 25/39] Fix name structure for two languages --- Lib/gflanguages/data/languages/krs_Latn.textproto | 4 ++-- Lib/gflanguages/data/languages/mis_Latn.textproto | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/gflanguages/data/languages/krs_Latn.textproto b/Lib/gflanguages/data/languages/krs_Latn.textproto index 57f466d5101..5c9733fd06a 100644 --- a/Lib/gflanguages/data/languages/krs_Latn.textproto +++ b/Lib/gflanguages/data/languages/krs_Latn.textproto @@ -1,7 +1,7 @@ id: "krs_Latn" language: "krs" script: "Latn" -name: "Gbaya (Sudan)" +name: "Gbaya, Sudan" population: 47000 region: "SD" region: "SS" @@ -9,4 +9,4 @@ exemplar_chars { base: "a A b B c C d D e E f F g G h H i I j J k K l L ḷ Ḷ m M n N ŋ Ŋ o O p P r R s S t T u U v V w W y Y z Z" marks: "◌̣" auxiliary: "q Q x X" -} \ No newline at end of file +} diff --git a/Lib/gflanguages/data/languages/mis_Latn.textproto b/Lib/gflanguages/data/languages/mis_Latn.textproto index 532ecb72c88..b68b86bd3ef 100644 --- a/Lib/gflanguages/data/languages/mis_Latn.textproto +++ b/Lib/gflanguages/data/languages/mis_Latn.textproto @@ -1,7 +1,7 @@ id: "mis_Latn" language: "mis" script: "Latn" -name: "Uncoded languages, Latin, World" +name: "Uncoded languages, World (Latin)" region: "ES" region: "FR" sample_text { From be426cd01d42047ff878f8351f43acbe52bf5333 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 19 Aug 2024 16:09:58 +0100 Subject: [PATCH 26/39] Assert uniqueness of all IDs --- Lib/gflanguages/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/gflanguages/__init__.py b/Lib/gflanguages/__init__.py index 
c7b0bc61817..0754d6c1355 100644 --- a/Lib/gflanguages/__init__.py +++ b/Lib/gflanguages/__init__.py @@ -44,6 +44,7 @@ def LoadLanguages(base_dir=DATA_DIR): for textproto_file in glob.iglob(os.path.join(languages_dir, "*.textproto")): with open(textproto_file, "r", encoding="utf-8") as f: language = text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) + assert language.id not in langs, f"Duplicate language id: {language.id}" langs[language.id] = language return langs @@ -57,6 +58,7 @@ def LoadScripts(base_dir=DATA_DIR): for textproto_file in glob.iglob(os.path.join(scripts_dir, "*.textproto")): with open(textproto_file, "r", encoding="utf-8") as f: script = text_format.Parse(f.read(), languages_public_pb2.ScriptProto()) + assert script.id not in scripts, f"Duplicate script id: {script.id}" scripts[script.id] = script return scripts @@ -70,6 +72,7 @@ def LoadRegions(base_dir=DATA_DIR): for textproto_file in glob.iglob(os.path.join(regions_dir, "*.textproto")): with open(textproto_file, "r", encoding="utf-8") as f: region = text_format.Parse(f.read(), languages_public_pb2.RegionProto()) + assert region.id not in regions, f"Duplicate region id: {region.id}" regions[region.id] = region return regions From bac8f7c3c51609785e8a2dd6210e20241202c990 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 19 Aug 2024 16:10:19 +0100 Subject: [PATCH 27/39] Fix up languages with non-unique IDs --- Lib/gflanguages/data/languages/mam_Latn_MX.textproto | 4 ++-- Lib/gflanguages/data/languages/xsl_Latn.textproto | 4 ++-- Lib/gflanguages/data/languages/yo_Latn_BJ.textproto | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/gflanguages/data/languages/mam_Latn_MX.textproto b/Lib/gflanguages/data/languages/mam_Latn_MX.textproto index 110b0269355..27712e7a215 100644 --- a/Lib/gflanguages/data/languages/mam_Latn_MX.textproto +++ b/Lib/gflanguages/data/languages/mam_Latn_MX.textproto @@ -1,7 +1,7 @@ -id: "mam_Latn" +id: "mam_Latn_MX" language: "mam" 
script: "Latn" -name: "Mam" +name: "Mexican Mam" region: "MX" exemplar_chars { base: "a A {bꞌ} {BꞋ} {ch} {CH} {chꞌ} {CHꞋ} d D e E g G i I j J k K {kꞌ} {KꞋ} {ky} {KY} {kyꞌ} {KYꞋ} l L m M n N o O p P q Q {qꞌ} {QꞋ} r R s S t T {tꞌ} {TꞋ} {ts} {TS} {tsꞌ} {TSꞋ} {tx} {TX} {txꞌ} {TXꞋ} u U w W x X {xh} {XH} y Y ꞌ Ꞌ" diff --git a/Lib/gflanguages/data/languages/xsl_Latn.textproto b/Lib/gflanguages/data/languages/xsl_Latn.textproto index ef229cf89f7..259e4e011de 100644 --- a/Lib/gflanguages/data/languages/xsl_Latn.textproto +++ b/Lib/gflanguages/data/languages/xsl_Latn.textproto @@ -1,5 +1,5 @@ -id: "scs_Latn" -language: "scs" +id: "xsl_Latn" +language: "xsl" script: "Latn" name: "South Slavey" population: 950 diff --git a/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto b/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto index d876805617d..d7798b8653e 100644 --- a/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto +++ b/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto @@ -1,7 +1,7 @@ -id: "yo_Latn" +id: "yo_Latn_BJ" language: "yo" script: "Latn" -name: "Yoruba" +name: "Beninese Yoruba" autonym: "Èdè Yorùbá" population: 200000 region: "BJ" From a0e70c0d30569ad8996abb701dbb6e48159a0489 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Fri, 23 Aug 2024 16:32:23 +0100 Subject: [PATCH 28/39] Rename to Language, Place --- Lib/gflanguages/data/languages/yo_Latn_BJ.textproto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto b/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto index d7798b8653e..2136a20457d 100644 --- a/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto +++ b/Lib/gflanguages/data/languages/yo_Latn_BJ.textproto @@ -1,7 +1,7 @@ id: "yo_Latn_BJ" language: "yo" script: "Latn" -name: "Beninese Yoruba" +name: "Yoruba, Benin" autonym: "Èdè Yorùbá" population: 200000 region: "BJ" @@ -27,4 +27,4 @@ sample_text { specimen_16: "Ɛnì kɔ̀ɔ̀kan ló ní ɛ̀tɔ́ kpé kí a má shàdédé she àyɔjúràn sí ɔ̀rɔ̀ 
ìgbésí ayé rɛ̀, tàbí sí ɔ̀rɔ̀ɛbí rɛ̀ tàbí sí ɔ̀rɔ̀ ìdílé rɛ̀ tàbí ìwé tí a kɔ sí i; a kò sì gbɔdɔ̀ ba iyì àti orúkɔ rɛ̀ jɛ́. Ɛnì kɔ̀ɔ̀kan ló ní ɛ̀tɔ́ sí ààbò lábɛ́ òfin kúrò lɔ́wɔ́ irú àyɔjúràn tàbí ìbanijɛ́ bɛ́ɛ̀.\nƐnì kɔ̀ɔ̀kan ló ní ɛ̀tɔ́ sí òmìnira èrò, òmìnira ɛ̀rí-ɔkàn àti òmìnira ɛ sìn. Ɛtɔ́ yìí sì gbani láàyè láti kpààrɔ̀ ɛ sìn tàbí ìgbàgbɔ́ ɛni. Ó sì fún ɛyɔ ɛnì kan tàbí àkójɔkpɔ̀ ènìyàn láàyè láti she ɛ̀sìn wɔn àti ìgbàgbɔ́ wɔn bó she jɛ mɔ́ ti ìkɔ́ni, ìshesí, ìjɔ́sìn àti ìmúshe ohun tí wɔ́n gbàgbɔ́ yálà ní ìkɔ̀kɔ̀ tàbí ní gban̄gba.\nƐnì kɔ̀ɔ̀kan ló ní ɛ̀tɔ́ sí ìsinmi àti fàájì kpɛ̀lú àkókò tí kò kpɔ̀ jù lɛ́nu ishɛ́ àti àsìkò ìsinmi lɛ́nu ishɛ́ láti ìgbà dé ìgbà tí a ó sanwó fún." } source: "Centre national de linguistique appliquée (CENALA), Alphabet des langues nationales béninoises, Cotonou: CENALA avec le concours de l’Initiative francophone pour la formation à distance des maîtres (IFADEM), 2008, 6th ed." -note: "Yoruba (Benin) uses ɛ ɔ kp sh instead of Yoruba (Nigeria) ẹ ọ p ṣ." \ No newline at end of file +note: "Yoruba (Benin) uses ɛ ɔ kp sh instead of Yoruba (Nigeria) ẹ ọ p ṣ." 
From ecc71b8e5623a776b14136e7e02515d27b5dc660 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 26 Aug 2024 14:35:13 +0100 Subject: [PATCH 29/39] Add "removable" category of characters not required for language support --- Lib/gflanguages/languages_public.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/gflanguages/languages_public.proto b/Lib/gflanguages/languages_public.proto index 0e8f6505c23..b9df77db9b5 100644 --- a/Lib/gflanguages/languages_public.proto +++ b/Lib/gflanguages/languages_public.proto @@ -48,8 +48,9 @@ message ExemplarCharsProto { optional string numerals = 4; optional string punctuation = 5; optional string index = 6; + optional string removable = 7; // Base characters which can be ignored when determining language support - // Next = 7; + // Next = 8; } message SampleTextProto { From 07a2d635b27891855b3324e71310308b63751a16 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 26 Aug 2024 14:35:32 +0100 Subject: [PATCH 30/39] Add LATIN CAPITAL LETTER SHARP S to removable --- Lib/gflanguages/data/languages/de_Latn.textproto | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/gflanguages/data/languages/de_Latn.textproto b/Lib/gflanguages/data/languages/de_Latn.textproto index 2b4f1a012c8..6d27f73f657 100644 --- a/Lib/gflanguages/data/languages/de_Latn.textproto +++ b/Lib/gflanguages/data/languages/de_Latn.textproto @@ -38,6 +38,7 @@ exemplar_chars { numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . 
… \' ‘ ‚ \" “ „ « » ( ) [ ] { } @ * / & #" index: "A B C D E F G H I J K L M N O P Q R S ẞ T U V W X Y Z" + removable: "ẞ" } sample_text { masthead_full: "AaLl" From 68177e6df46db0717ce023be3878063b0e35caa3 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 26 Aug 2024 14:39:42 +0100 Subject: [PATCH 31/39] Recompile protobuf (protoc 3.17.3) --- Lib/gflanguages/languages_public_pb2.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Lib/gflanguages/languages_public_pb2.py b/Lib/gflanguages/languages_public_pb2.py index 2b544d4d9a1..a246fd5cbf8 100644 --- a/Lib/gflanguages/languages_public_pb2.py +++ b/Lib/gflanguages/languages_public_pb2.py @@ -19,7 +19,7 @@ syntax='proto2', serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x16languages_public.proto\x12\x17google.languages_public\"Q\n\x0bRegionProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x12\n\npopulation\x18\x03 \x01(\x05\x12\x14\n\x0cregion_group\x18\x04 \x03(\t\"\'\n\x0bScriptProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xce\x02\n\rLanguageProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x0e\n\x06script\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x16\n\x0epreferred_name\x18\x05 \x01(\t\x12\x0f\n\x07\x61utonym\x18\x06 \x01(\t\x12\x12\n\npopulation\x18\x07 \x01(\x05\x12\x0e\n\x06region\x18\x08 \x03(\t\x12\x43\n\x0e\x65xemplar_chars\x18\t \x01(\x0b\x32+.google.languages_public.ExemplarCharsProto\x12=\n\x0bsample_text\x18\n \x01(\x0b\x32(.google.languages_public.SampleTextProto\x12\x12\n\nhistorical\x18\x0b \x01(\x08\x12\x0e\n\x06source\x18\x0c \x03(\t\x12\x0c\n\x04note\x18\r \x01(\t\"z\n\x12\x45xemplarCharsProto\x12\x0c\n\x04\x62\x61se\x18\x01 \x01(\t\x12\x11\n\tauxiliary\x18\x02 \x01(\t\x12\r\n\x05marks\x18\x03 \x01(\t\x12\x10\n\x08numerals\x18\x04 \x01(\t\x12\x13\n\x0bpunctuation\x18\x05 \x01(\t\x12\r\n\x05index\x18\x06 
\x01(\t\"\x92\x02\n\x0fSampleTextProto\x12\x15\n\rmasthead_full\x18\x01 \x01(\t\x12\x18\n\x10masthead_partial\x18\x02 \x01(\t\x12\x0e\n\x06styles\x18\x03 \x01(\t\x12\x0e\n\x06tester\x18\x04 \x01(\t\x12\x11\n\tposter_sm\x18\x05 \x01(\t\x12\x11\n\tposter_md\x18\x06 \x01(\t\x12\x11\n\tposter_lg\x18\x07 \x01(\t\x12\x13\n\x0bspecimen_48\x18\x08 \x01(\t\x12\x13\n\x0bspecimen_36\x18\t \x01(\t\x12\x13\n\x0bspecimen_32\x18\n \x01(\t\x12\x13\n\x0bspecimen_21\x18\x0b \x01(\t\x12\x13\n\x0bspecimen_16\x18\x0c \x01(\t\x12\x0c\n\x04note\x18\r \x01(\t' + serialized_pb=b'\n\x16languages_public.proto\x12\x17google.languages_public\"Q\n\x0bRegionProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x12\n\npopulation\x18\x03 \x01(\x05\x12\x14\n\x0cregion_group\x18\x04 \x03(\t\"\'\n\x0bScriptProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xce\x02\n\rLanguageProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x0e\n\x06script\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x16\n\x0epreferred_name\x18\x05 \x01(\t\x12\x0f\n\x07\x61utonym\x18\x06 \x01(\t\x12\x12\n\npopulation\x18\x07 \x01(\x05\x12\x0e\n\x06region\x18\x08 \x03(\t\x12\x43\n\x0e\x65xemplar_chars\x18\t \x01(\x0b\x32+.google.languages_public.ExemplarCharsProto\x12=\n\x0bsample_text\x18\n \x01(\x0b\x32(.google.languages_public.SampleTextProto\x12\x12\n\nhistorical\x18\x0b \x01(\x08\x12\x0e\n\x06source\x18\x0c \x03(\t\x12\x0c\n\x04note\x18\r \x01(\t\"\x8d\x01\n\x12\x45xemplarCharsProto\x12\x0c\n\x04\x62\x61se\x18\x01 \x01(\t\x12\x11\n\tauxiliary\x18\x02 \x01(\t\x12\r\n\x05marks\x18\x03 \x01(\t\x12\x10\n\x08numerals\x18\x04 \x01(\t\x12\x13\n\x0bpunctuation\x18\x05 \x01(\t\x12\r\n\x05index\x18\x06 \x01(\t\x12\x11\n\tremovable\x18\x07 \x01(\t\"\x92\x02\n\x0fSampleTextProto\x12\x15\n\rmasthead_full\x18\x01 \x01(\t\x12\x18\n\x10masthead_partial\x18\x02 \x01(\t\x12\x0e\n\x06styles\x18\x03 \x01(\t\x12\x0e\n\x06tester\x18\x04 \x01(\t\x12\x11\n\tposter_sm\x18\x05 
\x01(\t\x12\x11\n\tposter_md\x18\x06 \x01(\t\x12\x11\n\tposter_lg\x18\x07 \x01(\t\x12\x13\n\x0bspecimen_48\x18\x08 \x01(\t\x12\x13\n\x0bspecimen_36\x18\t \x01(\t\x12\x13\n\x0bspecimen_32\x18\n \x01(\t\x12\x13\n\x0bspecimen_21\x18\x0b \x01(\t\x12\x13\n\x0bspecimen_16\x18\x0c \x01(\t\x12\x0c\n\x04note\x18\r \x01(\t' ) @@ -283,6 +283,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='removable', full_name='google.languages_public.ExemplarCharsProto.removable', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), ], extensions=[ ], @@ -295,8 +302,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=512, - serialized_end=634, + serialized_start=513, + serialized_end=654, ) @@ -411,8 +418,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=637, - serialized_end=911, + serialized_start=657, + serialized_end=931, ) _LANGUAGEPROTO.fields_by_name['exemplar_chars'].message_type = _EXEMPLARCHARSPROTO From 5248f078360a3fa54ee44c8bac2d54fbd01d18a6 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Mon, 26 Aug 2024 18:04:02 +0100 Subject: [PATCH 32/39] Rename removable to not_required --- Lib/gflanguages/data/languages/de_Latn.textproto | 2 +- Lib/gflanguages/languages_public.proto | 2 +- Lib/gflanguages/languages_public_pb2.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/gflanguages/data/languages/de_Latn.textproto b/Lib/gflanguages/data/languages/de_Latn.textproto index 6d27f73f657..56023005f1e 100644 --- a/Lib/gflanguages/data/languages/de_Latn.textproto +++ b/Lib/gflanguages/data/languages/de_Latn.textproto 
@@ -38,7 +38,7 @@ exemplar_chars { numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9" punctuation: "- – — , ; : ! ? . … \' ‘ ‚ \" “ „ « » ( ) [ ] { } @ * / & #" index: "A B C D E F G H I J K L M N O P Q R S ẞ T U V W X Y Z" - removable: "ẞ" + not_required: "ẞ" } sample_text { masthead_full: "AaLl" diff --git a/Lib/gflanguages/languages_public.proto b/Lib/gflanguages/languages_public.proto index b9df77db9b5..db00ef077da 100644 --- a/Lib/gflanguages/languages_public.proto +++ b/Lib/gflanguages/languages_public.proto @@ -48,7 +48,7 @@ message ExemplarCharsProto { optional string numerals = 4; optional string punctuation = 5; optional string index = 6; - optional string removable = 7; // Base characters which can be ignored when determining language support + optional string not_required = 7; // Base characters which can be ignored when determining language support // Next = 8; } diff --git a/Lib/gflanguages/languages_public_pb2.py b/Lib/gflanguages/languages_public_pb2.py index a246fd5cbf8..4729af34655 100644 --- a/Lib/gflanguages/languages_public_pb2.py +++ b/Lib/gflanguages/languages_public_pb2.py @@ -19,7 +19,7 @@ syntax='proto2', serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x16languages_public.proto\x12\x17google.languages_public\"Q\n\x0bRegionProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x12\n\npopulation\x18\x03 \x01(\x05\x12\x14\n\x0cregion_group\x18\x04 \x03(\t\"\'\n\x0bScriptProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xce\x02\n\rLanguageProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x0e\n\x06script\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x16\n\x0epreferred_name\x18\x05 \x01(\t\x12\x0f\n\x07\x61utonym\x18\x06 \x01(\t\x12\x12\n\npopulation\x18\x07 \x01(\x05\x12\x0e\n\x06region\x18\x08 \x03(\t\x12\x43\n\x0e\x65xemplar_chars\x18\t \x01(\x0b\x32+.google.languages_public.ExemplarCharsProto\x12=\n\x0bsample_text\x18\n 
\x01(\x0b\x32(.google.languages_public.SampleTextProto\x12\x12\n\nhistorical\x18\x0b \x01(\x08\x12\x0e\n\x06source\x18\x0c \x03(\t\x12\x0c\n\x04note\x18\r \x01(\t\"\x8d\x01\n\x12\x45xemplarCharsProto\x12\x0c\n\x04\x62\x61se\x18\x01 \x01(\t\x12\x11\n\tauxiliary\x18\x02 \x01(\t\x12\r\n\x05marks\x18\x03 \x01(\t\x12\x10\n\x08numerals\x18\x04 \x01(\t\x12\x13\n\x0bpunctuation\x18\x05 \x01(\t\x12\r\n\x05index\x18\x06 \x01(\t\x12\x11\n\tremovable\x18\x07 \x01(\t\"\x92\x02\n\x0fSampleTextProto\x12\x15\n\rmasthead_full\x18\x01 \x01(\t\x12\x18\n\x10masthead_partial\x18\x02 \x01(\t\x12\x0e\n\x06styles\x18\x03 \x01(\t\x12\x0e\n\x06tester\x18\x04 \x01(\t\x12\x11\n\tposter_sm\x18\x05 \x01(\t\x12\x11\n\tposter_md\x18\x06 \x01(\t\x12\x11\n\tposter_lg\x18\x07 \x01(\t\x12\x13\n\x0bspecimen_48\x18\x08 \x01(\t\x12\x13\n\x0bspecimen_36\x18\t \x01(\t\x12\x13\n\x0bspecimen_32\x18\n \x01(\t\x12\x13\n\x0bspecimen_21\x18\x0b \x01(\t\x12\x13\n\x0bspecimen_16\x18\x0c \x01(\t\x12\x0c\n\x04note\x18\r \x01(\t' + serialized_pb=b'\n\x16languages_public.proto\x12\x17google.languages_public\"Q\n\x0bRegionProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x12\n\npopulation\x18\x03 \x01(\x05\x12\x14\n\x0cregion_group\x18\x04 \x03(\t\"\'\n\x0bScriptProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xce\x02\n\rLanguageProto\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08language\x18\x02 \x01(\t\x12\x0e\n\x06script\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\x16\n\x0epreferred_name\x18\x05 \x01(\t\x12\x0f\n\x07\x61utonym\x18\x06 \x01(\t\x12\x12\n\npopulation\x18\x07 \x01(\x05\x12\x0e\n\x06region\x18\x08 \x03(\t\x12\x43\n\x0e\x65xemplar_chars\x18\t \x01(\x0b\x32+.google.languages_public.ExemplarCharsProto\x12=\n\x0bsample_text\x18\n \x01(\x0b\x32(.google.languages_public.SampleTextProto\x12\x12\n\nhistorical\x18\x0b \x01(\x08\x12\x0e\n\x06source\x18\x0c \x03(\t\x12\x0c\n\x04note\x18\r 
\x01(\t\"\x90\x01\n\x12\x45xemplarCharsProto\x12\x0c\n\x04\x62\x61se\x18\x01 \x01(\t\x12\x11\n\tauxiliary\x18\x02 \x01(\t\x12\r\n\x05marks\x18\x03 \x01(\t\x12\x10\n\x08numerals\x18\x04 \x01(\t\x12\x13\n\x0bpunctuation\x18\x05 \x01(\t\x12\r\n\x05index\x18\x06 \x01(\t\x12\x14\n\x0cnot_required\x18\x07 \x01(\t\"\x92\x02\n\x0fSampleTextProto\x12\x15\n\rmasthead_full\x18\x01 \x01(\t\x12\x18\n\x10masthead_partial\x18\x02 \x01(\t\x12\x0e\n\x06styles\x18\x03 \x01(\t\x12\x0e\n\x06tester\x18\x04 \x01(\t\x12\x11\n\tposter_sm\x18\x05 \x01(\t\x12\x11\n\tposter_md\x18\x06 \x01(\t\x12\x11\n\tposter_lg\x18\x07 \x01(\t\x12\x13\n\x0bspecimen_48\x18\x08 \x01(\t\x12\x13\n\x0bspecimen_36\x18\t \x01(\t\x12\x13\n\x0bspecimen_32\x18\n \x01(\t\x12\x13\n\x0bspecimen_21\x18\x0b \x01(\t\x12\x13\n\x0bspecimen_16\x18\x0c \x01(\t\x12\x0c\n\x04note\x18\r \x01(\t' ) @@ -284,7 +284,7 @@ is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), _descriptor.FieldDescriptor( - name='removable', full_name='google.languages_public.ExemplarCharsProto.removable', index=6, + name='not_required', full_name='google.languages_public.ExemplarCharsProto.not_required', index=6, number=7, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode('utf-8'), message_type=None, enum_type=None, containing_type=None, @@ -303,7 +303,7 @@ oneofs=[ ], serialized_start=513, - serialized_end=654, + serialized_end=657, ) @@ -418,8 +418,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=657, - serialized_end=931, + serialized_start=660, + serialized_end=934, ) _LANGUAGEPROTO.fields_by_name['exemplar_chars'].message_type = _EXEMPLARCHARSPROTO From bd9ad711242b1050ac4ff135db1dc716828b8e3c Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Fri, 6 Sep 2024 13:49:11 +0100 Subject: [PATCH 33/39] Add Todhri --- .../data/languages/sq_Todr.textproto | 20 +++++++++++++++++++ Lib/gflanguages/data/scripts/Todr.textproto | 3 +++ 2 files 
changed, 23 insertions(+) create mode 100644 Lib/gflanguages/data/languages/sq_Todr.textproto create mode 100644 Lib/gflanguages/data/scripts/Todr.textproto diff --git a/Lib/gflanguages/data/languages/sq_Todr.textproto b/Lib/gflanguages/data/languages/sq_Todr.textproto new file mode 100644 index 00000000000..be859899c75 --- /dev/null +++ b/Lib/gflanguages/data/languages/sq_Todr.textproto @@ -0,0 +1,20 @@ +id: "sq_Todr" +language: "sq" +script: "Todr" +name: "Albanian (Todhri)" +population: 0 +sample_text { + masthead_full: "𐗎𐗒𐗢𐗐𐗊" + masthead_partial: "𐗢𐗊" + styles: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊" + tester: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢" + poster_sm: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢" + poster_md: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊" + poster_lg: "𐗎𐗒𐗢𐗐𐗊" + specimen_48: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊" + specimen_36: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒." + specimen_32: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒." + specimen_21: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒.\n𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒." + specimen_16: "𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒.\n𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒.\n𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 
𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒.\n𐗢𐗊 𐗎𐗒𐗢𐗐𐗊 𐗙𐗉𐗝𐗊𐗬𐗒𐗢 𐗕𐗒𐗇𐗒𐗘 𐗢𐗊 𐗕𐗒𐗝𐗊 𐗈𐗉 𐗢𐗊 𐗂𐗀𐗝𐗀𐗂𐗀𐗝𐗢𐗊 𐗘𐗊 𐗆𐗒𐗙𐗒𐗢𐗉𐗢 𐗈𐗉 𐗘𐗊 𐗢𐗊 𐗆𐗝𐗉𐗓𐗢𐗀. 𐗀𐗢𐗀 𐗔𐗀𐗘𐗊 𐗀𐗝𐗟𐗪𐗉 𐗈𐗉 𐗇𐗊𐗝𐗎𐗉𐗎𐗉 𐗈𐗉 𐗆𐗤𐗐𐗉𐗢 𐗢𐗊 𐗟𐗒𐗖𐗉𐗘 𐗇𐗀𐗓 𐗙𐗊𐗝𐗒 𐗢𐗓𐗉𐗢𐗝𐗒𐗢 𐗗𐗉 𐗋𐗝𐗪𐗗𐗊 𐗥𐗊𐗖𐗀𐗬𐗊𐗝𐗒𐗗𐗒." + note: "This is a transliteration of the text in sq_Latn" +} diff --git a/Lib/gflanguages/data/scripts/Todr.textproto b/Lib/gflanguages/data/scripts/Todr.textproto new file mode 100644 index 00000000000..4b2d4405540 --- /dev/null +++ b/Lib/gflanguages/data/scripts/Todr.textproto @@ -0,0 +1,3 @@ +id: "Todr" +name: "Todhri" + From e06ea87571e72294d38fd153936cb013b2f51f83 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 07:37:39 +0100 Subject: [PATCH 34/39] Improvements to Ukrainian as per #155 --- Lib/gflanguages/data/languages/uk_Cyrl.textproto | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/gflanguages/data/languages/uk_Cyrl.textproto b/Lib/gflanguages/data/languages/uk_Cyrl.textproto index e4df0b577f8..60c89efbaf9 100644 --- a/Lib/gflanguages/data/languages/uk_Cyrl.textproto +++ b/Lib/gflanguages/data/languages/uk_Cyrl.textproto @@ -18,11 +18,11 @@ exemplar_chars { index: "А Б В Г Ґ Д Е Є Ж З И І Ї Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ю Я" } sample_text { - masthead_full: "ВвСс" - masthead_partial: "Іі" - styles: "Беручи до уваги, що визнання гідності, яка" + masthead_full: "ҐґЄє" + masthead_partial: "Її" + styles: "Беручи до уваги, що визнання гідності" tester: "беручи до уваги, що зневажання і нехтування правами людини призвели" - poster_sm: "беручи до уваги," + poster_sm: "беручи до уваги" poster_md: "беручи до" poster_lg: "Всі" specimen_48: "беручи до уваги, що народи Об\'єднаних Націй підтвердили в Статуті" From 4beb50012cd42079781c299cf9746036402c2b23 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 07:57:20 +0100 Subject: [PATCH 35/39] No Chinook Jargon in the US --- Lib/gflanguages/data/languages/chn_Dupl.textproto | 1 - 1 file changed, 1 deletion(-) diff 
--git a/Lib/gflanguages/data/languages/chn_Dupl.textproto b/Lib/gflanguages/data/languages/chn_Dupl.textproto index 2e8e8213cb7..ab8fec99a76 100644 --- a/Lib/gflanguages/data/languages/chn_Dupl.textproto +++ b/Lib/gflanguages/data/languages/chn_Dupl.textproto @@ -2,5 +2,4 @@ id: "chn_Dupl" language: "chn" script: "Dupl" name: "Chinook Jargon (Duployan)" -region: "US" region: "CA" From 446878eb5c94436e474d93bc7cd7e49c8c837833 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 10:50:22 +0100 Subject: [PATCH 36/39] Improve Maori sample, fixes #55 --- .../data/languages/mi_Latn.textproto | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Lib/gflanguages/data/languages/mi_Latn.textproto b/Lib/gflanguages/data/languages/mi_Latn.textproto index 50c353b5db4..f753995d063 100644 --- a/Lib/gflanguages/data/languages/mi_Latn.textproto +++ b/Lib/gflanguages/data/languages/mi_Latn.textproto @@ -2,7 +2,7 @@ id: "mi_Latn" language: "mi" script: "Latn" name: "Maori" -autonym: "Māori" +autonym: "te reo Māori" population: 137913 region: "NZ" exemplar_chars { @@ -12,16 +12,16 @@ exemplar_chars { index: "A E H I K M N O P R T U W" } sample_text { - masthead_full: "KkOo" - masthead_partial: "Tt" - styles: "No te mea na te whakanoa a na te whakahawea" - tester: "No te mea ki te kore te tangata ae akina kia tae ki te tino hemanawatanga" - poster_sm: "No te mea he" - poster_md: "No te mea" - poster_lg: "katoa" - specimen_48: "No te mea ko nga iwi o roto i tenei Kotahitanga kua oati i runga" - specimen_36: "No te mea e tutaki ai tenei oati he mea nui rawa kia matou te mano tini o te tangata ki enei tikanga rangatira." - specimen_32: "Kaua tetahi tangata e hopukia noatia e te ringa o te ture e puritia noatia ranei i roto i tetahi whare herehere e peia noatia ranei ki tetahi whenua ke." 
- specimen_21: "Ko ia tangata e tika ana kia whakatuturutia ki a ia tetahi whakawa tika ki te aroaro o te katoa e tetahi runanga wehekore whakahoahoa ranei, mo runga i te whakataunga i ona tika me nga tikanga hei whakarite mana tae atu hoki ki nga whakapae mona tera kua hara kino ia i raro i te ture.\nKo ia tangata e whai-tika ana ki nga ritenga o te noho pai o te iwi me te ao katoa, ma reira nei e tino tuturu ai nga tika me nga rangatiratanga kua whakararangitia nei ki roto i tenei Whakapuakitanga." - specimen_16: "Kahore rawa i roto i tenei Whakapuakitanga tetahi mea e ahei ana kia whakamoaritia tera kei tetahi Mana Kawanatanga, kei tetahi ropu, kei tetahi tangata ranei tetahi mana ki te whakahaere i tetahi ritenga, ki te mahi ranei i tetahi mahi e anga atu ana hei tikanga turaki i tetahi o nga mano me nga rangatiratanga e mau ake nei.\nNo te mea na te whakanoa a na te whakahawea ki nga mana o te tangata i tupu ai nga mahi whakarihariha i pouri ai te ngakau tangata, a ko te kohaetanga o tetahi ao hou e mahorahora ai te tangata ki te korero ki te whakapono, ki te noho noa i runga i te rangimarie a i te ora, kua panuitia hei taumata mo te koingotanga o te ngakau o te mano tini o te tangata." + masthead_full: "IiĀā" + masthead_partial: "Ēē" + styles: "I te whānautanga mai o te tangata, kāhore" + tester: "E āhei ana ia tangata ki ngā tika me ngā herekoretanga e rār" + poster_sm: "E whai tik" + poster_md: "Kia ka" + poster_lg: "mai" + specimen_48: "Kia kaua te tangata e tukuna kia tūkinotia, kia whi" + specimen_36: "Kia kaua te tangata e hopukina pokanoatia, e mauheretia pokanoatia rānei, e panaia pokanoatia rānei i te whenua." + specimen_32: "E whai tika ana ia tangata ki te whakatā, ki te whakangahau hoki, me whakarite ngā hāora mahi kia pēnei, me whakawhiwhi hoki he wā hararei e utua ana." 
+ specimen_21: "E whai tika ana ia tangata ki tētahi punaha hapori i tōna whenua me te ao whānui e mana katoa ai ngā tika me ngā herekoretanga kei tēnei Whakapuakitanga e rārangi ana.\nKāhore he kōrero i tēnei Whakapuakitanga ka taea te kī māna e whai tika ai te Mana Whenua, te rōpū, te tangata rānei ki te mahi i tētahi mahi e korehāhātia ai ngā tika me ngā herekoretanga e rārangi ake nei." + specimen_16: "I te whānautanga mai o te tangata, kāhore ōna here, e ōrite ana tōna mana me ōna tika ki te katoa. Ka whakatōkia ki roto i te tangata he wairua,\nhe hinengaro hoki, ā, me mahi tahi ia ki ngā tāngata o te ao i runga i te āhua o te tuakana me te teina.\nE whai tika ana ia tangata ki te ora, ki te noho herekore, ki te haumarutanga o te tinana.\nKia kaua te tangata e pupuritia hei taurekareka, hei pononga mā tētahi, ā, me aukati ngā āhuatanga katoa o te whakataurekareka i te tangata, o te hoko rānei i te tangata hei taurekareka.\nKia kaua te tangata e tukuna kia tūkinotia, kia whiua rānei ki te mahi whakawiri, whakāhawea rānei i a ia." 
} From 15a5c30221862e06300f865f37d2860589e9e9d3 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 10:53:32 +0100 Subject: [PATCH 37/39] Move sample text updater from gftools to here --- Lib/gflanguages/udhr.py | 252 +++++++++++++++++++++++++++++++++++ snippets/lang_sample_text.py | 65 +++++++++ 2 files changed, 317 insertions(+) create mode 100644 Lib/gflanguages/udhr.py create mode 100755 snippets/lang_sample_text.py diff --git a/Lib/gflanguages/udhr.py b/Lib/gflanguages/udhr.py new file mode 100644 index 00000000000..ea076bef4f9 --- /dev/null +++ b/Lib/gflanguages/udhr.py @@ -0,0 +1,252 @@ +from gflanguages import languages_public_pb2 +import enum +import re + + +class Udhr: + def __init__( + self, key, iso639_3, iso15924, bcp47, direction, ohchr, stage, loc, name + ): + self.key = key + self.iso639_3 = iso639_3 + self.iso15924 = iso15924 + self.bcp47 = bcp47 + self.direction = direction + self.ohchr = ohchr + self.stage = stage + self.loc = loc + self.name = name + + self.title = None + self.preamble = None + self.articles = [] + + def Parse(self, translation_data): + if translation_data is None or self.stage < 2: + return + + if translation_data.find("./{*}title") is not None: + self.title = translation_data.find("./{*}title").text + + preamble_data = translation_data.find("./{*}preamble") + if preamble_data is not None: + if preamble_data.find("./{*}title") is not None: + self.preamble = { + "title": preamble_data.find("./{*}title").text, + "content": [ + para.text for para in preamble_data.findall("./{*}para") + ], + } + + articles_data = translation_data.findall("./{*}article") + for article_data in articles_data: + title_data = article_data.find("./{*}title") + article = { + "id": int(article_data.get("number")), + "title": None if title_data is None else title_data.text, + "content": [para.text for para in article_data.findall("./{*}para")], + } + self.articles.append(article) + + def LoadArticleOne(self, article_one): + 
self.articles.append({"id": 0, "title": None, "content": [article_one]}) + + def GetSampleTexts(self): + extractor = SampleTextExtractor(self) + return extractor.GetSampleTexts() + + +class SampleTextExtractor: + class TextType(enum.Enum): + GLYPHS = 1 + WORD = 2 + PHRASE = 3 + SENTENCE = 4 + PARAGRAPH = 5 + PASSAGE = 6 + + def __init__(self, udhr): + self._udhr = udhr + self._glyphs = iter(self._GetGlyphs()) + self._words = iter(self._GetWords()) + self._paragraphs = iter(self._GetParagraphs()) + self._phrase_history = set() + + self._non_word_regex = re.compile(r"[^\w]+") + self._space_regex = re.compile(r"\s+") + self._non_space_regex = re.compile(r"[^\s]+") + self._non_word_space_regex = re.compile(r"[^\w\s]+") + self._any_regex = re.compile(r".") + + def _DisplayLength(self, s): + """Returns length of given string. Omits combining characters. + + Some entire scripts will not be counted; in those cases, the raw length of + the string is returned. + """ + word_space_length = len(self._non_word_space_regex.sub("", s)) + space_length = len(self._non_space_regex.sub("", s)) + if word_space_length == space_length: + return len(s) + return word_space_length + + def _GetGlyphs(self): + seen = set() + for article in self._udhr.articles: + for para in article["content"]: + for ch in self._non_word_regex.sub("", para) or self._space_regex.sub( + "", para + ): + ch = ch.lower() + if ch not in seen: + seen.add(ch) + yield ch + + def _GetWords(self): + if self._space_regex.search(self._udhr.articles[0]["content"][0]) is not None: + splitter = self._space_regex + else: + splitter = self._non_word_regex + + seen = set() + for article in self._udhr.articles: + for para in article["content"]: + for s in splitter.split(para): + if s not in seen: + seen.add(s) + yield s + + def _GetParagraphs(self): + if self._udhr.preamble is not None: + for para in self._udhr.preamble["content"]: + yield para + for article in self._udhr.articles: + for para in article["content"]: + yield para + 
+ + def _ExtractGlyphs(self, min_chars, max_chars): + s = "" + for ch in self._glyphs: + s += ch.upper() + if len(s) >= min_chars: + break + if ch != ch.upper(): + s += ch + if len(s) >= min_chars: + break + return s + + def _ExtractWord(self, min_chars, max_chars): + for iterator in [self._words, self._GetWords()]: + for w in iterator: + if w is None: + continue + if min_chars <= self._DisplayLength(w) <= max_chars: + return w + # Fallback to using multiple words for languages with very small words + return self._ExtractPhrase(min_chars, max_chars) + + def _ExtractPhrase(self, min_chars, max_chars): + for iterator in [self._paragraphs, self._GetParagraphs()]: + for para in iterator: + if para is None: + continue + for regex in [self._any_regex, self._space_regex, self._non_word_regex]: + breaks = [-1] + for match in regex.finditer(para, min_chars): + breaks.append(match.start()) + phrase = para[breaks[0] + 1 : breaks[len(breaks) - 1]] + p_size = self._DisplayLength(phrase) + while p_size > max_chars and len(breaks) > 1: + breaks.pop() + phrase = para[breaks[0] + 1 : breaks[len(breaks) - 1]] + p_size = self._DisplayLength(phrase) + if min_chars <= p_size and phrase not in self._phrase_history: + self._phrase_history.add(phrase) + return phrase + return self._ExtractParagraph(min_chars, max_chars) + + def _ExtractSentence(self, min_chars, max_chars): + # Sentence delimitation may differ between scripts, so tokenizing on spaces + # would be unreliable. Prefer to use _ExtractPhrase.
+ return self._ExtractPhrase(min_chars, max_chars) + + def _ExtractParagraph(self, min_chars, max_chars): + for iterator in [self._paragraphs, self._GetParagraphs()]: + for para in iterator: + if para is None: + continue + if min_chars <= self._DisplayLength(para) <= max_chars: + return para + # Paragraphs likely insufficient length; try combining into passages + return self._ExtractPassage(min_chars, max_chars) + + def _ExtractPassage(self, min_chars, max_chars): + p = [] + p_size = 0 + while p_size < min_chars: + for iterator in [self._paragraphs, self._GetParagraphs()]: + for para in iterator: + if para is None: + continue + p.append(para) + p_size = self._DisplayLength(" ".join(p)) + if max_chars < p_size: + p.pop() + elif min_chars <= p_size: + return "\n".join(p) + assert len(p) > 0, "Unable to extract passage: " + self._udhr.key + if len(p) == 0: + p.append([p for p in self._GetParagraphs()][0]) + return "\n".join(p) + + def _Get(self, text_type, **kwargs): + if "char_count" in kwargs: + min_chars = kwargs["char_count"] + max_chars = kwargs["char_count"] + else: + min_chars = kwargs["min_chars"] + max_chars = kwargs["max_chars"] + if text_type == self.TextType.GLYPHS: + return self._ExtractGlyphs(min_chars, max_chars) + if text_type == self.TextType.WORD: + return self._ExtractWord(min_chars, max_chars) + if text_type == self.TextType.PHRASE: + return self._ExtractPhrase(min_chars, max_chars) + if text_type == self.TextType.SENTENCE: + return self._ExtractSentence(min_chars, max_chars) + if text_type == self.TextType.PARAGRAPH: + return self._ExtractParagraph(min_chars, max_chars) + if text_type == self.TextType.PASSAGE: + return self._ExtractPassage(min_chars, max_chars) + raise Exception("Unsupported text type: " + text_type) + + def GetSampleTexts(self): + sample_text = languages_public_pb2.SampleTextProto() + sample_text.masthead_full = self._Get(self.TextType.GLYPHS, char_count=4) + sample_text.masthead_partial = self._Get(self.TextType.GLYPHS, 
char_count=2) + sample_text.styles = self._Get(self.TextType.PHRASE, min_chars=40, max_chars=60) + sample_text.tester = self._Get(self.TextType.PHRASE, min_chars=60, max_chars=90) + sample_text.poster_sm = self._Get( + self.TextType.PHRASE, min_chars=10, max_chars=17 + ) + sample_text.poster_md = self._Get( + self.TextType.PHRASE, min_chars=6, max_chars=12 + ) + sample_text.poster_lg = self._Get(self.TextType.WORD, min_chars=3, max_chars=8) + sample_text.specimen_48 = self._Get( + self.TextType.SENTENCE, min_chars=50, max_chars=80 + ) + sample_text.specimen_36 = self._Get( + self.TextType.PARAGRAPH, min_chars=100, max_chars=120 + ) + sample_text.specimen_32 = self._Get( + self.TextType.PARAGRAPH, min_chars=140, max_chars=180 + ) + sample_text.specimen_21 = self._Get( + self.TextType.PASSAGE, min_chars=300, max_chars=500 + ) + sample_text.specimen_16 = self._Get( + self.TextType.PASSAGE, min_chars=550, max_chars=750 + ) + return sample_text diff --git a/snippets/lang_sample_text.py b/snippets/lang_sample_text.py new file mode 100755 index 00000000000..10b38913279 --- /dev/null +++ b/snippets/lang_sample_text.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +""" +lang-sample-text + +Adds sample text for a given language using the specified UDHR translation. 
+ +Usage: + +lang-sample-text -l ./languages/en.textproto -u ./udhr_translations/en.xml + +""" + +from gflanguages import LoadLanguages, languages_public_pb2 +from gftools.util.google_fonts import ReadProto, WriteProto +from gflanguages.udhr import Udhr +from lxml import etree +import os +import re +import argparse + + +def main(argv=None): + parser = argparse.ArgumentParser( + description="Update UDHR sample text for a given language" + ) + parser.add_argument( + "-l", + "--lang", + help="Language proto file to update", + required=True, + ) + parser.add_argument( + "-u", + "--udhr", + help="Path to UDHR translation (XML)", + required=True, + ) + args = parser.parse_args(argv) + + language = ReadProto(languages_public_pb2.LanguageProto(), args.lang) + + udhr_data = etree.parse(args.udhr) + head = udhr_data.getroot() + for name, value in head.attrib.items(): + if re.search(r"\{.*\}lang", name): + bcp47 = value.replace("-", "_") + udhr = Udhr( + key=head.get("key"), + iso639_3=head.get("iso639-3"), + iso15924=head.get("iso15924"), + bcp47=bcp47, + direction=head.get("dir"), + ohchr=None, + stage=4, + loc=None, + name=head.get("n"), + ) + udhr.Parse(udhr_data) + + language.sample_text.MergeFrom(udhr.GetSampleTexts()) + WriteProto(language, args.lang) + + +if __name__ == "__main__": + main() From fe4b8afad20d315926f2de1422657ec7ef8879ae Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 11:01:57 +0100 Subject: [PATCH 38/39] Change 'Bengali' to 'Bangla' --- Lib/gflanguages/data/languages/bn_Beng.textproto | 2 +- Lib/gflanguages/data/scripts/Beng.textproto | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/gflanguages/data/languages/bn_Beng.textproto b/Lib/gflanguages/data/languages/bn_Beng.textproto index 5c63693f6ac..15b609a907d 100644 --- a/Lib/gflanguages/data/languages/bn_Beng.textproto +++ b/Lib/gflanguages/data/languages/bn_Beng.textproto @@ -1,7 +1,7 @@ id: "bn_Beng" language: "bn" script: "Beng" -name: "Bengali" +name: "Bangla"
autonym: "বাংলা" population: 267280377 region: "BD" diff --git a/Lib/gflanguages/data/scripts/Beng.textproto b/Lib/gflanguages/data/scripts/Beng.textproto index d68be9e4d79..079fd2ca4ff 100644 --- a/Lib/gflanguages/data/scripts/Beng.textproto +++ b/Lib/gflanguages/data/scripts/Beng.textproto @@ -1,2 +1,2 @@ id: "Beng" -name: "Bengali" +name: "Bangla" From e4e9d409fb239b46ec05519328b786da12f218d7 Mon Sep 17 00:00:00 2001 From: Simon Cozens Date: Thu, 12 Sep 2024 11:07:33 +0100 Subject: [PATCH 39/39] Other languages using Bangla script --- Lib/gflanguages/data/languages/brx_Beng.textproto | 2 +- Lib/gflanguages/data/languages/ccp_Beng.textproto | 2 +- Lib/gflanguages/data/languages/kha_Beng.textproto | 2 +- Lib/gflanguages/data/languages/khr_Beng.textproto | 2 +- Lib/gflanguages/data/languages/kyw_Beng.textproto | 2 +- Lib/gflanguages/data/languages/sat_Beng.textproto | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/gflanguages/data/languages/brx_Beng.textproto b/Lib/gflanguages/data/languages/brx_Beng.textproto index 5e8f30cc06a..2dd936b68a5 100644 --- a/Lib/gflanguages/data/languages/brx_Beng.textproto +++ b/Lib/gflanguages/data/languages/brx_Beng.textproto @@ -1,7 +1,7 @@ id: "brx_Beng" language: "brx" script: "Beng" -name: "Bodo (Bengali)" +name: "Bodo (Bangla)" region: "IN" sample_text { masthead_full: "গসবঙ" diff --git a/Lib/gflanguages/data/languages/ccp_Beng.textproto b/Lib/gflanguages/data/languages/ccp_Beng.textproto index 01367ef1de3..c7d7181b435 100644 --- a/Lib/gflanguages/data/languages/ccp_Beng.textproto +++ b/Lib/gflanguages/data/languages/ccp_Beng.textproto @@ -1,7 +1,7 @@ id: "ccp_Beng" language: "ccp" script: "Beng" -name: "Chakma (Bengali)" +name: "Chakma (Bangla)" population: 729137 region: "BD" region: "IN" diff --git a/Lib/gflanguages/data/languages/kha_Beng.textproto b/Lib/gflanguages/data/languages/kha_Beng.textproto index 286ed155d83..b5beb627bd6 100644 --- a/Lib/gflanguages/data/languages/kha_Beng.textproto +++ 
b/Lib/gflanguages/data/languages/kha_Beng.textproto @@ -1,6 +1,6 @@ id: "kha_Beng" language: "kha" script: "Beng" -name: "Khasi (Bengali)" +name: "Khasi (Bangla)" population: 0 historical: true diff --git a/Lib/gflanguages/data/languages/khr_Beng.textproto b/Lib/gflanguages/data/languages/khr_Beng.textproto index b56586c7630..ae571a7eced 100644 --- a/Lib/gflanguages/data/languages/khr_Beng.textproto +++ b/Lib/gflanguages/data/languages/khr_Beng.textproto @@ -1,7 +1,7 @@ id: "khr_Beng" language: "khr" script: "Beng" -name: "Kharia (Bengali)" +name: "Kharia (Bangla)" region: "IN" sample_text { masthead_full: "সউবম" diff --git a/Lib/gflanguages/data/languages/kyw_Beng.textproto b/Lib/gflanguages/data/languages/kyw_Beng.textproto index 6825755de62..4569efcca4a 100644 --- a/Lib/gflanguages/data/languages/kyw_Beng.textproto +++ b/Lib/gflanguages/data/languages/kyw_Beng.textproto @@ -1,7 +1,7 @@ id: "kyw_Beng" language: "kyw" script: "Beng" -name: "Kudmali (Bengali)" +name: "Kudmali (Bangla)" region: "IN" sample_text { masthead_full: "সভমন" diff --git a/Lib/gflanguages/data/languages/sat_Beng.textproto b/Lib/gflanguages/data/languages/sat_Beng.textproto index bd3d01689d0..cd2f577cb47 100644 --- a/Lib/gflanguages/data/languages/sat_Beng.textproto +++ b/Lib/gflanguages/data/languages/sat_Beng.textproto @@ -1,6 +1,6 @@ id: "sat_Beng" language: "sat" script: "Beng" -name: "Santali (Bengali)" +name: "Santali (Bangla)" population: 0 historical: true