Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[glyphdata] handle production names for ligatures with script suffixes #771

Merged
merged 8 commits into from
Aug 20, 2023
62 changes: 50 additions & 12 deletions Lib/glyphsLib/glyphdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,22 +196,16 @@ def _construct_category(glyph_name, data):
# Glyph variants (e.g. "fi.alt") don't have their own entry, so we strip e.g. the
# ".alt" and try a second lookup with just the base name. A variant is hopefully in
# the same category as its base glyph.
base_name = glyph_name.split(".", 1)[0]
base_attribute = data.names.get(base_name) or {}
base_name = _split_glyph_name(glyph_name, data)[0]
base_attribute = _lookup_attributes(base_name, data) or {}
if base_attribute:
category = base_attribute.get("category")
sub_category = base_attribute.get("subCategory")
return category, sub_category

# Detect ligatures.
if "_" in base_name:
base_names = base_name.split("_")
# The last name has a suffix, add it to all the names.
if "-" in base_names[-1]:
_, s = base_names[-1].rsplit("-", 1)
base_names = [
(n if n.endswith(f"-{s}") else f"{n}-{s}") for n in base_names
]
base_names = _split_ligature_glyph_name(base_name, data)
base_names_attributes = [_lookup_attributes(name, data) for name in base_names]
first_attribute = base_names_attributes[0]

Expand Down Expand Up @@ -300,6 +294,50 @@ def _translate_category(glyph_name, unicode_category):
return glyphs_category


def _split_ligature_glyph_name(name, data):
    """Split a ligature glyph name into the names of its components.

    The name is split on "_". When the last component contains a "-"
    (a script suffix such as "-deva"), that suffix is stripped from the
    whole name and then offered to every component that does not already
    contain a "-" of its own.

    Args:
        name: The ligature glyph name, without any "." suffix.
        data: The glyph data object handed on to ``_lookup_attributes``.

    Returns:
        A list with one name per ligature component.
    """
    pieces = name.split("_")

    # Without a script suffix on the last component there is nothing to
    # distribute; the plain split is the answer.
    if "-" not in pieces[-1]:
        return pieces

    # Strip the trailing script suffix and split what is left.
    stem, script = name.rsplit("-", 1)
    pieces = stem.split("_")

    # Not a ligature after all: fall back to splitting the untouched name.
    if len(pieces) <= 1:
        return name.split("_")

    for index, piece in enumerate(pieces):
        # A component that already carries its own suffix stays as it is.
        if "-" in piece:
            continue
        suffixed = f"{piece}-{script}"
        # Keep the bare name when the lookup finds it but does not find the
        # suffixed variant.
        bare_entry = _lookup_attributes(piece, data)
        if bare_entry and not _lookup_attributes(suffixed, data):
            continue
        pieces[index] = suffixed
    return pieces


def _split_glyph_name(name, data):
    """Split a glyph name into a base name, the "." separator and a suffix.

    With a single suffix (or none) this is just ``name.partition(".")``.
    With several suffixes (e.g. ".below.ro"), suffixes are appended to the
    base one at a time, shortest candidate first; the first candidate for
    which ``_lookup_attributes`` returns a truthy value becomes the base,
    and the suffixes not yet consumed form the returned suffix.

    Args:
        name: The full glyph name.
        data: The glyph data object handed on to ``_lookup_attributes``.

    Returns:
        A ``(base, dot, suffix)`` tuple of strings.
    """
    base, dot, suffix = name.partition(".")

    # Fewer than two suffixes: the plain partition is already the result.
    # The separate `not dot` test is needed because "" is "in" every string.
    if not dot or dot not in suffix:
        return base, dot, suffix

    pieces = suffix.split(dot)
    for taken in range(1, len(pieces) + 1):
        # Base plus the first `taken` suffixes, e.g. "da-khmer.below".
        candidate = dot.join([base, *pieces[:taken]])
        if _lookup_attributes(candidate, data):
            return candidate, dot, dot.join(pieces[taken:])

    # No extended base name was found; use the first-dot split.
    return base, dot, suffix


def _construct_production_name(glyph_name, data=None):
"""Return the production name for a glyph name from the GlyphData.xml
database according to the AGL specification.
Expand All @@ -321,7 +359,7 @@ def _construct_production_name(glyph_name, data=None):

# At this point, we have already checked the data for the full glyph name, so
# directly go to the base name here (e.g. when looking at "fi.alt").
base_name, dot, suffix = glyph_name.partition(".")
base_name, dot, suffix = _split_glyph_name(glyph_name, data)
glyphinfo = _lookup_attributes(base_name, data)
if glyphinfo and glyphinfo.get("production"):
# Found the base glyph.
Expand All @@ -339,7 +377,7 @@ def _construct_production_name(glyph_name, data=None):

# So we have a ligature that is not mapped in the data. Split it up and
# look up the individual parts.
base_name_parts = base_name.split("_")
base_name_parts = _split_ligature_glyph_name(base_name, data)

# If all parts are in the AGLFN list, the glyph name is our production
# name already.
Expand All @@ -354,7 +392,7 @@ def _construct_production_name(glyph_name, data=None):
# A name present in the AGLFN is a production name already.
production_names.append(part)
else:
part_entry = data.names.get(part) or {}
part_entry = _lookup_attributes(part, data) or {}
part_production_name = part_entry.get("production")
if part_production_name:
production_names.append(part_production_name)
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fs==2.4.16
# via fonttools
iniconfig==1.1.1
# via pytest
lxml==4.9.1
lxml==4.9.3
# via xmldiff
mccabe==0.7.0
# via flake8
Expand Down
219 changes: 219 additions & 0 deletions tests/glyphdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import unittest
import xml.etree.ElementTree

import pytest

from glyphsLib.glyphdata import get_glyph


Expand Down Expand Up @@ -81,6 +83,7 @@ def prod(n):
self.assertEqual(prod("vaphalaa-malayalam"), "uni0D030D35.1")
self.assertEqual(prod("onethird"), "uni2153")
self.assertEqual(prod("Jacute"), "uni004A0301")
self.assertEqual(prod("Ech_Vew-arm.liga"), "uni0535054E.liga")

def test_unicode(self):
def uni(n):
Expand Down Expand Up @@ -190,5 +193,221 @@ def test_glyphdata_no_duplicates(self):
production_names.add(glyph_name_production)


# Testing more production names separately because parameterizing is easier.
PRODUCTION_NAMES = {
"Ech_Vew-arm.liga": "uni0535054E.liga",
"Men_Ech-arm.liga": "uni05440535.liga",
"Men_Ini-arm.liga": "uni0544053B.liga",
"Men_Now-arm.liga": "uni05440546.liga",
"Men_Xeh-arm.liga": "uni0544053D.liga",
"Vew_Now-arm.liga": "uni054E0546.liga",
"aiMatra_anusvara-deva": "uni09480902",
"aiMatra_candraBindu-deva": "uni09480901",
"aiMatra_reph-deva": "uni09480930094D",
"aiMatra_reph_anusvara-deva": "uni09480930094D0902",
"ca_iMatra-tamil": "uni0B9A0BBF",
"ca_uMatra-tamil": "uni0B9A0BC1",
"ca_uuMatra-tamil": "uni0B9A0BC2",
"ch_ya-deva": "uni091B094D092F",
"d_ba-deva": "uni0926094D092C",
"d_bha-deva": "uni0926094D092D",
"d_da-deva": "uni0926094D0926",
"d_dh_ya-deva": "uni0926094D0927094D092F",
"d_dha-deva": "uni0926094D0927",
"d_ga-deva": "uni0926094D0917",
"d_gha-deva": "uni0926094D0918",
"d_ma-deva": "uni0926094D092E",
"d_ra-deva": "uni0926094D0930",
"d_va-deva": "uni0926094D0935",
"d_ya-deva": "uni0926094D092F",
"da-khmer.below.ro": "uni17D2178A.ro",
"da_rVocalicMatra-deva": "uni09260943",
"da_uMatra-deva": "uni09260941",
"da_uuMatra-deva": "uni09260942",
"dd_dda-deva": "uni0921094D0921",
"dd_ddha-deva": "uni0921094D0922",
"dd_ya-deva": "uni0921094D092F",
"ddh_ddha-deva": "uni0922094D0922",
"ddh_ya-deva": "uni0922094D092F",
"eCandraMatra_anusvara-deva": "uni09450902",
"eCandraMatra_reph-deva": "uni09450930094D",
"eMatra_anusvara-deva": "uni09470902",
"eMatra_candraBindu-deva": "uni09470901",
"eMatra_reph-deva": "uni09470930094D",
"eMatra_reph_anusvara-deva": "uni09470930094D0902",
"eShortMatra_anusvara-deva": "uni09460902",
"eShortMatra_candraBindu-deva": "uni09460901",
"eShortMatra_reph-deva": "uni09460930094D",
"eShortMatra_reph_anusvara-deva": "uni09460930094D0902",
"ech_vew-arm.liga.sc": "uni0565057E.liga.sc",
"finalkaf_qamats-hb": "uni05DA05B8",
"finalkaf_sheva-hb": "uni05DA05B0",
"finalkafdagesh_qamats-hb": "uniFB3A05B8",
"finalkafdagesh_sheva-hb": "uniFB3A05B0",
"h_la-deva": "uni0939094D0932",
"h_ma-deva": "uni0939094D092E",
"h_na-deva": "uni0939094D0928",
"h_nna-deva": "uni0939094D0923",
"h_ra-deva": "uni0939094D0930",
"h_ra_uMatra-deva": "uni0939094D09300941",
"h_ra_uuMatra-deva": "uni0939094D09300942",
"h_va-deva": "uni0939094D0935",
"h_ya-deva": "uni0939094D092F",
"ha_iMatra-tamil": "uni0BB90BBF",
"ha_iiMatra-tamil": "uni0BB90BC0",
"ha_rVocalicMatra-deva": "uni09390943",
"ha_rrVocalicMatra-deva": "uni09390944",
"ha_uMatra-deva": "uni09390941",
"ha_uMatra-tamil": "uni0BB90BC1",
"ha_uuMatra-deva": "uni09390942",
"ha_uuMatra-tamil": "uni0BB90BC2",
"hatafpatah_siluqleft-hb": "uni05B205BD",
"hatafqamats_siluqleft-hb": "uni05B305BD",
"hatafsegol_siluqleft-hb": "uni05B105BD",
"iMark_toandakhiat-khmer": "uni17B717CD",
"iMark_toandakhiat-khmer.narrow": "uni17B717CD.narrow",
"idotaccent.sc": "i.loclTRK.sc", # i.sc.loclTRK
"iiMatra_reph-deva": "uni09400930094D",
"iiMatra_reph-deva.alt2": "uni09400930094D.alt2",
"iiMatra_reph_anusvara-deva": "uni09400930094D0902",
"iiMatra_reph_anusvara-deva.alt2": "uni09400930094D0902.alt2",
"j_ny-deva": "uni091C094D091E094D",
"j_ny-deva.alt2": "uni091C094D091E094D.alt2",
"j_ny-deva.alt3": "uni091C094D091E094D.alt3",
"j_ny-deva.alt4": "uni091C094D091E094D.alt4",
"j_ny-deva.alt5": "uni091C094D091E094D.alt5",
"j_ny-deva.alt6": "uni091C094D091E094D.alt6",
"j_ny-deva.alt7": "uni091C094D091E094D.alt7",
"j_ny-deva.alt8": "uni091C094D091E094D.alt8",
"j_nya-deva": "uni091C094D091E",
"ja_iMatra-tamil": "uni0B9C0BBF",
"ja_iiMatra-tamil": "uni0B9C0BC0",
"k_ss-deva": "uni0915094D0937094D",
"k_ss-deva.alt2": "uni0915094D0937094D.alt2",
"k_ss-deva.alt3": "uni0915094D0937094D.alt3",
"k_ss-deva.alt4": "uni0915094D0937094D.alt4",
"k_ss-deva.alt5": "uni0915094D0937094D.alt5",
"k_ss-deva.alt6": "uni0915094D0937094D.alt6",
"k_ss-deva.alt7": "uni0915094D0937094D.alt7",
"k_ssa-deva": "uni0915094D0937",
"k_ssa-tamil": "uni0B950BCD0BB7",
"k_ssa_iMatra-tamil": "uni0B950BCD0BB70BBF",
"k_ssa_iiMatra-tamil": "uni0B950BCD0BB70BC0",
"k_ssa_uMatra-tamil": "uni0B950BCD0BB70BC1",
"k_ssa_uuMatra-tamil": "uni0B950BCD0BB70BC2",
"ka_iMatra-tamil": "uni0B950BBF",
"ka_uMatra-tamil": "uni0B950BC1",
"ka_uuMatra-tamil": "uni0B950BC2",
"la_iMatra-tamil": "uni0BB20BBF",
"la_iiMatra-tamil": "uni0BB20BC0",
"la_uMatra-tamil": "uni0BB20BC1",
"la_uuMatra-tamil": "uni0BB20BC2",
"lamed_dagesh_holam-hb": "uni05DC05BC05B9",
"lamed_holam-hb": "uni05DC05B9",
"lla_uMatra-tamil": "uni0BB30BC1",
"lla_uuMatra-tamil": "uni0BB30BC2",
"llla_iMatra-tamil": "uni0BB40BBF",
"llla_iiMatra-tamil": "uni0BB40BC0",
"llla_uMatra-tamil": "uni0BB40BC1",
"llla_uuMatra-tamil": "uni0BB40BC2",
"ma_iMatra-tamil": "uni0BAE0BBF",
"ma_iiMatra-tamil": "uni0BAE0BC0",
"ma_uMatra-tamil": "uni0BAE0BC1",
"ma_uuMatra-tamil": "uni0BAE0BC2",
"mo-khmer.below.ro": "uni17D21798.ro",
"moMa_underscore-thai": "uni0E21005F", # uni0E21_uni005F
"na_iMatra-tamil": "uni0BA80BBF",
"na_uMatra-tamil": "uni0BA80BC1",
"na_uuMatra-tamil": "uni0BA80BC2",
"ng_ya-deva": "uni0919094D092F",
"nga_uMatra-tamil": "uni0B990BC1",
"nga_uuMatra-tamil": "uni0B990BC2",
"ngoNgu_underscore-thai": "uni0E07005F", # uni0E07_uni005F
"niggahita_maiCatawa-lao": "uni0ECD0ECB",
"niggahita_maiCatawa-lao.right": "uni0ECD0ECB.right",
"niggahita_maiEk-lao": "uni0ECD0EC8",
"niggahita_maiEk-lao.right": "uni0ECD0EC8.right",
"niggahita_maiTho-lao": "uni0ECD0EC9",
"niggahita_maiTho-lao.right": "uni0ECD0EC9.right",
"niggahita_maiTi-lao": "uni0ECD0ECA",
"niggahita_maiTi-lao.right": "uni0ECD0ECA.right",
"nikhahit_maiChattawa-thai": "uni0E4D0E4B",
"nikhahit_maiChattawa-thai.narrow": "uni0E4D0E4B.narrow",
"nikhahit_maiEk-thai": "uni0E4D0E48",
"nikhahit_maiEk-thai.narrow": "uni0E4D0E48.narrow",
"nikhahit_maiTho-thai": "uni0E4D0E49",
"nikhahit_maiTho-thai.narrow": "uni0E4D0E49.narrow",
"nikhahit_maiTri-thai": "uni0E4D0E4A",
"nikhahit_maiTri-thai.narrow": "uni0E4D0E4A.narrow",
"nna_uMatra-tamil": "uni0BA30BC1",
"nna_uuMatra-tamil": "uni0BA30BC2",
"nnna_uMatra-tamil": "uni0BA90BC1",
"nnna_uuMatra-tamil": "uni0BA90BC2",
"nno-khmer.below.narrow1": "uni17D2178E.narrow1",
"nno-khmer.below.narrow2": "uni17D2178E.narrow2",
"noNu_underscore-thai": "uni0E19005F", # uni0E19_uni005F
"nya_iMatra-tamil": "uni0B9E0BBF",
"nya_uMatra-tamil": "uni0B9E0BC1",
"nya_uuMatra-tamil": "uni0B9E0BC2",
"nyo-khmer.full.below.narrow": "uni17D21789.full.below.narrow",
"p_ta-deva": "uni092A094D0924",
"pa_uMatra-tamil": "uni0BAA0BC1",
"pa_uuMatra-tamil": "uni0BAA0BC2",
"pho-khmer.below.ro": "uni17D21797.ro",
"po-khmer.below.ro": "uni17D21796.ro",
"ra_uMatra-deva": "uni09300941",
"ra_uMatra-tamil": "uni0BB00BC1",
"ra_uuMatra-deva": "uni09300942",
"ra_uuMatra-tamil": "uni0BB00BC2",
"reph_anusvara-deva": "uni0930094D0902",
"ro-khmer.pre.narrow": "uni17D2179A.narrow",
"rra_iMatra-tamil": "uni0BB10BBF",
"rra_iiMatra-tamil": "uni0BB10BC0",
"rra_uMatra-tamil": "uni0BB10BC1",
"rra_uuMatra-tamil": "uni0BB10BC2",
"sa_iMatra-tamil": "uni0BB80BBF",
"sa_iiMatra-tamil": "uni0BB80BC0",
"sa_uMatra-tamil": "uni0BB80BC1",
"sa_uuMatra-tamil": "uni0BB80BC2",
"sh_r-deva": "uni0936094D094D0930", # uni0936094D0930094D
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with Devanagari conjuncts, but it looks like there's some reordering that is (or is not) happening here?
/cc @schriftgestalt

- uni0936094D094D0930
+ uni0936094D0930094D

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is addressed by the mini Devanagari shaper in #818, so I’m not going to worry about it here.

"sh_ra-deva": "uni0936094D0930",
"sh_ra_iiMatra-tamil": "uni0BB60BCD0BB00BC0",
"ss_tta-deva": "uni0937094D091F",
"ss_ttha-deva": "uni0937094D0920",
"ssa_iMatra-tamil": "uni0BB70BBF",
"ssa_iiMatra-tamil": "uni0BB70BC0",
"ssa_uMatra-tamil": "uni0BB70BC1",
"ssa_uuMatra-tamil": "uni0BB70BC2",
"t_r-deva": "uni0924094D094D0930", # uni0924094D0930094D
"t_ra-deva": "uni0924094D0930",
"t_ta-deva": "uni0924094D0924",
"ta-khmer.below.ro": "uni17D2178F.ro",
"ta_iMatra-tamil": "uni0BA40BBF",
"ta_uMatra-tamil": "uni0BA40BC1",
"ta_uuMatra-tamil": "uni0BA40BC2",
"tt_tta-deva": "uni091F094D091F",
"tt_ttha-deva": "uni091F094D0920",
"tt_ya-deva": "uni091F094D092F",
"tta_iMatra-tamil": "uni0B9F0BBF",
"tta_uMatra-tamil": "uni0B9F0BC1",
"tta_uuMatra-tamil": "uni0B9F0BC2",
"tth_ttha-deva": "uni0920094D0920",
"tth_ya-deva": "uni0920094D092F",
"va_uMatra-tamil": "uni0BB50BC1",
"va_uuMatra-tamil": "uni0BB50BC2",
"ya_uMatra-tamil": "uni0BAF0BC1",
"ya_uuMatra-tamil": "uni0BAF0BC2",
"yoYing_underscore-thai": "uni0E0D005F", # uni0E0D_uni005F
}


@pytest.mark.parametrize("test_input,expected", PRODUCTION_NAMES.items())
def test_prod_names(test_input, expected):
    """Check one PRODUCTION_NAMES entry: glyph name -> production name."""
    glyph = get_glyph(test_input)
    assert glyph.production_name == expected


if __name__ == "__main__":
unittest.main()
Loading