Skip to content

Commit

Permalink
updated so it runs with the current set of fonts; still needs corrections
Browse files Browse the repository at this point in the history
  • Loading branch information
marekjez86 committed Dec 8, 2020
1 parent 1e7789e commit a3b1ba2
Show file tree
Hide file tree
Showing 13 changed files with 1,416 additions and 532 deletions.
486 changes: 250 additions & 236 deletions nototools/data/family_name_info_p3.xml

Large diffs are not rendered by default.

311 changes: 158 additions & 153 deletions nototools/data/noto_cmap_phase3.xml

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions nototools/noto_cmap_reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,13 +342,13 @@ def _build_block_to_primary_script():
max_script = assigned_primaries[block]
# print('assigning primary', block_info, '->', max_script)
else:
sys.stderr.write("ERROR: no primary\n", block, block_info)
print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr)
max_script = None
elif max_script == "Zinh":
if block in inherited_primaries:
max_script = inherited_primaries[block]
else:
sys.stderr.write("ERROR: no inherited primary\n", block, block_info)
print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr)
max_script = None
block_to_script[block] = max_script
return block_to_script
Expand Down Expand Up @@ -518,7 +518,15 @@ def _reassign_common_by_block(cmap_ops):
"Alchemical Symbols": "Zsym",
"Geometric Shapes Extended": "SYM2",
"Supplemental Arrows-C": "SYM2",
"Chess Symbols": "SYM2",
"Ideographic Symbols and Punctuation": "CJK",
"Symbols and Pictographs Extended-A": "SYM2",
"Symbols for Legacy Computing": "SYM2",
"Supplemental Symbols and Pictographs": "SYM2",
"Counting Rod Numerals": "SYM2",
"Mayan Numerals": "Zmth",
"Ottoman Siyaq Numbers": "Arab",
"Indic Siyaq Numbers": "Arab",
"Tags": "EXCL",
}

Expand Down
147 changes: 102 additions & 45 deletions nototools/noto_fonts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@
# The '[xxx]' syntax is used to get the noto-xxx value from notoconfig.
# for now we exclude alpha, the phase 3 fonts are here but we don't use
# them yet.
NOTO_FONT_PATHS = ["[fonts]/hinted", "[fonts]/unhinted", "[emoji]/fonts", "[cjk]"]
NOTO_FONT_PATHS = [
"[fonts]/hinted",
"[fonts]/unhinted",
"[emoji]/fonts",
"[cjk]",
]


ODD_SCRIPTS = {
Expand All @@ -54,8 +59,12 @@
"Symbols": "Zsym",
"Emoji": "Zsye",
"TifinaghAPT": "Tfng",
"TifinaghAdrar": "Tfng",
"TifinaghAgrawImazighen": "Tfng",
"TifinaghAhaggar": "Tfng",
"TifinaghAir": "Tfng",
"TifinaghAzawagh": "Tfng",
"TifinaghGhat": "Tfng",
"TifinaghHawad": "Tfng",
"TifinaghRhissaIxa": "Tfng",
"TifinaghSIL": "Tfng",
Expand Down Expand Up @@ -92,9 +101,9 @@ def preferred_script_name(script_key):


def script_name_for_report(script_key):
return _script_key_to_report_name.get(script_key, None) or preferred_script_name(
script_key
)
return _script_key_to_report_name.get(
script_key, None
) or preferred_script_name(script_key)


# NotoFont maps a font path to information we assume the font to have, based
Expand Down Expand Up @@ -149,10 +158,10 @@ def script_name_for_report(script_key):
_FONT_NAME_REGEX = (
# family should be prepended - this is so Roboto can be used with unittests
# that use this regex to parse.
"(Sans|Serif|Naskh|Kufi|Nastaliq|Emoji|ColorEmoji|Music)?"
"(Sans|Serif|Naskh|Kufi|Nastaliq|Emoji|ColorEmoji|Music|Rashi|Traditional)?"
"(Mono(?:space)?)?"
"(.*?)"
"(Eastern|Estrangela|Western|Slanted|New|Unjoined|APT|AgrawImazighen|Ahaggar|Hawad|RhissaIxa|SIL|Tawellemmet)?"
"(Adrar|AgrawImazighen|Ahaggar|Air|APT|Azawagh|Eastern|Estrangela|Ghat|Hawad|Looped|New|RhissaIxa|SIL|Slanted|Supplement|Tawellemmet|Unjoined|Western)?"
"(UI)?"
"(Display)?"
"-?"
Expand All @@ -165,9 +174,11 @@ def script_name_for_report(script_key):
_EXT_REGEX = re.compile(r".*\.(?:ttf|ttc|otf)$")


def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
def get_noto_font(
filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3
):
"""Return a NotoFont if filepath points to a noto font, or None if we can't
process the path."""
process the path."""

filedir, filename = os.path.split(filepath)
if not filedir:
Expand Down Expand Up @@ -213,7 +224,13 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):

is_mono = mono == "Mono"

if width not in [None, "", "Condensed", "SemiCondensed", "ExtraCondensed"]:
if width not in [
None,
"",
"Condensed",
"SemiCondensed",
"ExtraCondensed",
]:
sys.stderr.write('noto_fonts: Unexpected width "%s"\n' % width)
if width in ["SemiCond", "Narrow"]:
width = "SemiCondensed"
Expand All @@ -227,6 +244,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
script = "MONO"
else:
script = "LGC"
elif script == "IndicSiyaqNumbers":
script = "Aran"
elif script == "MayanNumerals":
script = "Maya"
elif script == "Urdu":
# Use 'Aran' for languages written in the Nastaliq Arabic style, like Urdu.
# The font naming uses 'Urdu' which is not a script, but a language.
Expand All @@ -243,7 +264,9 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
try:
script = convert_to_four_letter(script)
except ValueError:
sys.stderr.write("unknown script: %s for %s\n" % (script, filename))
sys.stderr.write(
"unknown script: %s for %s\n" % (script, filename)
)
return None

if not weight:
Expand All @@ -253,7 +276,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
is_UI_metrics = (
is_UI
or style == "Emoji"
or (style == "Sans" and script in noto_data.DEEMED_UI_SCRIPTS_SET)
or (
style == "Sans"
and script in noto_data.DEEMED_UI_SCRIPTS_SET
)
)

is_display = display == "Display"
Expand All @@ -263,7 +289,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
is_hinted = False
else:
hint_status = path.basename(filedir)
if hint_status not in ["hinted", "unhinted"] and "noto-source" not in filedir:
if (
hint_status not in ["hinted", "unhinted"]
and "noto-source" not in filedir
):
# print >> sys.stderr, (
# 'unknown hint status for %s, defaulting to unhinted') % filedir
pass
Expand Down Expand Up @@ -320,11 +349,11 @@ def parse_weight(name):

def script_key_to_scripts(script_key):
"""Return a set of scripts for a script key. The script key is used by
a font to define the set of scripts it supports. Some keys are ours,
e.g. 'LGC', and some are standard script codes that map to multiple
scripts, like 'Jpan'. In either case we need to be able to map a script
code (either unicode character script code, or more general iso script
code) to a font, and we do so by finding it in the list returned here."""
a font to define the set of scripts it supports. Some keys are ours,
e.g. 'LGC', and some are standard script codes that map to multiple
scripts, like 'Jpan'. In either case we need to be able to map a script
code (either unicode character script code, or more general iso script
code) to a font, and we do so by finding it in the list returned here."""
if script_key == "LGC":
return frozenset(["Latn", "Grek", "Cyrl"])
elif script_key == "Aran":
Expand All @@ -343,9 +372,9 @@ def script_key_to_scripts(script_key):

def script_key_to_primary_script(script_key):
"""We need a default script for a font, and fonts using a 'script key' support
multiple fonts. This lets us pick a default sample for a font based on it.
The sample is named with a script that can include 'Jpan' so 'Jpan' should be
the primary script in this case."""
multiple scripts. This lets us pick a default sample for a font based on it.
The sample is named with a script that can include 'Jpan' so 'Jpan' should be
the primary script in this case."""
if script_key == "LGC":
return "Latn"
if script_key == "Aran":
Expand Down Expand Up @@ -398,13 +427,13 @@ def noto_font_to_family_id(notofont):

def noto_font_to_wws_family_id(notofont):
"""Return an id roughly corresponding to the wws family. Used to identify
naming rules for the corresponding fonts. Compare to noto_font_to_family_id,
which corresponds to a preferred family and is used to determine the language
support for those fonts. For example, 'Noto Sans Devanagari UI' and
'Noto Sans Devanagari' support the same languages (e.g. have the same cmap)
but have different wws family names and different name rules (names for the
UI variant use very short abbreviations).
CJK font naming does reflect 'mono' so we add it back to the id."""
naming rules for the corresponding fonts. Compare to noto_font_to_family_id,
which corresponds to a preferred family and is used to determine the language
support for those fonts. For example, 'Noto Sans Devanagari UI' and
'Noto Sans Devanagari' support the same languages (e.g. have the same cmap)
but have different wws family names and different name rules (names for the
UI variant use very short abbreviations).
CJK font naming does reflect 'mono' so we add it back to the id."""
id = noto_font_to_family_id(notofont)
if notofont.is_cjk and notofont.is_mono:
id += "-mono"
Expand Down Expand Up @@ -455,7 +484,9 @@ def wws_family_id_to_name_parts(wws_id):
# mono comes before CJK in the name
if len(part_keys) > 2 and part_keys[2] == "mono":
parts.append("Mono")
part_keys = part_keys[:2] # trim mono so we don't try to add it again
part_keys = part_keys[
:2
] # trim mono so we don't try to add it again
parts.append("CJK")
if script == "hans":
parts.append("sc")
Expand All @@ -474,7 +505,11 @@ def wws_family_id_to_name_parts(wws_id):
# Mono works as a script. The phase 2 'mono-mono' tag was special-cased
# above so it won't get added a second time.
script_name = preferred_script_name(script.title())
script_name = script_name.replace(" ", "").replace("'", "").replace("-", "")
script_name = (
script_name.replace(" ", "")
.replace("'", "")
.replace("-", "")
)
parts.append(script_name)
if len(part_keys) > 2:
extra = part_keys[2]
Expand All @@ -483,18 +518,26 @@ def wws_family_id_to_name_parts(wws_id):
elif extra == "ui":
parts.append("UI")
elif extra in [
"adrar",
"agrawimazighen",
"ahaggar",
"air",
"apt",
"azawagh",
"display",
"eastern",
"estrangela",
"western",
"display",
"unjoined",
"apt",
"ghat",
"hawad",
"agrawimazighen",
"ahaggar",
"looped",
"new",
"rhissaixa",
"sil",
"slanted",
"supplement",
"tawellemmet",
"unjoined",
"western",
]:
parts.append(extra.title())
else:
Expand All @@ -504,9 +547,11 @@ def wws_family_id_to_name_parts(wws_id):

def get_noto_fonts(paths=NOTO_FONT_PATHS):
"""Scan paths for fonts, and create a NotoFont for each one, returning a list
of these. 'paths' defaults to the standard noto font paths, using notoconfig."""
of these. 'paths' defaults to the standard noto font paths, using notoconfig."""

font_dirs = list(filter(None, [tool_utils.resolve_path(p) for p in paths]))
font_dirs = list(
filter(None, [tool_utils.resolve_path(p) for p in paths])
)
print("Getting fonts from: %s" % font_dirs)

all_fonts = []
Expand All @@ -521,7 +566,8 @@ def get_noto_fonts(paths=NOTO_FONT_PATHS):
font = get_noto_font(filepath)
if not font:
sys.stderr.write(
"bad font filename in %s: '%s'.\n" % ((font_dir, filename))
"bad font filename in %s: '%s'.\n"
% ((font_dir, filename))
)
continue

Expand Down Expand Up @@ -563,7 +609,7 @@ def get_font_family_name(font_file):

def get_families(fonts):
"""Group fonts into families, separate into hinted and unhinted, select
representative."""
representative."""

family_id_to_fonts = collections.defaultdict(set)
families = {}
Expand Down Expand Up @@ -600,7 +646,8 @@ def get_families(fonts):
rep_member = rep_member or rep_backup
if not rep_member:
raise ValueError(
"Family %s does not have a representative font." % family_id
"Family %s does not have a representative font."
% family_id
)

name = get_font_family_name(rep_member.filepath)
Expand All @@ -612,16 +659,21 @@ def get_families(fonts):
charset = None

families[family_id] = NotoFamily(
name, family_id, rep_member, charset, hinted_members, unhinted_members
name,
family_id,
rep_member,
charset,
hinted_members,
unhinted_members,
)

return families


def get_family_filename(family):
"""Returns a filename to use for a family zip of hinted/unhinted members.
This is basically the postscript name with weight/style removed.
"""
This is basically the postscript name with weight/style removed.
"""
font = ttLib.TTFont(family.rep_member.filepath, fontNumber=0)
name_record = font_data.get_name_records(font)
try:
Expand All @@ -645,7 +697,10 @@ def _all_noto_font_key_to_names(paths):
ix = fontname.find("-")
familyname = fontname if ix == -1 else fontname[:ix]
wws_key = noto_font_to_wws_family_id(font)
if wws_key_to_family_name.get(wws_key, familyname) != familyname:
if (
wws_key_to_family_name.get(wws_key, familyname)
!= familyname
):
print(
"!!! mismatching font names for key %s: %s and %s"
% (wws_key, wws_key_to_family_name[wws_key], familyname)
Expand All @@ -662,7 +717,9 @@ def test(paths):
print(key, val)
name = "".join(wws_family_id_to_name_parts(key))
if name != val:
raise Exception("!!! generated name %s does not match" % name)
raise Exception(
"!!! generated name %s does not match" % name
)


def main():
Expand Down
Loading

0 comments on commit a3b1ba2

Please sign in to comment.