updated so it runs with the current set of fonts; still needs corrections
notofonts · Dec 8, 2020 · a3b1ba2 · a3b1ba2
1 parent 1e7789e
commit a3b1ba2
Show file tree

Hide file tree

Showing 13 changed files with 1,416 additions and 532 deletions.
diff --git a/nototools/data/family_name_info_p3.xml b/nototools/data/family_name_info_p3.xml
diff --git a/nototools/data/noto_cmap_phase3.xml b/nototools/data/noto_cmap_phase3.xml
diff --git a/nototools/noto_cmap_reqs.py b/nototools/noto_cmap_reqs.py
@@ -342,13 +342,13 @@ def _build_block_to_primary_script():
                 max_script = assigned_primaries[block]
                 # print('assigning primary', block_info, '->', max_script)
             else:
-                sys.stderr.write("ERROR: no primary\n", block, block_info)
+                print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr)
                 max_script = None
         elif max_script == "Zinh":
             if block in inherited_primaries:
                 max_script = inherited_primaries[block]
             else:
-                sys.stderr.write("ERROR: no inherited primary\n", block, block_info)
+                print("ERROR: no inherited primary\n %s\n %s\n" % (block, block_info), file=sys.stderr)
                 max_script = None
         block_to_script[block] = max_script
     return block_to_script
@@ -518,7 +518,15 @@ def _reassign_common_by_block(cmap_ops):
         "Alchemical Symbols": "Zsym",
         "Geometric Shapes Extended": "SYM2",
         "Supplemental Arrows-C": "SYM2",
+        "Chess Symbols": "SYM2",
+        "Ideographic Symbols and Punctuation": "CJK",
+        "Symbols and Pictographs Extended-A": "SYM2",
+        "Symbols for Legacy Computing": "SYM2",
         "Supplemental Symbols and Pictographs": "SYM2",
+        "Counting Rod Numerals": "SYM2",
+        "Mayan Numerals": "Zmth",
+        "Ottoman Siyaq Numbers": "Arab",
+        "Indic Siyaq Numbers": "Arab",
         "Tags": "EXCL",
     }
 

diff --git a/nototools/noto_fonts.py b/nototools/noto_fonts.py
@@ -34,7 +34,12 @@
 # The '[xxx]' syntax is used to get the noto-xxx value from notoconfig.
 # for now we exclude alpha, the phase 3 fonts are here but we don't use
 # them yet.
-NOTO_FONT_PATHS = ["[fonts]/hinted", "[fonts]/unhinted", "[emoji]/fonts", "[cjk]"]
+NOTO_FONT_PATHS = [
+    "[fonts]/hinted",
+    "[fonts]/unhinted",
+    "[emoji]/fonts",
+    "[cjk]",
+]
 
 
 ODD_SCRIPTS = {
@@ -54,8 +59,12 @@
     "Symbols": "Zsym",
     "Emoji": "Zsye",
     "TifinaghAPT": "Tfng",
+    "TifinaghAdrar": "Tfng",
     "TifinaghAgrawImazighen": "Tfng",
     "TifinaghAhaggar": "Tfng",
+    "TifinaghAir": "Tfng",
+    "TifinaghAzawagh": "Tfng",
+    "TifinaghGhat": "Tfng",
     "TifinaghHawad": "Tfng",
     "TifinaghRhissaIxa": "Tfng",
     "TifinaghSIL": "Tfng",
@@ -92,9 +101,9 @@ def preferred_script_name(script_key):
 
 
 def script_name_for_report(script_key):
-    return _script_key_to_report_name.get(script_key, None) or preferred_script_name(
-        script_key
-    )
+    return _script_key_to_report_name.get(
+        script_key, None
+    ) or preferred_script_name(script_key)
 
 
 # NotoFont maps a font path to information we assume the font to have, based
@@ -149,10 +158,10 @@ def script_name_for_report(script_key):
 _FONT_NAME_REGEX = (
     # family should be prepended - this is so Roboto can be used with unittests
     # that use this regex to parse.
-    "(Sans|Serif|Naskh|Kufi|Nastaliq|Emoji|ColorEmoji|Music)?"
+    "(Sans|Serif|Naskh|Kufi|Nastaliq|Emoji|ColorEmoji|Music|Rashi|Traditional)?"
     "(Mono(?:space)?)?"
     "(.*?)"
-    "(Eastern|Estrangela|Western|Slanted|New|Unjoined|APT|AgrawImazighen|Ahaggar|Hawad|RhissaIxa|SIL|Tawellemmet)?"
+    "(Adrar|AgrawImazighen|Ahaggar|Air|APT|Azawagh|Eastern|Estrangela|Ghat|Hawad|Looped|New|RhissaIxa|SIL|Slanted|Supplement|Tawellemmet|Unjoined|Western)?"
     "(UI)?"
     "(Display)?"
     "-?"
@@ -165,9 +174,11 @@ def script_name_for_report(script_key):
 _EXT_REGEX = re.compile(r".*\.(?:ttf|ttc|otf)$")
 
 
-def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
+def get_noto_font(
+    filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3
+):
     """Return a NotoFont if filepath points to a noto font, or None if we can't
-  process the path."""
+    process the path."""
 
     filedir, filename = os.path.split(filepath)
     if not filedir:
@@ -213,7 +224,13 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
 
     is_mono = mono == "Mono"
 
-    if width not in [None, "", "Condensed", "SemiCondensed", "ExtraCondensed"]:
+    if width not in [
+        None,
+        "",
+        "Condensed",
+        "SemiCondensed",
+        "ExtraCondensed",
+    ]:
         sys.stderr.write('noto_fonts: Unexpected width "%s"\n' % width)
         if width in ["SemiCond", "Narrow"]:
             width = "SemiCondensed"
@@ -227,6 +244,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
             script = "MONO"
         else:
             script = "LGC"
+    elif script == "IndicSiyaqNumbers":
+        script = "Aran"
+    elif script == "MayanNumerals":
+        script = "Maya"
     elif script == "Urdu":
         # Use 'Aran' for languages written in the Nastaliq Arabic style, like Urdu.
         # The font naming uses 'Urdu' which is not a script, but a language.
@@ -243,7 +264,9 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
         try:
             script = convert_to_four_letter(script)
         except ValueError:
-            sys.stderr.write("unknown script: %s for %s\n" % (script, filename))
+            sys.stderr.write(
+                "unknown script: %s for %s\n" % (script, filename)
+            )
             return None
 
     if not weight:
@@ -253,7 +276,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
     is_UI_metrics = (
         is_UI
         or style == "Emoji"
-        or (style == "Sans" and script in noto_data.DEEMED_UI_SCRIPTS_SET)
+        or (
+            style == "Sans"
+            and script in noto_data.DEEMED_UI_SCRIPTS_SET
+        )
     )
 
     is_display = display == "Display"
@@ -263,7 +289,10 @@ def get_noto_font(filepath, family_name="Arimo|Cousine|Tinos|Noto", phase=3):
         is_hinted = False
     else:
         hint_status = path.basename(filedir)
-        if hint_status not in ["hinted", "unhinted"] and "noto-source" not in filedir:
+        if (
+            hint_status not in ["hinted", "unhinted"]
+            and "noto-source" not in filedir
+        ):
             # print >> sys.stderr, (
             #    'unknown hint status for %s, defaulting to unhinted') % filedir
             pass
@@ -320,11 +349,11 @@ def parse_weight(name):
 
 def script_key_to_scripts(script_key):
     """Return a set of scripts for a script key.  The script key is used by
-  a font to define the set of scripts it supports.  Some keys are ours,
-  e.g. 'LGC', and some are standard script codes that map to multiple
-  scripts, like 'Jpan'.  In either case we need to be able to map a script
-  code (either unicode character script code, or more general iso script
-  code) to a font, and we do so by finding it in the list returned here."""
+    a font to define the set of scripts it supports.  Some keys are ours,
+    e.g. 'LGC', and some are standard script codes that map to multiple
+    scripts, like 'Jpan'.  In either case we need to be able to map a script
+    code (either unicode character script code, or more general iso script
+    code) to a font, and we do so by finding it in the list returned here."""
     if script_key == "LGC":
         return frozenset(["Latn", "Grek", "Cyrl"])
     elif script_key == "Aran":
@@ -343,9 +372,9 @@ def script_key_to_scripts(script_key):
 
 def script_key_to_primary_script(script_key):
     """We need a default script for a font, and fonts using a 'script key' support
-  multiple fonts.  This lets us pick a default sample for a font based on it.
-  The sample is named with a script that can include 'Jpan' so 'Jpan' should be
-  the primary script in this case."""
+    multiple fonts.  This lets us pick a default sample for a font based on it.
+    The sample is named with a script that can include 'Jpan' so 'Jpan' should be
+    the primary script in this case."""
     if script_key == "LGC":
         return "Latn"
     if script_key == "Aran":
@@ -398,13 +427,13 @@ def noto_font_to_family_id(notofont):
 
 def noto_font_to_wws_family_id(notofont):
     """Return an id roughly corresponding to the wws family.  Used to identify
-  naming rules for the corresponding fonts. Compare to noto_font_to_family_id,
-  which corresponds to a preferred family and is used to determine the language
-  support for those fonts.  For example, 'Noto Sans Devanagari UI' and
-  'Noto Sans Devanagari' support the same languages (e.g. have the same cmap)
-  but have different wws family names and different name rules (names for the
-  UI variant use very short abbreviations).
-  CJK font naming does reflect 'mono' so we add it back to the id."""
+    naming rules for the corresponding fonts. Compare to noto_font_to_family_id,
+    which corresponds to a preferred family and is used to determine the language
+    support for those fonts.  For example, 'Noto Sans Devanagari UI' and
+    'Noto Sans Devanagari' support the same languages (e.g. have the same cmap)
+    but have different wws family names and different name rules (names for the
+    UI variant use very short abbreviations).
+    CJK font naming does reflect 'mono' so we add it back to the id."""
     id = noto_font_to_family_id(notofont)
     if notofont.is_cjk and notofont.is_mono:
         id += "-mono"
@@ -455,7 +484,9 @@ def wws_family_id_to_name_parts(wws_id):
         # mono comes before CJK in the name
         if len(part_keys) > 2 and part_keys[2] == "mono":
             parts.append("Mono")
-            part_keys = part_keys[:2]  # trim mono so we don't try to add it again
+            part_keys = part_keys[
+                :2
+            ]  # trim mono so we don't try to add it again
         parts.append("CJK")
         if script == "hans":
             parts.append("sc")
@@ -474,7 +505,11 @@ def wws_family_id_to_name_parts(wws_id):
         # Mono works as a script. The phase 2 'mono-mono' tag was special-cased
         # above so it won't get added a second time.
         script_name = preferred_script_name(script.title())
-        script_name = script_name.replace(" ", "").replace("'", "").replace("-", "")
+        script_name = (
+            script_name.replace(" ", "")
+            .replace("'", "")
+            .replace("-", "")
+        )
         parts.append(script_name)
     if len(part_keys) > 2:
         extra = part_keys[2]
@@ -483,18 +518,26 @@ def wws_family_id_to_name_parts(wws_id):
         elif extra == "ui":
             parts.append("UI")
         elif extra in [
+            "adrar",
+            "agrawimazighen",
+            "ahaggar",
+            "air",
+            "apt",
+            "azawagh",
+            "display",
             "eastern",
             "estrangela",
-            "western",
-            "display",
-            "unjoined",
-            "apt",
+            "ghat",
             "hawad",
-            "agrawimazighen",
-            "ahaggar",
+            "looped",
+            "new",
             "rhissaixa",
             "sil",
+            "slanted",
+            "supplement",
             "tawellemmet",
+            "unjoined",
+            "western",
         ]:
             parts.append(extra.title())
         else:
@@ -504,9 +547,11 @@ def wws_family_id_to_name_parts(wws_id):
 
 def get_noto_fonts(paths=NOTO_FONT_PATHS):
     """Scan paths for fonts, and create a NotoFont for each one, returning a list
-  of these.  'paths' defaults to the standard noto font paths, using notoconfig."""
+    of these.  'paths' defaults to the standard noto font paths, using notoconfig."""
 
-    font_dirs = list(filter(None, [tool_utils.resolve_path(p) for p in paths]))
+    font_dirs = list(
+        filter(None, [tool_utils.resolve_path(p) for p in paths])
+    )
     print("Getting fonts from: %s" % font_dirs)
 
     all_fonts = []
@@ -521,7 +566,8 @@ def get_noto_fonts(paths=NOTO_FONT_PATHS):
             font = get_noto_font(filepath)
             if not font:
                 sys.stderr.write(
-                    "bad font filename in %s: '%s'.\n" % ((font_dir, filename))
+                    "bad font filename in %s: '%s'.\n"
+                    % ((font_dir, filename))
                 )
                 continue
 
@@ -563,7 +609,7 @@ def get_font_family_name(font_file):
 
 def get_families(fonts):
     """Group fonts into families, separate into hinted and unhinted, select
-  representative."""
+    representative."""
 
     family_id_to_fonts = collections.defaultdict(set)
     families = {}
@@ -600,7 +646,8 @@ def get_families(fonts):
         rep_member = rep_member or rep_backup
         if not rep_member:
             raise ValueError(
-                "Family %s does not have a representative font." % family_id
+                "Family %s does not have a representative font."
+                % family_id
             )
 
         name = get_font_family_name(rep_member.filepath)
@@ -612,16 +659,21 @@ def get_families(fonts):
             charset = None
 
         families[family_id] = NotoFamily(
-            name, family_id, rep_member, charset, hinted_members, unhinted_members
+            name,
+            family_id,
+            rep_member,
+            charset,
+            hinted_members,
+            unhinted_members,
         )
 
     return families
 
 
 def get_family_filename(family):
     """Returns a filename to use for a family zip of hinted/unhinted members.
-     This is basically the postscript name with weight/style removed.
-  """
+    This is basically the postscript name with weight/style removed.
+    """
     font = ttLib.TTFont(family.rep_member.filepath, fontNumber=0)
     name_record = font_data.get_name_records(font)
     try:
@@ -645,7 +697,10 @@ def _all_noto_font_key_to_names(paths):
         ix = fontname.find("-")
         familyname = fontname if ix == -1 else fontname[:ix]
         wws_key = noto_font_to_wws_family_id(font)
-        if wws_key_to_family_name.get(wws_key, familyname) != familyname:
+        if (
+            wws_key_to_family_name.get(wws_key, familyname)
+            != familyname
+        ):
             print(
                 "!!! mismatching font names for key %s: %s and %s"
                 % (wws_key, wws_key_to_family_name[wws_key], familyname)
@@ -662,7 +717,9 @@ def test(paths):
         print(key, val)
         name = "".join(wws_family_id_to_name_parts(key))
         if name != val:
-            raise Exception("!!! generated name %s does not match" % name)
+            raise Exception(
+                "!!! generated name %s does not match" % name
+            )
 
 
 def main():