From a655a97bab750547d1997dc04cec6db78dd178b9 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Tue, 3 Oct 2017 22:50:37 +0100 Subject: [PATCH 1/2] added unit tests for gsub_diff module. The current implementation only supports LookupType 1, Single sub --- tests/gsub_diff_test.py | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/gsub_diff_test.py diff --git a/tests/gsub_diff_test.py b/tests/gsub_diff_test.py new file mode 100644 index 00000000..92b7492c --- /dev/null +++ b/tests/gsub_diff_test.py @@ -0,0 +1,49 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import tempfile +import unittest +from nototools.gsub_diff import GsubDiffFinder +from hb_input_test import make_font + + +class GposDiffFinderText(unittest.TestCase): + def _expect_gsub_diffs(self, source_a, source_b, pairs): + font_a = make_font('feature ccmp {\n%s\n} ccmp;' % source_a) + font_b = make_font('feature ccmp {\n%s\n} ccmp;' % source_b) + file_a = tempfile.NamedTemporaryFile() + file_b = tempfile.NamedTemporaryFile() + font_a.save(file_a.name) + font_b.save(file_b.name) + finder = GsubDiffFinder(file_a.name, file_b.name) + + diffs = finder.find_gsub_diffs() + print(len(diffs)) + self.assertIn('%d differences in GSUB rules' % len(pairs), diffs) + for pair_diff in pairs: + self.assertIn(pair_diff, diffs) + + def test_type1_gsub(self): + self._expect_gsub_diffs(''' + sub A by A.sc; + sub B by B.sc; + ''', ''' + sub A by A.sc; + ''', + [('- ccmp B B.sc')]) + + +if __name__ == '__main__': + unittest.main() From 7303339321ed2ea730da5a0238087da796a0e8d7 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Tue, 3 Oct 2017 23:09:28 +0100 Subject: [PATCH 2/2] gsub_diff: Find diffs for LookupTypes 2, 3, 4, 7 In the previous implementation, only LookupType 1, Single substitutions could be compared. This commit adds support for LookupTypes 2, 3, 4 and 7. LookupTypes 5, 6, 8 are still not supported by this commit. Tests have been added to ensure each LookupType gets parsed correctly. Further tests have been added for each missing LookupType to ensure they are not getting parsed. --- nototools/gsub_diff.py | 155 +++++++++++++++++++++++++++++++++------- tests/gsub_diff_test.py | 131 ++++++++++++++++++++++++++++++++- 2 files changed, 257 insertions(+), 29 deletions(-) diff --git a/nototools/gsub_diff.py b/nototools/gsub_diff.py index de032b91..7acabca3 100644 --- a/nototools/gsub_diff.py +++ b/nototools/gsub_diff.py @@ -44,41 +44,144 @@ def __init__(self, file_a, file_b, output_lines=20): def find_gsub_diffs(self): """Report differences in substitution rules.""" - - rules_a = self._get_gsub_rules(self.text_a, self.file_a) - rules_b = self._get_gsub_rules(self.text_b, self.file_b) - - diffs = [] - report = [''] # first line replaced by difference count - for rule in rules_a: - if rule not in rules_b: - diffs.append(('-',) + rule) - for rule in rules_b: - if rule not in rules_a: - diffs.append(('+',) + rule) - # ('+', 'smcp', 'Q', 'Q.sc') + new = [self._format_rule(r, '+') for r in self.find_new_rules()] + missing = [self._format_rule(r, '-') for r in self.find_missing_rules()] + diffs = missing + new + # ('+', 'smcp', 'Q', 'by', Q.sc') # Sort order: # 1. Feature tag # 2. Glyph name before substitution # 3. Glyph name after substitution - diffs.sort(key=lambda t:(t[1], t[2], t[3])) + diffs.sort(key=lambda t:(t[1], t[2], t[4])) report = ['%d differences in GSUB rules' % len(diffs)] report.extend(' '.join(diff) for diff in diffs) return '\n'.join(report[:self.output_lines + 1]) - def _get_gsub_rules(self, text, filename): - """Get substitution rules in this ttxn output.""" + def find_new_rules(self): + rules_a = self._get_gsub_rules(self.text_a, self.file_a) + rules_b = self._get_gsub_rules(self.text_b, self.file_b) + return [r for r in rules_b if r not in rules_a] + + def find_missing_rules(self): + rules_a = self._get_gsub_rules(self.text_a, self.file_a) + rules_b = self._get_gsub_rules(self.text_b, self.file_b) + return [r for r in rules_a if r not in rules_b] + + def _get_gsub_rules(self, text, file): + """ + Parse the ttxn GSUB table in the following manner: + + 1. Get features + 2. Get feature content + 3. Extract lookup rules from feature content + + Following substitutions are currently implemented: + - Type 1: Single substitutions + - Type 2: Multiple substitutions + - Type 3: Alternate substitutions + - Type 4: Ligature substitutionss + TODO: LookupTypes 5, 6, 8 still need implementing + """ + rules = [] + features = self._get_gsub_features(text) + for feature in features: + content = self._get_feature_content(text, feature) + lookups_rules = self._get_lookups_rules(text, content[0], feature) + rules += lookups_rules + return rules + + def _get_gsub_features(self, text): + features = set() feature_name_rx = r'feature (\w+) {' - contents_rx = r'feature %s {(.*?)} %s;' - rule_rx = r'sub ([\w.]+) by ([\w.]+);' - rules = set() for name in re.findall(feature_name_rx, text): - contents = re.findall(contents_rx % (name, name), text, re.S) - assert len(contents) == 1, 'Multiple %s features in %s' % ( - name, filename) - contents = contents[0] - for lhs, rhs in re.findall(rule_rx, contents): - rules.add((name, lhs, rhs)) - return rules + features.add(name) + return list(features) + + def _get_feature_content(self, text, feature): + contents_rx = r'feature %s {(.*?)} %s;' + contents = re.findall(contents_rx % (feature, feature), text, re.S) + return contents + + def _get_lookups_rules(self, text, content, feature): + """Ignore rules which use "'". These are contextual and not in + lookups 1-4""" + rule_rx = r"[^C] sub (.*[^\']) (by|from) (.*);" + rules = re.findall(rule_rx, content) + parsed_rules = self._parse_gsub_rules(rules, feature) + return parsed_rules + + def _parse_gsub_rules(self, rules, feature): + """ + Parse GSUB sub LookupTypes 1, 2, 3, 4, 7. Return list of tuples with + the following tuple sequence. + + (feature, [input glyphs], operator, [output glyphs]) + + Type 1 Single Sub: + sub a by a.sc; + sub b by b.sc; + [ + (feat, ['a'], 'by' ['a.sc']), + (feat, ['b'], 'by' ['b.cs']) + ] + + + Type 2 Multiple Sub: + sub f_f by f f; + sub f_f_i by f f i; + [ + (feat, ['f_f'], 'by', ['f', 'f']), + (feat, ['f_f_i'], 'by', ['f', 'f', 'i']) + ] + + Type 3 Alternative Sub: + sub ampersand from [ampersand.1 ampersand.2 ampersand.3]; + [ + (feat, ['ampersand'], 'from', ['ampersand.1']), + (feat, ['ampersand'], 'from', ['ampersand.2']), + (feat, ['ampersand'], 'from', ['ampersand.3']) + ] + + Type 4 Ligature Sub: + sub f f by f_f; + sub f f i by f_f_i; + [ + (feat, ['f', 'f'] 'by' ['f_f]), + (feat, ['f', 'f', 'i'] 'by' ['f_f_i']) + ] + + http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#4.e + """ + parsed = [] + for idx, (left, op, right) in enumerate(rules): + + left_group, right_group = [], [] + if left.startswith('[') and left.endswith(']'): + left = self._gsub_rule_group_to_string(left) + + if right.startswith('[') and right.endswith(']'): + right = self._gsub_rule_group_to_string(right) + + if op == 'by': # parse LookupType 1, 2, 4 + parsed.append((feature, left.split(), op, right.split())) + elif op == 'from': # parse LookupType 3 + for glyph in right.split(): # 'a.alt a.sc' -> ['a.alt', 'a.sc'] + parsed.append((feature, left.split(), op, [glyph])) + return parsed + + def _format_rule(self, rule, sign): + """Unnest the tuple rule sequence to more report friendly format""" + s = [sign] + for item in rule: + if not isinstance(item, str): + for sub_item in item: + s.append(sub_item) + else: + s.append(item) + return s + + def _gsub_rule_group_to_string(self, seq): + """[a a.sc a.sups] --> 'a a.sc a.sups'""" + return seq[1:-1] diff --git a/tests/gsub_diff_test.py b/tests/gsub_diff_test.py index 92b7492c..b76ccd3a 100644 --- a/tests/gsub_diff_test.py +++ b/tests/gsub_diff_test.py @@ -13,6 +13,11 @@ # limitations under the License. +"""Tests for gsub_diff module. Test examples for each LookupType taken from +the Adobe fea spec: +http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#5.b +""" + import tempfile import unittest from nototools.gsub_diff import GsubDiffFinder @@ -30,19 +35,139 @@ def _expect_gsub_diffs(self, source_a, source_b, pairs): finder = GsubDiffFinder(file_a.name, file_b.name) diffs = finder.find_gsub_diffs() - print(len(diffs)) self.assertIn('%d differences in GSUB rules' % len(pairs), diffs) for pair_diff in pairs: self.assertIn(pair_diff, diffs) - def test_type1_gsub(self): + def test_type1_gsub_1(self): + """Test LookupType 1 Single substitutions""" self._expect_gsub_diffs(''' sub A by A.sc; sub B by B.sc; ''', ''' sub A by A.sc; ''', - [('- ccmp B B.sc')]) + [('- ccmp B by B.sc')]) + + def test_type1_gsub_2(self): + """Test LookupType 1 Single substitutions on groups""" + self._expect_gsub_diffs(''' + sub [A B] by [A.sc B.sc]; + ''', ''' + sub [A] by [A.sc]; + ''', + [('- ccmp B by B.sc')]) + + def test_type2_gsub(self): + """Test LookupType 2 Multiple substitutions""" + self._expect_gsub_diffs(''' + sub f_l by f l; + ''', ''' + sub f_l by f l; + sub c_h by c h; + ''', + [('+ ccmp c_h by c h')]) + + def test_type3_gsub(self): + """Test LookupType 3 Alternate substitutions""" + self._expect_gsub_diffs(''' + sub A from [A.swash A.sc]; + ''', ''' + sub A from [A.swash A.sc]; + sub B from [B.swash B.sc]; + ''', + [('+ ccmp B from B.swash'), + ('+ ccmp B from B.sc')]) + + def test_type4_gsub_1(self): + """Test LookupType 4 Ligature substitutions""" + self._expect_gsub_diffs(''' + sub f l by f_l; + sub c h by c_h; + ''', ''' + sub f l by f_l; + ''', + [('- ccmp c h by c_h')]) + + def test_type4_gsub_2(self): + """Test LookupType 4 Ligature substitutions on groups""" + self._expect_gsub_diffs(''' + sub [f F.swash] [l L.swash] by f_l; + ''', ''' + sub [f] [l] by f_l; + ''', + [('- ccmp F.swash L.swash by f_l'), + ('- ccmp F.swash l by f_l'), + ('- ccmp f L.swash by f_l'), + ]) + + def test_type5_and_6_gsub_1(self): + """LookupType 5 and 6 not implemented, make sure it returns nothing. + + This lookupType can use other lookups so include them in the test""" + self._expect_gsub_diffs(''' + lookup CNTXT_LIGS { + sub c t by c_t; + } CNTXT_LIGS; + + lookup CNTXT_SUB { + sub s by s.end; + } CNTXT_SUB; + + # LookupType 6 implementation + lookup test { + sub [ a e i o u] c' lookup CNTXT_LIGS t' s' lookup CNTXT_SUB; + } test; + ''',''' + lookup CNTXT_LIGS { + sub c t by c_t; + } CNTXT_LIGS; + + lookup CNTXT_SUB { + sub s by s.end; + } CNTXT_SUB; + ''', + []) + + def test_type5_and_6_gsub_2(self): + """LookupType 5 and 6 not implemented, make sure it returns nothing. + """ + self._expect_gsub_diffs(''' + substitute [a e n] d' by d.alt; + ''',''' + ''', + []) + + def test_type5_and_6_gsub_3(self): + """LookupType 5 and 6 not implemented, make sure it returns nothing. + """ + self._expect_gsub_diffs(''' + substitute [e e.begin]' t' c by ampersand; + ''',''' + ''', + []) + + def test_type7_gsub(self): + """Test LookupType 7 Extension substitution""" + self._expect_gsub_diffs(''' + lookup fracbar useExtension { + sub slash by fraction; + } fracbar; + ''',''' + lookup fracbar useExtension { + # missing rules + } fracbar; + ''', + [('- ccmp slash by fraction')]) + + def test_type8_gsub(self): + """LookupType 8 not implemented, make sure it returns nothing""" + self._expect_gsub_diffs(''' + reversesub [a e n] d' by d.alt; + ''',''' + + ''', + []) if __name__ == '__main__':