-
Notifications
You must be signed in to change notification settings - Fork 4
/
kern_triples.py
133 lines (114 loc) · 3.72 KB
/
kern_triples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from fontTools.ttLib import TTFont
import ngrams
import kern_pair as kern
import cairoft
import functools
import unicodedata
from collections import defaultdict
@functools.cache
def create_blurred_surface_for_text(text):
    """Return a ``kern.Glyph`` for *text* whose surface has been blurred.

    The result is memoized per *text* via ``functools.cache``, so each
    distinct string is processed once.

    Returns ``None`` when ``kern.surface_sum`` of the glyph's surface is
    zero; otherwise the glyph, with its ``surface`` attribute replaced by
    the output of ``kern.blur``.
    """
    rendered = kern.Glyph(text)
    surface_total = kern.surface_sum(rendered.surface)
    if surface_total == 0:
        # Nothing to blur; callers treat None as "skip this text".
        return None

    blurred_surface = kern.blur(rendered.surface)
    rendered.surface = blurred_surface
    return rendered
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Reads bigram and trigram frequencies from one or more dictionary files,
    # computes a kerning value for every bigram with kern.kern_pair, and then,
    # for each trigram, prints how far the middle character should be shifted:
    # half the difference between the kerning of the (left, middle) pair and
    # the kerning of the (middle, right) pair.
    import sys
    import argparse

    parser = argparse.ArgumentParser(
        "python3 kern_triples.py",
        description="Kern middle letter of triples for a font & language pair.",
    )
    parser.add_argument("font", metavar="font.ttf", help="Font file.")
    parser.add_argument("dict", metavar="dict", nargs="+", help="Dictionary file.")
    parser.add_argument(
        "--encoding",
        type=str,
        help="Text encoding. Default: utf-8",
    )
    parser.add_argument(
        "--fea",
        action="store_true",
        help="Output in FEA format. Default: False",
    )
    parser.add_argument(
        "-l",
        "--letters-only",
        action="store_true",
        help="Only list trigrams of letters. Default: False",
    )
    # Numeric defaults live in argparse (rather than `value or default`) so
    # that an explicit 0 on the command line is honoured instead of being
    # silently replaced by the default.
    parser.add_argument(
        "--tolerance",
        type=float,
        default=0.033,
        help="Tolerance for kerning value. Default: 0.033.",
    )
    parser.add_argument(
        "--cutoff",
        type=float,
        default=0.999,
        help="Bigram cutoff probability if dictionary is provided. Default: .999",
    )

    options = parser.parse_args(sys.argv[1:])

    fontfile = options.font
    dictfiles = options.dict
    # An omitted (or empty) --encoding falls back to utf-8.
    encoding = options.encoding or "utf-8"

    # Tolerance is compared against shifts as a fraction of kern.FONT_SIZE
    # (see the threshold check below); values >= 1 are divided by FONT_SIZE
    # first.
    tolerance = options.tolerance
    if tolerance >= 1:
        tolerance = tolerance / kern.FONT_SIZE

    # Cutoff values above 1 are treated as percentages.
    cutoff = options.cutoff
    if cutoff > 1:
        cutoff = cutoff / 100.0

    ttfont = TTFont(fontfile)
    cmap = ttfont["cmap"].getBestCmap()

    # The kern module reads these module-level attributes; set them up for
    # the selected font before any glyph is created.
    kern.FONT_FACE = cairoft.create_cairo_font_face_for_file(fontfile, 0)
    kern.HB_FONT = kern.create_hb_font(fontfile)
    upem = kern.HB_FONT.face.upem

    min_s, max_s = kern.find_s()

    # Accumulate n-gram counts across all dictionary files.
    all_bigrams = defaultdict(int)
    all_trigrams = defaultdict(int)
    for dictfile in dictfiles:
        for order, totals in ((2, all_bigrams), (3, all_trigrams)):
            extracted = ngrams.extract_ngrams_from_file(
                dictfile,
                order,
                cutoff=cutoff,
                encoding=encoding,
                letters_only=options.letters_only,
            )
            for ngram, count in extracted.items():
                totals[ngram] += count

    # Kerning value per bigram. Bigrams that are skipped here are treated as
    # having a kerning value of 0 when trigrams are processed.
    kern_values = {}
    for bigram in all_bigrams:
        # Skip pairs involving a nonspacing mark (Unicode category Mn).
        if any(unicodedata.category(ch) == "Mn" for ch in bigram[:2]):
            continue

        left = create_blurred_surface_for_text(bigram[0])
        right = create_blurred_surface_for_text(bigram[1])
        if left is None or right is None:
            continue

        kern_value, _ = kern.kern_pair(left, right, min_s, max_s, blurred=True)
        if kern_value is None:
            continue

        kern_values[bigram] = kern_value

    for trigram in all_trigrams:
        # Half the difference between the left pair's and the right pair's
        # kerning is the shift applied to the middle character.
        left_kern = kern_values.get(trigram[:2], 0)
        right_kern = kern_values.get(trigram[1:], 0)
        shift = (left_kern - right_kern) / 2
        if abs(shift) < kern.FONT_SIZE * tolerance:
            continue

        # Convert from rendering-size units to font units.
        units = round(shift / kern.FONT_SIZE * upem)

        if options.fea:
            glyph_names = [cmap.get(ord(c)) for c in trigram]
            if None in glyph_names:
                # A character without a cmap entry has no glyph name to put
                # in the rule; emitting "None" would produce invalid FEA.
                continue
            # The value record follows the glyph it adjusts, which must be
            # the MIDDLE glyph of the triple; the third glyph is lookahead.
            print(
                " position %s' %s' <%d 0 0 0> %s;"
                % (glyph_names[0], glyph_names[1], units, glyph_names[2])
            )
        else:
            print(trigram, units)