-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathaee_compare.py
150 lines (136 loc) · 6.16 KB
/
aee_compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import yaml
from AEEChars import vchar_data
# Lookup table for computer (ASCII) braille: the position of a character in
# this string is the offset of its cell from U+2800, the first code point of
# the Unicode Braille Patterns block.
COMPUTER_BRAILLE_TO_UNICODE = " a1b'k2l`cif/msp\"e3h9o6r~djg>ntq,*5<-u8v.%{$+x!&;:4|0z7(_?w}#y)="


def computer_braille_to_unicode(ascii: str):
    """Translate a computer-braille (ASCII) string into Unicode braille cells.

    The input is lower-cased first, then every character is looked up in
    ``COMPUTER_BRAILLE_TO_UNICODE`` and replaced by ``chr(index + 0x2800)``.

    Args:
        ascii: Text written in computer braille. An empty string yields "".

    Returns:
        The translated string, one Unicode braille character per input
        character.

    Raises:
        SystemExit: If a character is not in the lookup table. The offending
            character is printed first and the exit status is 1 so that a
            calling shell can detect the failure.
    """
    cells = []
    for ch in ascii.lower():
        offset = COMPUTER_BRAILLE_TO_UNICODE.find(ch)
        if offset < 0:
            # Only "character not in table" is treated as fatal; any other
            # error is allowed to propagate instead of being swallowed.
            print("problem translating '{}'".format(ch))
            raise SystemExit(1)
        cells.append(chr(offset + 0x2800))
    return "".join(cells)
def get_AEE_UEB_data() -> dict[str, str]:
    """Build the AEE lookup table from ``vchar_data``.

    For every entry in ``vchar_data`` (imported from ``AEEChars``), element 0
    becomes the key and element 1 is run through
    ``computer_braille_to_unicode`` to produce the value. If a key occurs more
    than once, the last entry wins.

    Returns:
        A dict mapping each entry's first element to the Unicode braille
        translation of its second element.
    """
    # presumably entry[0] is the character and entry[1] its computer-braille
    # form -- verify against AEEChars.vchar_data
    return {
        entry[0]: computer_braille_to_unicode(entry[1])
        for entry in vchar_data
    }
# Expansion table for the single-character indicator codes that appear in
# MathCAT braille rule text. remove_mathcat_indicators() walks this dict in
# insertion order and replaces every occurrence of each key with its value,
# so the order of the entries matters (e.g. "B" is expanded before the
# blackboard entry introduces literal "BBB"). An empty value removes the
# code from the text. The commented-out entries are kept as they were found.
UEB_EXTRA_CHAR_DICT = {
"S": "SSS", # sans-serif
"B": "⠘", # bold
"𝔹": "⠈BBB", # blackboard
"T": "⠈", # script
"I": "⠨", # italic
"R": "", # roman
# "E": "⠰", # English
"1": "⠰", # Grade 1 symbol
"L": "", # Letter left in to assist in locating letters
"D": "DDD", # German (Deutsche)
"G": "⠨", # Greek
# "V": "⠨⠈", # Greek Variants
# "H": "⠠⠠", # Hebrew
# "U": "⠈⠈", # Russian
"C": "⠠", # capital
"𝐶": "⠠", # capital that never should get word indicator (from chemical element)
"N": "⠼", # number indicator
"t": "⠱", # shape terminator
"W": "⠀", # whitespace
"𝐖": "⠀", # whitespace
"s": "⠆", # typeface single char indicator
"w": "⠂", # typeface word indicator
"e": "⠄", # typeface & capital terminator
"o": "", # flag that what follows is an open indicator (used for standing alone rule)
"c": "", # flag that what follows is an close indicator (used for standing alone rule)
"b": "", # flag that what follows is an open or close indicator (used for standing alone rule)
",": "⠂", # comma
".": "⠲", # period
"-": "-", # hyphen
"—": "⠠⠤", # normal dash (2014) -- assume all normal dashes are unified here [RUEB appendix 3]
"―": "⠐⠠⠤", # long dash (2015) -- assume all long dashes are unified here [RUEB appendix 3]
"#": "", # signals end of script
}
import re
# Pattern for the marker form used for vulgar fractions: '#N', one digit cell,
# 'N⠌N', one digit cell, then optional 'N's and further digit cells.
# The match for a second denominator digit is not quite right, but it is good
# enough for this script.
VULGAR_FRACTION = re.compile('#N([⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚])N⠌N([⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚])N*([⠁⠃⠉⠙⠑⠋⠛⠓⠊⠚])*')


def remove_mathcat_indicators(braille_entry: str) -> str:
    """Expand the indicator codes in a MathCAT braille string.

    Two steps are applied in order:

    1. The 'N' hack for vulgar fractions (#Nb+N⠌Nb+) is collapsed to a single
       leading 'N' followed by the captured digit cells around '⠌'.
    2. Every key of ``UEB_EXTRA_CHAR_DICT`` is replaced by its value, one key
       at a time, in the dict's insertion order.

    Args:
        braille_entry: Braille text as stored in a MathCAT rule.

    Returns:
        The text with the fraction hack removed and all indicator codes
        expanded.
    """
    expanded = VULGAR_FRACTION.sub(r"N\1⠌\2\3", braille_entry)
    for indicator in UEB_EXTRA_CHAR_DICT:
        expanded = expanded.replace(indicator, UEB_EXTRA_CHAR_DICT[indicator])
    return expanded
import os
def get_mathcat_data_from_file(file, braille_code: str) -> dict[str,str]:
    """Read a MathCAT unicode YAML file and map each character to its braille.

    Each item of the YAML document is expected to be a one-entry mapping such
    as ``{'0': [{'t': 'N⠴'}]}``. The braille text is taken from the ``'t'`` key
    of the first replacement; if that is missing or empty, the chemistry-element
    form ``['test']['else'][0]['t']`` is tried instead. The text found is passed
    through ``remove_mathcat_indicators``.

    Args:
        file: Path of the YAML file to read (opened as UTF-8).
        braille_code: Accepted for interface compatibility; not used here.

    Returns:
        A dict from character to expanded braille. When neither form yields a
        usable text entry, a message is printed and the character is still
        recorded with whatever value was found (``None`` or an empty string),
        so callers should treat falsy values as "no braille available".
    """
    # Use a separate name for the handle so the `file` argument is not shadowed.
    with open(file, 'r', encoding="utf-8") as stream:
        definitions = yaml.safe_load(stream)

    names = {}
    # An empty YAML document loads as None; treat it as "no definitions".
    for definition in definitions or []:
        # definition looks like {'0', [{'t': 'N⠴'}]}
        if len(definition) != 1:
            # format() rather than '+': the definition is not a string, so
            # concatenation would raise TypeError instead of reporting it.
            print("*** problem with definition: {}".format(definition))
            continue
        char, replacements = next(iter(definition.items()))
        replacement_dict = replacements[0]
        braille_entry = replacement_dict.get('t')
        if braille_entry:
            # need to replace "C", etc, with the expansion
            braille_entry = remove_mathcat_indicators(braille_entry)
        else:
            # special case for chemistry elements
            try:
                braille_entry = replacement_dict['test']['else'][0]['t']
                braille_entry = remove_mathcat_indicators(braille_entry)
            except (KeyError, IndexError, TypeError):
                # not simple text entry
                print("*** problem with braille entry KEY of definition: {}".format(definition))
        names[char] = braille_entry
    return names
def get_mathcat_data(dir: str, braille_code: str) -> dict[str,str]:
    """Load the MathCAT character table found in ``dir``.

    Only ``unicode.yaml`` inside ``dir`` is read; the result of
    ``get_mathcat_data_from_file`` for that file is returned unchanged.

    Args:
        dir: Directory that contains ``unicode.yaml``.
        braille_code: Passed straight through to ``get_mathcat_data_from_file``.

    Returns:
        The dict produced by ``get_mathcat_data_from_file``.
    """
    unicode_yaml = os.path.join(dir, "unicode.yaml")
    mathcat_entries = get_mathcat_data_from_file(unicode_yaml, braille_code)
    # Merging in "unicode-full.yaml" from the same directory is currently
    # disabled:
    # mathcat_entries.update(get_mathcat_data_from_file(os.path.join(dir, "unicode-full.yaml"), braille_code))
    return mathcat_entries
def write_dictionary(stream, dict: dict[str,str]):
    """Write ``dict`` to ``stream`` as a brace-delimited listing.

    The output is an opening ``{`` line, one `` {key: value},`` line per entry
    in the dict's iteration order, and a closing ``}`` line.

    Args:
        stream: Any object with a ``write(str)`` method.
        dict: The mapping to dump; keys and values are rendered with their
            default string formatting.
    """
    stream.write("{\n")
    for key, value in dict.items():
        stream.write(f" {{{key}: {value}}},\n")
    stream.write("}\n")
def compare_UEB_defs():
    """Compare the AEE and MathCAT UEB tables and write a report.

    The AEE table comes from ``get_AEE_UEB_data()`` and the MathCAT table from
    ``get_mathcat_data("../Rules/Braille/UEB", "UEB")``. Three groups are
    written to ``aee-mathcat-compare.txt`` (UTF-8) in the current directory:

    * DIFFERENCES     -- characters present in both tables with different braille
    * Only in AEE     -- characters with no (or empty) MathCAT braille
    * Only in MathCAT -- characters with no (or empty) AEE braille
    """
    aee_dict = get_AEE_UEB_data()
    mc_dict = get_mathcat_data("../Rules/Braille/UEB", "UEB")

    only_in_aee = {}
    differs = {}
    for aee_char, aee_braille in aee_dict.items():
        mc_braille = mc_dict.get(aee_char)
        if not mc_braille:
            # A missing or empty MathCAT entry counts as "not in MathCAT".
            only_in_aee[aee_char] = aee_braille
        elif aee_braille != mc_braille:
            differs[aee_char] = {"aee": aee_braille, "MathCAT": mc_braille}

    # Look each MathCAT character up in the AEE table. Looking it up in the
    # MathCAT table itself can never find characters AEE lacks.
    only_in_mc = {
        mc_char: mc_braille
        for mc_char, mc_braille in mc_dict.items()
        if not aee_dict.get(mc_char)
    }

    with open("aee-mathcat-compare.txt", 'w', encoding='utf8') as out_stream:
        out_stream.write("\nDIFFERENCES\n")
        write_dictionary(out_stream, differs)
        out_stream.write("\nOnly in AEE\n")
        write_dictionary(out_stream, only_in_aee)
        out_stream.write("\nOnly in MathCAT\n")
        write_dictionary(out_stream, only_in_mc)
def create_aee_rust_tests(in_file: str, out_file: str):
import xml.etree.ElementTree as ET
tree = ET.parse(in_file)
root = tree.getroot()
count = 0
with open(out_file, 'w', encoding='utf8') as out_stream:
out_stream.write("use crate::common::*;\n")
for test in root:
out_stream.write("\n#[test]\n")
out_stream.write("fn aee_{:04d}() {{\n".format(count))
mathml = ET.tostring(test[1], encoding='unicode').replace('"', '\'').replace('\\', '\\\\').rstrip()
out_stream.write(" let expr = \"{}\";\n".format(mathml))
nemeth = test[2].text.replace('\\', '\\\\')
out_stream.write(" test_braille(\"Nemeth\", expr, \"{}\");\n".format(test[2].text))
out_stream.write("}\n")
count += 1
# Script body: there is no __main__ guard, so this runs whenever the module is
# executed or imported.
# The UEB comparison (which writes aee-mathcat-compare.txt) is disabled;
# un-comment the next line to run it.
# compare_UEB_defs()
# Convert the test cases in cptob.xml into Rust tests written to AEE.rs.
create_aee_rust_tests("cptob.xml", "AEE.rs")