-
Notifications
You must be signed in to change notification settings - Fork 10
/
laws2lsx.awk
executable file
·63 lines (59 loc) · 2.32 KB
/
laws2lsx.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env -S awk -f
# This file creates an lsx file that merges law names into np's in
# certain fixed contexts. Maybe it could be made more general, but for
# now this is what we need.
# Emit the fixed preamble of the generated dictionary before any input is
# read: XML declaration, root element, a provenance comment naming ARGV[1],
# the symbol definitions, the three paradigms referenced by the generated
# entries ("reading", "reading:" and "pr|lpar|jf"), and the opening tag of
# the main section. Lines belonging to one logical group are written with a
# single print, joined by ORS so the output matches one print per line.
BEGIN {
    # XML declaration and root element.
    print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" ORS \
          "<dictionary type=\"separable\">"
    print ""

    # Provenance banner; ARGV[1] is the first command-line operand.
    print "<!-- Generated automatically from " ARGV[1] " - DO NOT EDIT MANUALLY! -->"
    print ""

    # Empty alphabet and the symbol definitions.
    print " <alphabet></alphabet>" ORS \
          " <sdefs>" ORS \
          " <sdef n=\"np\" c=\"Proper noun\"/>" ORS \
          " <sdef n=\"lpar\" c=\"Left parenthesis\" />" ORS \
          " <sdef n=\"pr\" c=\"Preposition\"/>" ORS \
          " <sdef n=\"@app\"/>" ORS \
          " <sdef n=\"aa\"/>" ORS \
          " </sdefs>"
    print ""

    print " <pardefs>"
    print ""

    # Paradigm "reading": match a reading and keep it in the output.
    print " <pardef n=\"reading\" c=\"match and keep readings (incl. tagless/unknown). Includes end delimiter\">" ORS \
          " <e> <i><f/><w/><d/></i> </e>" ORS \
          " <e> <i><f/><w/><t/><d/></i> </e>" ORS \
          " </pardef>"
    print ""

    # Paradigm "reading:": match a reading and drop it from the output.
    print " <pardef n=\"reading:\" c=\"match and drop readings (incl. tagless/unknown). Includes end delimiter\">" ORS \
          " <e><p><l><f/><w/><d/></l> <r/></p></e>" ORS \
          " <e><p><l><f/><w/><t/><d/></l><r/></p></e>" ORS \
          " </pardef>"
    print ""

    # Paradigm "pr|lpar|jf": the contexts that may precede a law name
    # (a pr-tagged or lpar-tagged reading, or the forms jf / jf. / jamføre).
    print " <pardef n=\"pr|lpar|jf\" c=\"includes end delimiter\">" ORS \
          " <e><i><w/><f/><w/><s n=\"pr\"/><t/><d/></i></e>" ORS \
          " <e><i><w/><f/><w/><s n=\"lpar\"/><t/><d/></i></e>" ORS \
          " <e><i>jf</i> <par n=\"reading\"/></e>" ORS \
          " <e><i>jf.</i> <par n=\"reading\"/></e>" ORS \
          " <e><i>jamføre</i> <par n=\"reading\"/></e>" ORS \
          " </pardef>"
    print ""

    print " </pardefs>"
    print ""

    # Open the section that the per-line entries are written into.
    print " <section id=\"main\" type=\"standard\">"
    print ""
}
# Return `text` with the characters that are markup-significant in XML
# element content (&, <, >) replaced by their entity references.
function xml_escape(text) {
    # "\\&" yields a literal ampersand in a gsub replacement; a bare "&"
    # would re-insert the matched text instead. The ampersand is handled
    # first so the entities added afterwards are not escaped again.
    gsub(/&/, "\\&amp;", text)
    gsub(/</, "\\&lt;", text)
    gsub(/>/, "\\&gt;", text)
    return text
}

# Per-line rule: each input line is one law name. Emit one <e> entry that
#   1. starts with the "pr|lpar|jf" context paradigm,
#   2. consumes every word of the name (each followed by "reading:"), and
#   3. outputs a single np reading whose surface form and lemma are the
#      whole name, with <b/> between the words.
# Blank lines produce no entry.
{
    # Trim surrounding whitespace; [[:space:]] also removes a trailing
    # carriage return from CRLF input. Assigning to $0 re-splits the fields.
    gsub(/^[[:space:]]+|[[:space:]]+$/, "")

    # An empty line would otherwise yield an entry with no words and an
    # empty lemma, matching after every pr/lpar/jf context.
    if (NF == 0)
        next

    print ""
    print " <e>"
    print " <par n=\"pr|lpar|jf\"/>"

    # Build the joined name from the same fields used for matching, so both
    # sides agree on word boundaries regardless of how many blanks (or
    # tabs) separated the words in the input.
    lemma = ""
    for (i = 1; i <= NF; i++) {
        word = xml_escape($i)
        printf " <p><l>%s</l> <r></r></p> <par n=\"reading:\"/>\n", word
        lemma = lemma (i > 1 ? "<b/>" : "") word
    }

    print " <p><l></l> <r>" lemma "<f/>" lemma "<s n=\"np\"/><s n=\"aa\"/><s n=\"@app\"/><d/></r></p>"
    print " </e>"
}
# After the last input line: close the main section and the root element.
END {
    print " </section>" ORS "</dictionary>"
}