From 66c6c5ff9c2b1364f013de3a8422ac3aa1617b3a Mon Sep 17 00:00:00 2001 From: Github CI Date: Mon, 4 Mar 2024 11:01:18 +0000 Subject: [PATCH] Automatic build of version commit-b7b698 --- tables/en-ueb-chardefs.uti | 133 +++++++++++++++---------------------- tables/en-ueb-g1.ctb | 18 ++--- tables/en-ueb-g2.ctb | 16 ++++- tables/zhcn-g1.ctb | 10 +++ tables/zhcn-g2.ctb | 10 +++ 5 files changed, 97 insertions(+), 90 deletions(-) diff --git a/tables/en-ueb-chardefs.uti b/tables/en-ueb-chardefs.uti index 86482cd..38ff241 100644 --- a/tables/en-ueb-chardefs.uti +++ b/tables/en-ueb-chardefs.uti @@ -31,49 +31,14 @@ space \x001b 1b escape character for html back-translation space \x00A0 a NO-BREAK SPACE 0020 NON-BREAKING SPACE noback correct "\x200b" "\s" + +include text_nabcc.dis include spaces.uti include latinLetterDef6Dots.uti include latinUppercaseComp6.uti -# Numeric Symbols - -display 0 356 -display 1 2 -display 2 23 -display 3 25 -display 4 256 -display 5 26 -display 6 235 -display 7 2356 -display 8 236 -display 9 35 - -digit 0 245 -digit 1 1 -digit 2 12 -digit 3 14 -digit 4 145 -digit 5 15 -digit 6 124 -digit 7 1245 -digit 8 125 -digit 9 24 - -# this is a bad work-around for doing numeric mode backwards -litdigit 0 245 -litdigit 1 1 -litdigit 2 12 -litdigit 3 14 -litdigit 4 145 -litdigit 5 15 -litdigit 6 124 -litdigit 7 1245 -litdigit 8 125 -litdigit 9 24 - - # Modifiers # acute accent @@ -82,7 +47,6 @@ sign \x0301 45-34 ́ noback correct [$l]"́" "́"* # circumflex/carot -display \x005e 457 ^ sign \x005e 4-26 ^ sign \x0302 45-146 ̂ noback correct [$l]"̂" "̂"* @@ -94,7 +58,6 @@ sign \x0300 45-16 ̀̀ noback correct [$l]"̀" "̀"* # tilde -display \x007e 45 ~ math \x007e 4-35 ~ sign \x0303 45-12456 ̃ noback correct [$l]"̃" "̃"* @@ -136,16 +99,13 @@ noback correct [$l]"̊" "̊"* # Unicode: Basic Latin -display ! 2346 punctuation ! 235 nofor postpunc ! 235 match %a ! %a 56-235 -display " 5 punctuation " 6-2356 # The "?" symbol is mostly handled below, # but the pattern needs to be defined before prepunc and postpunc can be used. -display ? 1456 nofor punctuation ? 236 nofor punctuation " 356 nofor prepunc " 236 @@ -153,73 +113,54 @@ nofor postpunc " 356 match %[^_~]%<* " %[_.$]*%[a#] 236 match %[a#]%[_.$]* " %>*%[^_~] 356 -display # 3456 sign # 456-1456 -display $ 1246 sign $ 4-234 -display % 146 sign % 46-356 -display & 12346 sign & 4-12346 -display ' 3 punctuation ' 3 -display ( 12356 punctuation ( 5-126 -display ) 23456 +nofor prepunc ( 5-126 punctuation ) 5-345 -display * 16 +postpunc ) 5-345 sign * 5-35 -display + 346 math + 5-235 -display , 6 punctuation , 2 +nofor postpunc , 2 match %a , %a 56-2 -display - 36 punctuation - 36 hyphen - 36 -display . 46 punctuation . 256 -display / 34 +nofor postpunc . 256 math / 456-34 # 0-9 see Numeric Symbols -display : 156 punctuation : 25 postpunc : 25 match %a : %a 56-25 -#TODO: this is unnecessarily necessary -display ; 56 punctuation ; 23 -noback punctuation ; 56 +postpunc ; 23 match %a ; %a 56-23 -display < 126 math < 4-126 -display = 123456 math = 5-2356 -display > 345 punctuation > 4-345 # requires grade one indicator when by itself punctuation ? 56-236 postpunc ? 236 -display @ 47 sign @ 4-1 # A-Z see latinLetterDef8Dots.uti -display [ 2467 punctuation [ 46-126 -display \\ 12567 +nofor prepunc [ 46-126 sign \\ 456-16 -display ] 124567 punctuation ] 46-345 +nofor postpunc ] 46-345 # \x005e ^ see Modifiers -display _ 456 sign _ 46-36 # /x0060 ` see Modifiers # a-z see latinLetterDef8Dots.uti -display { 246 punctuation { 456-126 -display | 1256 +nofor prepunc { 456-126 sign | 456-1256 -display } 12456 punctuation } 456-345 +nofor postpunc } 456-345 # \x007e ~ see Modifiers @@ -931,16 +872,52 @@ base uppercase \x04d8 \x04d9 Әә base uppercase \x04ee \x04ef Ӯӯ base uppercase \x04f0 \x04f1 Ӱӱ + +# Numeric Symbols + +digit 0 356 +digit 1 2 +digit 2 23 +digit 3 25 +digit 4 256 +digit 5 26 +digit 6 235 +digit 7 2356 +digit 8 236 +digit 9 35 + +litdigit 0 245 +litdigit 1 1 +litdigit 2 12 +litdigit 3 14 +litdigit 4 145 +litdigit 5 15 +litdigit 6 124 +litdigit 7 1245 +litdigit 8 125 +litdigit 9 24 + + # Unicode prefers \x2019 for apostrophe instead of \x0027 -endword \x2019d 3-145 -endword \x2019m 3-134 -endword \x2019re 3-1235-15 -endword \x2019ve 3-1236-15 -endword \x2019ll 3-123-123 -endword \x2019s 3-234 -endword s\x2019 234-3 -endword \x2019t 3-2345 +endword 'd 3-145 +noback endword \x2019d 3-145 +endword 'll 3-123-123 +noback endword \x2019ll 3-123-123 +endword 'm 3-134 +noback endword \x2019m 3-134 +endword 'n 3-1345 +noback endword \x2019n 3-1345 +endword 're 3-1235-15 +noback endword \x2019re 3-1235-15 +endword 's 3-234 +noback endword \x2019s 3-234 +endword s' 234-3 +noback endword s\x2019 234-3 +endword 't 3-2345 +noback endword \x2019t 3-2345 +endword 've 3-1236-15 +noback endword \x2019ve 3-1236-15 # Remove dot 6 from ligatures (æ, œ and ij) when they are lowercase or part of an uppercase word diff --git a/tables/en-ueb-g1.ctb b/tables/en-ueb-g1.ctb index cad0588..c95a102 100644 --- a/tables/en-ueb-g1.ctb +++ b/tables/en-ueb-g1.ctb @@ -55,10 +55,10 @@ numericnocontchars abcdefghijABCDEFGHIJ # A capital ends numeric mode, so clean up extraneous letter indicators between a number # and a subsequent capital letter -noback pass2 _$d[@6-56]$U @6 # handle "1234Card" -noback pass2 _$d[@6-6-56]$U @6-6 # handle "1234CARD" -noback pass2 _$d[@256-6-6-56]$U @256-6-6 # handle "1234.CARD" -noback pass2 _$d[@256-6-56]$U @256-6 # handle "1234.Card" +noback pass2 _$D[@6-56]$U @6 # handle "1234Card" +noback pass2 _$D[@6-6-56]$U @6-6 # handle "1234CARD" +noback pass2 _$D[@256-6-6-56]$U @256-6-6 # handle "1234.CARD" +noback pass2 _$D[@256-6-56]$U @256-6 # handle "1234.Card" # Correct order of comma and numeric indicator match %a , %# 2-34569 force correct position of numeric indicator @@ -69,13 +69,13 @@ match %a . %# 256-34569 force correct position of numeric indicator noback pass2 @3456-256-34569 @256-3456 Clear up extra indicator after the match line # numeric space -noback correct _$d["\s"]$d "\xE020" # change space to custom symbol representing numeric space -nofor correct _$d["\xE020"]$d "\s" +noback correct _$d["\x00a0"]$d "\xE020" # change space to custom symbol representing numeric space +nofor correct _$d["\xE020"]$d "\x00a0" litdigit \xE020 5a # add virtual dot to make distinction with other uses of dot 5 noback pass2 @5a @5 -nofor pass2 [@3456]$d @3456#1=1 # enter numeric mode when number sign followed by digit is encountered -nofor pass2 $d * -nofor pass2 #1=1[@5]$d @5a # when in numeric mode interpret dot 5 as numeric space +nofor pass2 [@3456]$D @3456#1=1 # enter numeric mode when number sign followed by digit is encountered +nofor pass2 $D * +nofor pass2 #1=1[@5]$D @5a # when in numeric mode interpret dot 5 as numeric space nofor pass2 $a *#1=0 # exit numeric mode when other character than digit or numeric space is encountered capsletter 6 diff --git a/tables/en-ueb-g2.ctb b/tables/en-ueb-g2.ctb index 642e006..9ba57df 100644 --- a/tables/en-ueb-g2.ctb +++ b/tables/en-ueb-g2.ctb @@ -850,14 +850,14 @@ sufword conned 14-135-1345-1345-1246 nofor begword con 25 # dis 10.6.1 10.6.2 10.6.3 10.6.4 -word disc 145-24-234-14 + word dish 145-24-146 word disher 145-24-146-12456 word dishers 145-24-146-12456-234 match ^|!%a dishi ([Ee]([Rr]|([Ss][Tt]))) 145-24-146-24 word dishy 145-24-146-13456 word diss 145-24-234-234 -empmatchafter match %[^_~]%<* dis [Cc]![Ss'’] 256 +empmatchafter match %[^_~]%<* dis [Cc]([HLRhlr]?)[AEIOUYaeiouy] 256 empmatchafter match %[^_~]%<* dis [Hh][Ee]![DSVdsv] 256 empmatchafter match %[^_~]%<* dis [Hh]![BCDEFGHIKLMNPRTWbcdefghiklmnprtw'’] 256 empmatchafter match %[^_~]%<* dis [Pp]![Ii] 256 @@ -1427,7 +1427,7 @@ sufword theseus 2346-234-15-136-234 # those 10.7.2 nofor always those 45-1456 -word those 45-1456 In practice, only the word "those" uses the sign +match ^|!%a those ^|!%a 45-1456 In practice, only the word "those" uses the sign # through always through 5-1456 @@ -4343,3 +4343,13 @@ contraction \x25cb ○ circle contraction \x25cd ◍ shaded circle + +# Punctuation in strange places require grade 1 indicator +# note, forward direction handled with match elsewhere + +nofor always , 56-2 +nofor always ; 56-23 +nofor always : 56-25 +nofor always . 56-256 +nofor always ! 56-235 +nofor always ? 56-236 diff --git a/tables/zhcn-g1.ctb b/tables/zhcn-g1.ctb index b2d7f5c..b746feb 100644 --- a/tables/zhcn-g1.ctb +++ b/tables/zhcn-g1.ctb @@ -26414,6 +26414,16 @@ noback correct $w[$s] ? # Stop # # Quoted the en-ueb-g1.ctb +digit 0 245 +digit 1 1 +digit 2 12 +digit 3 14 +digit 4 145 +digit 5 15 +digit 6 124 +digit 7 1245 +digit 8 125 +digit 9 24 include en-ueb-chardefs.uti include en-ueb-math.ctb space \x3000 0 diff --git a/tables/zhcn-g2.ctb b/tables/zhcn-g2.ctb index cb6ae62..d5d902e 100644 --- a/tables/zhcn-g2.ctb +++ b/tables/zhcn-g2.ctb @@ -26411,6 +26411,16 @@ noback correct $w[$s] ? # Stop # # Quoted the en-ueb-g1.ctb +digit 0 245 +digit 1 1 +digit 2 12 +digit 3 14 +digit 4 145 +digit 5 15 +digit 6 124 +digit 7 1245 +digit 8 125 +digit 9 24 include en-ueb-chardefs.uti include en-ueb-math.ctb #