From 6d2593d3516233651bb1ae7cdc7650526aacf835 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:46:21 +0200 Subject: [PATCH 1/8] UnicodeData.txt lines from L2/24-219 --- unicodetools/data/ucd/dev/UnicodeData.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..f5845b77e 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,7 @@ +209D;LATIN SUBSCRIPT SMALL LETTER W;Lm;0;L; 0077;;;;N;;;;; +209E;LATIN SUBSCRIPT SMALL LETTER Y;Lm;0;L; 0079;;;;N;;;;; +209F;LATIN SUBSCRIPT SMALL LETTER Z;Lm;0;L; 007A;;;;N;;;;; +1DFD0;LATIN SUBSCRIPT SMALL LETTER GAMMA;Lm;0;L; 0263;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 5f00330ee878c8bcdff9c5ae2b8683e2e257dca5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:49:44 +0200 Subject: [PATCH 2/8] lb=AL --- unicodetools/data/ucd/dev/LineBreak.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 3ce258217..d96e9662b 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:55 GMT +# LineBreak-17.0.0.txt +# Date: 2024-10-21, 18:48:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -960,6 +960,7 @@ 208D ; OP # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; CL # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; AL # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +209D..209F ; AL # Lm [3] LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20A6 ; PR # Sc [7] EURO-CURRENCY SIGN..NAIRA SIGN 20A7 ; PO # Sc PESETA SIGN 20A8..20B5 ; PR # Sc [14] RUPEE SIGN..CEDI SIGN @@ -3378,6 +3379,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; AL # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI From f790fe4182553907a0655e7f6a569265acb06dc8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:52:14 +0200 Subject: [PATCH 3/8] Latin --- unicodetools/data/ucd/dev/Scripts.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..d6d76aaf0 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,5 @@ +209D..209F; Latin +1DFD0; Latin # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. 
From b570718cea5d0e1fdbb52bb5f7832cfd79a0fe13 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 20:54:52 +0200 Subject: [PATCH 4/8] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 15 ++++++- .../data/ucd/dev/DerivedCoreProperties.txt | 39 +++++++++++-------- .../ucd/dev/DerivedNormalizationProps.txt | 31 ++++++++++----- unicodetools/data/ucd/dev/EastAsianWidth.txt | 7 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 5 +-- .../data/ucd/dev/NormalizationTest.txt | 8 +++- unicodetools/data/ucd/dev/Scripts.txt | 11 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 8 ++-- .../data/ucd/dev/VerticalOrientation.txt | 7 ++-- .../data/ucd/dev/auxiliary/LineBreakTest.html | 4 +- .../dev/auxiliary/SentenceBreakProperty.txt | 8 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 9 +++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 9 +++-- .../dev/extracted/DerivedCombiningClass.txt | 9 +++-- .../extracted/DerivedDecompositionType.txt | 9 +++-- .../dev/extracted/DerivedEastAsianWidth.txt | 9 +++-- .../dev/extracted/DerivedGeneralCategory.txt | 15 +++---- .../ucd/dev/extracted/DerivedLineBreak.txt | 13 ++++--- .../data/ucd/dev/extracted/DerivedName.txt | 10 +++-- 19 files changed, 136 insertions(+), 90 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..49080ccff 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ -# DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# DerivedAge-17.0.0.txt +# Date: 2024-10-21, 18:52:55 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2059,4 +2059,15 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L # Total code points: 5185 +# ================================================ + +# Age=V17_0 + +# Newly assigned in Unicode 17.0.0 (September, 2025) + +209D..209F ; 17.0 # [3] LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z +1DFD0 ; 17.0 # LATIN SUBSCRIPT SMALL LETTER GAMMA + +# Total code points: 4 + # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1..ce6710bd3 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ -# DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# DerivedCoreProperties-17.0.0.txt +# Date: 2024-10-21, 18:53:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -748,7 +748,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1FF6..1FFC ; Alphabetic # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Alphabetic # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Alphabetic # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2102 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL C 2107 ; Alphabetic # L& EULER CONSTANT 210A..2113 ; Alphabetic # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -1371,6 +1371,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Alphabetic # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1441,7 +1442,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142763 # ================================================ @@ -3238,7 +3239,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN 
SMALL LETTER A..FULLWIDTH LATIN 2066..206F ; Case_Ignorable # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES 2071 ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Case_Ignorable # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Case_Ignorable # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20D0..20DC ; Case_Ignorable # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Case_Ignorable # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -3483,6 +3484,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK 1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DFD0 ; Case_Ignorable # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -3505,7 +3507,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2749 +# Total code points: 2753 # ================================================ @@ -6478,7 +6480,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1FF6..1FFC ; ID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; ID_Start # Lm 
SUPERSCRIPT LATIN SMALL LETTER I 207F ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; ID_Start # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2102 ; ID_Start # L& DOUBLE-STRUCK CAPITAL C 2107 ; ID_Start # L& EULER CONSTANT 210A..2113 ; ID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -6902,6 +6904,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; ID_Start # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -6962,7 +6965,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141273 # ================================================ @@ -7548,7 +7551,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2054 ; ID_Continue # Pc INVERTED UNDERTIE 2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; ID_Continue # Lm 
[16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20D0..20DC ; ID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 ; ID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E5..20F0 ; ID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE @@ -8290,6 +8293,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; ID_Continue # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -8370,7 +8374,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144545 # ================================================ @@ -8660,7 +8664,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1FF6..1FFC ; XID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; XID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; XID_Start # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT 
SMALL LETTER Z 2102 ; XID_Start # L& DOUBLE-STRUCK CAPITAL C 2107 ; XID_Start # L& EULER CONSTANT 210A..2113 ; XID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -9088,6 +9092,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; XID_Start # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -9148,7 +9153,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141250 # ================================================ @@ -9730,7 +9735,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2054 ; XID_Continue # Pc INVERTED UNDERTIE 2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; XID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; XID_Continue # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20D0..20DC ; XID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 ; XID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E5..20F0 ; XID_Continue # Mn 
[12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE @@ -10477,6 +10482,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; XID_Continue # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -10557,7 +10563,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144526 # ================================================ @@ -11638,7 +11644,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 208A..208C ; Grapheme_Base # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Grapheme_Base # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20C0 ; Grapheme_Base # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C @@ -12693,6 +12699,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT 
REPLACEMENT CHARACTER..REPLACEME 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Grapheme_Base # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -12812,7 +12819,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152734 # ================================================ diff --git a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt index ce636abb5..b73740aaf 100644 --- a/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt +++ b/unicodetools/data/ucd/dev/DerivedNormalizationProps.txt @@ -1,5 +1,5 @@ -# DerivedNormalizationProps-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedNormalizationProps-17.0.0.txt +# Date: 2024-10-21, 18:53:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1382,7 +1382,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 208A..208C ; NFKD_QC; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; NFKD_QC; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; NFKD_QC; N # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; NFKD_QC; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; NFKD_QC; N # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A8 ; NFKD_QC; N # Sc RUPEE SIGN 2100..2101 ; NFKD_QC; N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; NFKD_QC; N # L& DOUBLE-STRUCK CAPITAL C @@ -1707,6 +1707,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD0 ; NFKD_QC; N # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; NFKD_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -1753,7 +1754,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKD_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 17085 +# Total code points: 17089 # ================================================ @@ -1882,7 +1883,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 208A..208C ; NFKC_QC; N # Sm [3] SUBSCRIPT PLUS 
SIGN..SUBSCRIPT EQUALS SIGN 208D ; NFKC_QC; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; NFKC_QC; N # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; NFKC_QC; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; NFKC_QC; N # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A8 ; NFKC_QC; N # Sc RUPEE SIGN 2100..2101 ; NFKC_QC; N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; NFKC_QC; N # L& DOUBLE-STRUCK CAPITAL C @@ -2118,6 +2119,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DFD0 ; NFKC_QC; N # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; NFKC_QC; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF @@ -2164,7 +2166,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1FBF0..1FBF9 ; NFKC_QC; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4964 +# Total code points: 4968 # ================================================ @@ -4122,6 +4124,9 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 209A ; NFKC_CF; 0070 # Lm LATIN SUBSCRIPT SMALL LETTER P 209B ; NFKC_CF; 0073 # Lm LATIN SUBSCRIPT SMALL LETTER S 209C ; NFKC_CF; 0074 # Lm LATIN SUBSCRIPT SMALL LETTER T +209D ; NFKC_CF; 0077 # Lm LATIN SUBSCRIPT SMALL LETTER W +209E ; NFKC_CF; 0079 # Lm LATIN SUBSCRIPT SMALL LETTER Y +209F ; NFKC_CF; 007A 
# Lm LATIN SUBSCRIPT SMALL LETTER Z 20A8 ; NFKC_CF; 0072 0073 # Sc RUPEE SIGN 2100 ; NFKC_CF; 0061 002F 0063 # So ACCOUNT OF 2101 ; NFKC_CF; 0061 002F 0073 # So ADDRESSED TO THE SUBJECT @@ -8225,6 +8230,7 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] ...... -# Total code points: 10554 +# Total code points: 10558 # ================================================ @@ -10256,6 +10262,9 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] ........ -# Total code points: 10516 +# Total code points: 10520 # ================================================ @@ -15818,7 +15828,7 @@ E01F0..E0FFF ; NFKC_SCF; # Cn [3600] ........ -# Total code points: 10554 +# Total code points: 10558 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..7378144c3 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# EastAsianWidth-17.0.0.txt +# Date: 2024-10-21, 18:53:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -971,7 +971,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; N # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20A9 ; H # Sc WON SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN @@ -2468,6 +2468,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; N # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index d96e9662b..a6c6c8799 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2024-10-21, 18:48:21 GMT +# Date: 2024-10-21, 18:53:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -959,8 +959,7 @@ 208A..208C ; AL # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; OP # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; CL # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; AL # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -209D..209F ; AL # Lm [3] LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z +2090..209F ; AL # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20A6 ; PR # Sc [7] EURO-CURRENCY SIGN..NAIRA SIGN 20A7 ; PO # Sc PESETA SIGN 20A8..20B5 ; PR # Sc [14] RUPEE SIGN..CEDI SIGN diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..c97543fa0 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ -# NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# NormalizationTest-17.0.0.txt +# Date: 2024-10-21, 18:53:32 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1230,6 +1230,9 @@ FEFA 0334;FEFA 0334;FEFA 0334;0644 0625 0334;0644 0627 0334 0655; # (ﻺ◌̴; 209A;209A;209A;0070;0070; # (ₚ; ₚ; ₚ; p; p; ) LATIN SUBSCRIPT SMALL LETTER P 209B;209B;209B;0073;0073; # (ₛ; ₛ; ₛ; s; s; ) LATIN SUBSCRIPT SMALL LETTER S 209C;209C;209C;0074;0074; # (ₜ; ₜ; ₜ; t; t; ) LATIN SUBSCRIPT SMALL LETTER T +209D;209D;209D;0077;0077; # (₝; ₝; ₝; w; w; ) LATIN SUBSCRIPT SMALL LETTER W +209E;209E;209E;0079;0079; # (₞; ₞; ₞; y; y; ) LATIN SUBSCRIPT SMALL LETTER Y +209F;209F;209F;007A;007A; # (₟; ₟; ₟; z; z; ) LATIN SUBSCRIPT SMALL LETTER Z 20A8;20A8;20A8;0052 0073;0052 0073; # (₨; ₨; ₨; Rs; Rs; ) RUPEE SIGN 2100;2100;2100;0061 002F 0063;0061 002F 0063; # (℀; ℀; ℀; a/c; a/c; ) ACCOUNT OF 2101;2101;2101;0061 002F 0073;0061 002F 0073; # (℁; ℁; ℁; a/s; a/s; ) ADDRESSED TO THE SUBJECT @@ -16284,6 +16287,7 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 1D7FD;1D7FD;1D7FD;0037;0037; # (𝟽; 𝟽; 𝟽; 7; 7; ) MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;1D7FE;1D7FE;0038;0038; # (𝟾; 𝟾; 𝟾; 8; 8; ) MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;1D7FF;1D7FF;0039;0039; # (𝟿; 𝟿; 𝟿; 9; 9; ) MATHEMATICAL MONOSPACE DIGIT NINE +1DFD0;1DFD0;1DFD0;0263;0263; # (𝿐; 𝿐; 𝿐; ɣ; ɣ; ) LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030;1E030;1E030;0430;0430; # (𞀰; 𞀰; 𞀰; а; а; ) MODIFIER LETTER CYRILLIC SMALL A 1E031;1E031;1E031;0431;0431; # (𞀱; 𞀱; 𞀱; б; б; ) MODIFIER LETTER CYRILLIC SMALL BE 1E032;1E032;1E032;0432;0432; # (𞀲; 𞀲; 𞀲; в; в; ) MODIFIER LETTER CYRILLIC SMALL VE diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index d6d76aaf0..359153a51 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,7 +1,5 @@ -209D..209F; Latin -1DFD0; Latin -# Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Scripts-17.0.0.txt +# Date: 2024-10-21, 18:53:53 GMT # © 2024 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -663,7 +661,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP 2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Latin # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN 2132 ; Latin # L& TURNED CAPITAL F 214E ; Latin # L& TURNED SMALL F @@ -703,8 +701,9 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Latin # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA -# Total code points: 1487 +# Total code points: 1491 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f5845b77e..11db5fff4 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,7 +1,3 @@ -209D;LATIN SUBSCRIPT SMALL LETTER W;Lm;0;L; 0077;;;;N;;;;; -209E;LATIN SUBSCRIPT SMALL LETTER Y;Lm;0;L; 0079;;;;N;;;;; -209F;LATIN SUBSCRIPT SMALL LETTER Z;Lm;0;L; 007A;;;;N;;;;; -1DFD0;LATIN SUBSCRIPT SMALL LETTER GAMMA;Lm;0;L; 0263;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -7516,6 +7512,9 @@ 209A;LATIN SUBSCRIPT SMALL LETTER P;Lm;0;L; 
0070;;;;N;;;;; 209B;LATIN SUBSCRIPT SMALL LETTER S;Lm;0;L; 0073;;;;N;;;;; 209C;LATIN SUBSCRIPT SMALL LETTER T;Lm;0;L; 0074;;;;N;;;;; +209D;LATIN SUBSCRIPT SMALL LETTER W;Lm;0;L; 0077;;;;N;;;;; +209E;LATIN SUBSCRIPT SMALL LETTER Y;Lm;0;L; 0079;;;;N;;;;; +209F;LATIN SUBSCRIPT SMALL LETTER Z;Lm;0;L; 007A;;;;N;;;;; 20A0;EURO-CURRENCY SIGN;Sc;0;ET;;;;;N;;;;; 20A1;COLON SIGN;Sc;0;ET;;;;;N;;;;; 20A2;CRUZEIRO SIGN;Sc;0;ET;;;;;N;;;;; @@ -35660,6 +35659,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF28;LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF29;LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF2A;LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; +1DFD0;LATIN SUBSCRIPT SMALL LETTER GAMMA;Lm;0;L; 0263;;;;N;;;;; 1E000;COMBINING GLAGOLITIC LETTER AZU;Mn;230;NSM;;;;;N;;;;; 1E001;COMBINING GLAGOLITIC LETTER BUKY;Mn;230;NSM;;;;;N;;;;; 1E002;COMBINING GLAGOLITIC LETTER VEDE;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..38a1dc24d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# VerticalOrientation-17.0.0.txt +# Date: 2024-10-21, 18:53:56 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -898,7 +898,7 @@ 208A..208C ; R # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; R # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; R # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; R # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; R # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20C0 ; R # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20D0..20DC ; R # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; U # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -2320,6 +2320,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; R # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; R # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; R # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; R # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html index 3b2899e64..a5eca7878 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html @@ -7,7 +7,7 @@

Line_Break Chart

Unicode Version: 17.0.0

-

Date: 2024-10-14, 12:25:22 GMT

+

Date: 2024-10-21, 18:53:25 GMT

This page illustrates the application of the Line_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The × symbol indicates no break, while the ÷ symbol indicates a break. The cells with × are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

Some column headers may be composed, reflecting “treat as” or “ignore” rules.

If your browser handles titles (tooltips), then hovering the mouse over the row header will show a sample character of that type. Hovering over a column header will show the sample character, plus its abbreviated general category and script. Hovering over the intersected cells shows the rule number that produces the break-status. For example, hovering over the cell at the intersection of H3 and JT shows ×, with the rule 26.03. Checking below the table, rule 26.03 is “JT | H3 × JT”, which is the one that applies to that case. Note that a rule is invoked only when no lower-numbered rules have applied.

@@ -93,7 +93,7 @@

Rules

7.02× ZW 8.0ZW SP* ÷ 8.1ZWJ_O × -9.0(?<X>[^SP BK CR LF NL ZW]) ( CM | ZWJ )* → {X} +9.0(?<X>[^BK CR LF NL SP ZW]) ( CM | ZWJ )* → {X} 10.0( CM | ZWJ ) → A 11.01× WJ 11.02WJ × diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index ca3689e6b..8490031be 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:32 GMT +# SentenceBreakProperty-17.0.0.txt +# Date: 2024-10-21, 18:53:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2206,6 +2206,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1CEE..1CF3 ; OLetter # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF5..1CF6 ; OLetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1CFA ; OLetter # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +209D..209F ; OLetter # Lm [3] LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z 2135..2138 ; OLetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 2180..2182 ; OLetter # Nl [3] ROMAN NUMERAL ONE THOUSAND C D..ROMAN NUMERAL TEN THOUSAND 2185..2188 ; OLetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND @@ -2527,6 +2528,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DFD0 ; OLetter # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE 
HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2585,7 +2587,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136908 +# Total code points: 136912 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e557c3d0d..9c41bbe4b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-16.0.0.txt -# Date: 2024-07-29, 16:27:36 GMT +# WordBreakProperty-17.0.0.txt +# Date: 2024-10-21, 18:53:57 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -930,7 +930,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ALetter # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; ALetter # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C 2107 ; ALetter # L& EULER CONSTANT 210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -1301,6 +1301,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; ALetter # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1355,7 +1356,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33795 # ================================================ diff --git 
a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..d0dcb43da 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# DerivedBidiClass-17.0.0.txt +# Date: 2024-10-21, 18:53:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -558,7 +558,7 @@ 200E ; L # Cf LEFT-TO-RIGHT MARK 2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; L # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2102 ; L # L& DOUBLE-STRUCK CAPITAL C 2107 ; L # L& EULER CONSTANT 210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -1175,6 +1175,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; L # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1214,7 +1215,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. 
-# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815347 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..219ad6962 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ -# DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# DerivedCombiningClass-17.0.0.txt +# Date: 2024-10-21, 18:53:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -751,7 +751,7 @@ 208A..208C ; 0 # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; 0 # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; 0 # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; 0 # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; 0 # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20C0 ; 0 # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20DD..20E0 ; 0 # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E2..20E4 ; 0 # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE @@ -1936,6 +1936,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; 0 # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; 0 # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; 0 # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER 
LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; 0 # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2060,7 +2061,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821577 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index a825479ac..7cb456ed2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedDecompositionType-17.0.0.txt +# Date: 2024-10-21, 18:53:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -893,11 +893,12 @@ AB69 ; Super # Lm MODIFIER LETTER SMALL TURNED W 208A..208C ; Sub # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Sub # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Sub # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; Sub # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Sub # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2C7C ; Sub # Lm LATIN SUBSCRIPT SMALL LETTER J +1DFD0 ; Sub # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E051..1E06A ; Sub # Lm [26] CYRILLIC SUBSCRIPT SMALL LETTER A..CYRILLIC SUBSCRIPT SMALL LETTER DZHE -# Total code points: 64 +# Total code points: 68 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..089340169 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ -# DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedEastAsianWidth-17.0.0.txt +# Date: 2024-10-21, 18:53:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -836,7 +836,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS -2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; N # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN 20AD..20C0 ; N # Sc [20] KIP SIGN..SOM SIGN @@ -1965,6 +1965,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; N # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; N # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2103,7 +2104,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761095 code points not listed here. 
# Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..5558942d2 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ -# DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# DerivedGeneralCategory-17.0.0.txt +# Date: 2024-10-21, 18:53:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -256,7 +256,6 @@ 2065 ; Cn # 2072..2073 ; Cn # [2] .. 208F ; Cn # -209D..209F ; Cn # [3] .. 20C1..20CF ; Cn # [15] .. 20F1..20FF ; Cn # [15] .. 218C..218F ; Cn # [4] .. @@ -633,7 +632,8 @@ FFFE..FFFF ; Cn # [2] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. 1DF1F..1DF24 ; Cn # [6] .. -1DF2B..1DFFF ; Cn # [213] .. +1DF2B..1DFCF ; Cn # [165] .. +1DFD1..1DFFF ; Cn # [47] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. 
-# Total code points: 819533 +# Total code points: 819529 # ================================================ @@ -2126,7 +2126,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D9B..1DBF ; Lm # [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 2071 ; Lm # SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lm # SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Lm # [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Lm # [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2C7C..2C7D ; Lm # [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; Lm # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E2F ; Lm # VERTICAL TILDE @@ -2168,12 +2168,13 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1DFD0 ; Lm # LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 404 +# Total code points: 408 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 31d143e92..065b81b0b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-16.0.0.txt -# Date: 2024-07-29, 16:26:50 GMT +# DerivedLineBreak-17.0.0.txt +# Date: 2024-10-21, 18:53:21 GMT # © 2024 
Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757649 code points not listed here. +# Total code points: 895117 # ================================================ @@ -921,7 +921,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2080 ; AL # No SUBSCRIPT ZERO 2085..2089 ; AL # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE 208A..208C ; AL # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN -2090..209C ; AL # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; AL # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2100..2101 ; AL # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; AL # L& DOUBLE-STRUCK CAPITAL C 2104 ; AL # So CENTRE LINE SYMBOL @@ -1520,6 +1520,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; AL # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; AL # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; AL # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1613,7 +1614,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 
; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26679 +# Total code points: 26683 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..e4ff0a8b6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ -# DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# DerivedName-17.0.0.txt +# Date: 2024-10-21, 18:53:21 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -7487,6 +7487,9 @@ 209A ; LATIN SUBSCRIPT SMALL LETTER P 209B ; LATIN SUBSCRIPT SMALL LETTER S 209C ; LATIN SUBSCRIPT SMALL LETTER T +209D ; LATIN SUBSCRIPT SMALL LETTER W +209E ; LATIN SUBSCRIPT SMALL LETTER Y +209F ; LATIN SUBSCRIPT SMALL LETTER Z 20A0 ; EURO-CURRENCY SIGN 20A1 ; COLON SIGN 20A2 ; CRUZEIRO SIGN @@ -41461,6 +41464,7 @@ FFFD ; REPLACEMENT CHARACTER 1DF28 ; LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK 1DF29 ; LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK 1DF2A ; LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; LATIN SUBSCRIPT SMALL LETTER GAMMA 1E000 ; COMBINING GLAGOLITIC LETTER AZU 1E001 ; COMBINING GLAGOLITIC LETTER BUKY 1E002 ; COMBINING GLAGOLITIC LETTER VEDE @@ -45367,6 +45371,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155002 # EOF From 8aa7f48f3b70671b81ae65ce77290cd6a1a4e060 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 21:04:46 +0200 Subject: [PATCH 5/8] Failing test --- 
.../text/UCD/AdditionComparisons/157.txt | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt new file mode 100644 index 000000000..b6de4537a --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt @@ -0,0 +1,30 @@ +# Phonetic characters: Subscript w y z and ɣ +# https://github.com/unicode-org/utc-release-management/issues/157 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Propertywise [\x{209D} \N{LATIN SUBSCRIPT SMALL LETTER W} + \x{209E} \N{LATIN SUBSCRIPT SMALL LETTER Y} + \x{209F} \N{LATIN SUBSCRIPT SMALL LETTER Z}] + : [wyz] + : [WYZ] +CorrespondTo [\x{2093} ₓ \N{LATIN SUBSCRIPT SMALL LETTER X}] + : [x] + : [X] + +Propertywise [\x{1DFD0} \N{LATIN SUBSCRIPT SMALL LETTER GAMMA}] + : [ \x{0263} ɣ \N{LATIN SMALL LETTER GAMMA}] + : [ \x{0194} Ɣ \N{LATIN CAPITAL LETTER GAMMA}] +CorrespondTo [\x{2093} ₓ \N{LATIN SUBSCRIPT SMALL LETTER X}] + : [x] + : [X] + + +end Ignoring; + +end Ignoring; \ No newline at end of file From 6cd7a21570b07b19fd478e994250030a1593cccf Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 21:06:06 +0200 Subject: [PATCH 6/8] Other_Lowercase --- unicodetools/data/ucd/dev/PropList.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index fae2831e7..c773e8aee 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ 
b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,5 @@ +209D..209F; Other_Lowercase +1DFD0; Other_Lowercase # PropList-16.0.0.txt # Date: 2024-05-31, 18:09:48 GMT # © 2024 Unicode®, Inc. From e7dd1b1d0cd161f1848d214df827561a9efb75c6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 21:08:45 +0200 Subject: [PATCH 7/8] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 12 +++++++----- unicodetools/data/ucd/dev/PropList.txt | 11 +++++------ .../data/ucd/dev/auxiliary/SentenceBreakProperty.txt | 11 +++++------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index ce6710bd3..aee622aa1 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2024-10-21, 18:53:17 GMT +# Date: 2024-10-21, 19:07:54 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1884,7 +1884,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Lowercase # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 210A ; Lowercase # L& SCRIPT SMALL G 210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lowercase # L& SCRIPT SMALL L @@ -2137,10 +2137,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Lowercase # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2573 # ================================================ @@ -2879,7 +2880,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT 
SMALL LETTER T +2090..209F ; Cased # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C 2107 ; Cased # L& EULER CONSTANT 210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -2983,13 +2984,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Cased # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4582 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index c773e8aee..4439f38c6 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,7 +1,5 @@ -209D..209F; Other_Lowercase -1DFD0; Other_Lowercase -# PropList-16.0.0.txt -# Date: 2024-05-31, 18:09:48 GMT +# PropList-17.0.0.txt +# Date: 2024-10-21, 19:08:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1216,7 +1214,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Other_Lowercase # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND 24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V @@ -1230,9 +1228,10 @@ AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W 10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DFD0 ; Other_Lowercase # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -# Total code points: 311 +# Total code points: 315 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 8490031be..fc90a6be9 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2024-10-21, 18:53:54 GMT +# Date: 
2024-10-21, 19:08:34 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1064,7 +1064,7 @@ E0001 ; Format # Cf LANGUAGE TAG 1FF6..1FF7 ; Lower # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 2071 ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Lower # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2090..209F ; Lower # Lm [16] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER Z 210A ; Lower # L& SCRIPT SMALL G 210E..210F ; Lower # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lower # L& SCRIPT SMALL L @@ -1317,10 +1317,11 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL 1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DFD0 ; Lower # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2526 # ================================================ @@ -2206,7 +2207,6 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1CEE..1CF3 ; OLetter # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF5..1CF6 ; OLetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1CFA ; OLetter # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA 
-209D..209F ; OLetter # Lm [3] LATIN SUBSCRIPT SMALL LETTER W..LATIN SUBSCRIPT SMALL LETTER Z 2135..2138 ; OLetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 2180..2182 ; OLetter # Nl [3] ROMAN NUMERAL ONE THOUSAND C D..ROMAN NUMERAL TEN THOUSAND 2185..2188 ; OLetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND @@ -2528,7 +2528,6 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BC80..1BC88 ; OLetter # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; OLetter # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1DF0A ; OLetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK -1DFD0 ; OLetter # Lm LATIN SUBSCRIPT SMALL LETTER GAMMA 1E100..1E12C ; OLetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; OLetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E14E ; OLetter # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ @@ -2587,7 +2586,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136912 +# Total code points: 136908 # ================================================ From f4c65328d92a2d976b48608cdf1cd451b3d511bb Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 21 Oct 2024 21:16:15 +0200 Subject: [PATCH 8/8] Make the test pass --- .../org/unicode/text/UCD/AdditionComparisons/157.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt index b6de4537a..5adde6956 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt +++ 
b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/157.txt @@ -17,13 +17,16 @@ CorrespondTo [\x{2093} ₓ \N{LATIN SUBSCRIPT SMALL LETTER X}] : [x] : [X] +# Comparing against ₔ rather than ₓ so we don’t need to ignore the +# East_Asian_Width of x (Narrow, whereas ə and ɣ are Neutral). +Ignoring Block: Propertywise [\x{1DFD0} \N{LATIN SUBSCRIPT SMALL LETTER GAMMA}] : [ \x{0263} ɣ \N{LATIN SMALL LETTER GAMMA}] : [ \x{0194} Ɣ \N{LATIN CAPITAL LETTER GAMMA}] -CorrespondTo [\x{2093} ₓ \N{LATIN SUBSCRIPT SMALL LETTER X}] - : [x] - : [X] - +CorrespondTo [\x{2094} ₔ \N{LATIN SUBSCRIPT SMALL LETTER SCHWA}] + : [\x{0259} ə \N{LATIN SMALL LETTER SCHWA}] + : [\x{018F} \N{LATIN CAPITAL LETTER SCHWA}] +end Ignoring; end Ignoring;