From f75e07b97b18b65f547a69951541d20a1c708758 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 10 Jan 2023 16:41:20 -0800 Subject: [PATCH] move 3 props from misc to string, adjust generator --- unicodetools/data/ucd/dev/BidiBrackets.txt | 6 +++--- unicodetools/data/ucd/dev/PropertyAliases.txt | 6 +++--- .../main/java/org/unicode/props/UcdProperty.java | 2 +- .../org/unicode/text/UCD/MakeUnicodeFiles.java | 1 + .../text/UCD/ToolUnicodePropertySource.java | 16 ++++++++++++++-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/unicodetools/data/ucd/dev/BidiBrackets.txt b/unicodetools/data/ucd/dev/BidiBrackets.txt index 5f0c117cb..8cebea415 100644 --- a/unicodetools/data/ucd/dev/BidiBrackets.txt +++ b/unicodetools/data/ucd/dev/BidiBrackets.txt @@ -1,5 +1,5 @@ # BidiBrackets-15.1.0.txt -# Date: 2023-01-05 +# Date: 2023-01-18 # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -12,11 +12,11 @@ # This file is a normative contributory data file in the Unicode # Character Database. # -# Bidi_Paired_Bracket is a normative property of type Miscellaneous, +# Bidi_Paired_Bracket is a normative property # which establishes a mapping between characters that are treated as # bracket pairs by the Unicode Bidirectional Algorithm. # -# Bidi_Paired_Bracket_Type is a normative property of type Enumeration, +# Bidi_Paired_Bracket_Type is a normative property # which classifies characters into opening and closing paired brackets # for the purposes of the Unicode Bidirectional Algorithm. # diff --git a/unicodetools/data/ucd/dev/PropertyAliases.txt b/unicodetools/data/ucd/dev/PropertyAliases.txt index 89703ca58..c1fdbd886 100644 --- a/unicodetools/data/ucd/dev/PropertyAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyAliases.txt @@ -64,9 +64,12 @@ nv ; Numeric_Value # ================================================ # String Properties # ================================================ +bmg ; Bidi_Mirroring_Glyph +bpb ; Bidi_Paired_Bracket cf ; Case_Folding cjkCompatibilityVariant ; kCompatibilityVariant dm ; Decomposition_Mapping +EqUIdeo ; Equivalent_Unified_Ideograph FC_NFKC ; FC_NFKC_Closure lc ; Lowercase_Mapping NFKC_CF ; NFKC_Casefold @@ -80,8 +83,6 @@ uc ; Uppercase_Mapping # ================================================ # Miscellaneous Properties # ================================================ -bmg ; Bidi_Mirroring_Glyph -bpb ; Bidi_Paired_Bracket cjkIICore ; kIICore cjkIRG_GSource ; kIRG_GSource cjkIRG_HSource ; kIRG_HSource @@ -95,7 +96,6 @@ cjkIRG_UKSource ; kIRG_UKSource cjkIRG_USource ; kIRG_USource cjkIRG_VSource ; kIRG_VSource cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS -EqUIdeo ; Equivalent_Unified_Ideograph isc ; ISO_Comment JSN ; Jamo_Short_Name na ; Name diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index 85e92b809..c471f6b50 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -55,6 +55,7 @@ public enum UcdProperty { Confusable_SA(PropertyType.String, "ConfSA"), Confusable_SL(PropertyType.String, "ConfSL"), Decomposition_Mapping(PropertyType.String, "dm"), + Equivalent_Unified_Ideograph(PropertyType.String, "EqUIdeo"), FC_NFKC_Closure(PropertyType.String, "FC_NFKC"), Idn_Mapping(PropertyType.String, "idnm"), Lowercase_Mapping(PropertyType.String, "lc"), @@ -76,7 +77,6 @@ public enum UcdProperty { Emoji_DCM(PropertyType.Miscellaneous, "EDCM"), Emoji_KDDI(PropertyType.Miscellaneous, "EKDDI"), Emoji_SB(PropertyType.Miscellaneous, "ESB"), - Equivalent_Unified_Ideograph(PropertyType.Miscellaneous, "EqUIdeo"), ISO_Comment(PropertyType.Miscellaneous, "isc"), Jamo_Short_Name(PropertyType.Miscellaneous, "JSN"), Name(PropertyType.Miscellaneous, "na"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index d3375ba05..5ba774ccc 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -1024,6 +1024,7 @@ private static void printDefaultValueComment( if (defaultValue != null) { // ok } else if (propName.equals("Bidi_Mirroring_Glyph") + || propName.equals("Bidi_Paired_Bracket") || propName.equals("ISO_Comment") || propName.equals("Name") || propName.equals("Unicode_Radical_Stroke") diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index ae5abf259..1fa8351cf 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -125,6 +125,10 @@ private ToolUnicodePropertySource(String version) { UnicodeSet Zwj = new UnicodeSet(0x200D, 0x200D).freeze(); version = ucd.getVersion(); // regularize + // Unicode 15.1 reclassifies some properties from miscellaneous to string. + // TODO: Try to get the classification from parsing PropertyAliases.txt rather than hardcode + // it. + boolean isAtLeast15_1 = ucd.getCompositeVersion() >= 0x0f0100; // first the special cases if (DEBUG) { @@ -515,7 +519,11 @@ public String _getValue(int codepoint) { return ucd.getBidiMirror(codepoint); } }.setValues("") - .setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.MISC, version)); + .setMain( + "Bidi_Mirroring_Glyph", + "bmg", + isAtLeast15_1 ? UnicodeProperty.STRING : UnicodeProperty.MISC, + version)); add( new UnicodeProperty.SimpleProperty() { @@ -524,7 +532,11 @@ public String _getValue(int codepoint) { return UTF16.valueOf(ucd.getBidi_Paired_Bracket(codepoint)); } }.setValues("") - .setMain("Bidi_Paired_Bracket", "bpb", UnicodeProperty.MISC, version)); + .setMain( + "Bidi_Paired_Bracket", + "bpb", + isAtLeast15_1 ? UnicodeProperty.STRING : UnicodeProperty.MISC, + version)); BaseProperty bpt = new UnicodeProperty.SimpleProperty() {