Skip to content

Commit

Permalink
Updates Unicode data files to version 15.1
Browse files Browse the repository at this point in the history
Signed-off-by: Jon Stovell <[email protected]>
  • Loading branch information
Sesquipedalian committed Nov 10, 2023
1 parent 91f50c8 commit fe7d497
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 35 deletions.
3 changes: 3 additions & 0 deletions Sources/Unicode/CaseFold.php
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,12 @@ function utf8_casefold_simple_maps()
"\xE1\xBF\x8A" => "\xE1\xBD\xB4",
"\xE1\xBF\x8B" => "\xE1\xBD\xB5",
"\xE1\xBF\x8C" => "\xE1\xBF\x83",
"\xE1\xBF\x93" => "\xCE\x90",
"\xE1\xBF\x98" => "\xE1\xBF\x90",
"\xE1\xBF\x99" => "\xE1\xBF\x91",
"\xE1\xBF\x9A" => "\xE1\xBD\xB6",
"\xE1\xBF\x9B" => "\xE1\xBD\xB7",
"\xE1\xBF\xA3" => "\xCE\xB0",
"\xE1\xBF\xA8" => "\xE1\xBF\xA0",
"\xE1\xBF\xA9" => "\xE1\xBF\xA1",
"\xE1\xBF\xAA" => "\xE1\xBD\xBA",
Expand Down Expand Up @@ -1195,6 +1197,7 @@ function utf8_casefold_simple_maps()
"\xEA\xAE\xBD" => "\xE1\x8F\xAD",
"\xEA\xAE\xBE" => "\xE1\x8F\xAE",
"\xEA\xAE\xBF" => "\xE1\x8F\xAF",
"\xEF\xAC\x85" => "\xEF\xAC\x86",
"\xEF\xBC\xA1" => "\xEF\xBD\x81",
"\xEF\xBC\xA2" => "\xEF\xBD\x82",
"\xEF\xBC\xA3" => "\xEF\xBD\x83",
Expand Down
10 changes: 5 additions & 5 deletions Sources/Unicode/Idna.php
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ function idna_maps()
"\xE1\xBA\x94" => "\xE1\xBA\x95",
"\xE1\xBA\x9A" => "\x61\xCA\xBE",
"\xE1\xBA\x9B" => "\xE1\xB9\xA1",
"\xE1\xBA\x9E" => "\x73\x73",
"\xE1\xBA\x9E" => "\xC3\x9F",
"\xE1\xBA\xA0" => "\xE1\xBA\xA1",
"\xE1\xBA\xA2" => "\xE1\xBA\xA3",
"\xE1\xBA\xA4" => "\xE1\xBA\xA5",
Expand Down Expand Up @@ -6369,8 +6369,6 @@ function idna_regex()
'\x{2101}' .
'\x{2105}' .
'\x{2106}' .
'\x{2260}' .
'\x{226E}-\x{226F}' .
'\x{2474}' .
'\x{2475}' .
'\x{2476}' .
Expand Down Expand Up @@ -6911,7 +6909,8 @@ function idna_regex()
'\x{3130}' .
'\x{3164}' .
'\x{318F}' .
'\x{31E4}-\x{31EF}' .
'\x{31E4}-\x{31EE}' .
'\x{31EF}' .
'\x{321F}' .
'\x{33C2}' .
'\x{33C7}' .
Expand Down Expand Up @@ -7353,7 +7352,8 @@ function idna_regex()
'\x{2B73A}-\x{2B73F}' .
'\x{2B81E}-\x{2B81F}' .
'\x{2CEA2}-\x{2CEAF}' .
'\x{2EBE1}-\x{2F7FF}' .
'\x{2EBE1}-\x{2EBEF}' .
'\x{2EE5E}-\x{2F7FF}' .
'\x{2F868}' .
'\x{2F874}' .
'\x{2F91F}' .
Expand Down
2 changes: 1 addition & 1 deletion Sources/Unicode/Metadata.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
die('No direct access...');

if (!defined('SMF_UNICODE_VERSION'))
define('SMF_UNICODE_VERSION', '15.0.0.0');
define('SMF_UNICODE_VERSION', '15.1.0.0');

?>
63 changes: 38 additions & 25 deletions Sources/Unicode/RegularExpressions.php
Original file line number Diff line number Diff line change
Expand Up @@ -800,13 +800,12 @@ function utf8_regex_properties()
'\x{2E9A}' .
'\x{2EF4}-\x{2EFF}' .
'\x{2FD6}-\x{2FEF}' .
'\x{2FFC}-\x{2FFF}' .
'\x{3040}' .
'\x{3097}-\x{3098}' .
'\x{3100}-\x{3104}' .
'\x{3130}' .
'\x{318F}' .
'\x{31E4}-\x{31EF}' .
'\x{31E4}-\x{31EE}' .
'\x{321F}' .
'\x{A48D}-\x{A48F}' .
'\x{A4C7}-\x{A4CF}' .
Expand Down Expand Up @@ -1229,7 +1228,8 @@ function utf8_regex_properties()
'\x{2B73A}-\x{2B73F}' .
'\x{2B81E}-\x{2B81F}' .
'\x{2CEA2}-\x{2CEAF}' .
'\x{2EBE1}-\x{2F7FF}' .
'\x{2EBE1}-\x{2EBEF}' .
'\x{2EE5E}-\x{2F7FF}' .
'\x{2FA1E}-\x{2FFFF}' .
'\x{3134B}-\x{3134F}' .
'\x{323B0}-\x{E0000}' .
Expand Down Expand Up @@ -1692,6 +1692,7 @@ function utf8_regex_properties()
'\x{2B740}-\x{2B81D}' .
'\x{2B820}-\x{2CEA1}' .
'\x{2CEB0}-\x{2EBE0}' .
'\x{2EBF0}-\x{2EE5D}' .
'\x{2F800}-\x{2FA1D}' .
'\x{30000}-\x{3134A}' .
'\x{31350}-\x{323AF}',
Expand Down Expand Up @@ -1737,9 +1738,7 @@ function utf8_regex_variation_selectors()
'\x{231A}-\x{231B}' .
'\x{2328}' .
'\x{23CF}' .
'\x{23E9}-\x{23EA}' .
'\x{23ED}-\x{23EF}' .
'\x{23F1}-\x{23F3}' .
'\x{23E9}-\x{23F3}' .
'\x{23F8}-\x{23FA}' .
'\x{24C2}' .
'\x{25AA}-\x{25AB}' .
Expand Down Expand Up @@ -1777,29 +1776,35 @@ function utf8_regex_variation_selectors()
'\x{26BD}-\x{26BE}' .
'\x{26C4}-\x{26C5}' .
'\x{26C8}' .
'\x{26CF}' .
'\x{26CE}-\x{26CF}' .
'\x{26D1}' .
'\x{26D3}-\x{26D4}' .
'\x{26E9}-\x{26EA}' .
'\x{26F0}-\x{26F5}' .
'\x{26F7}-\x{26FA}' .
'\x{26FD}' .
'\x{2702}' .
'\x{2708}-\x{2709}' .
'\x{270C}-\x{270D}' .
'\x{2705}' .
'\x{2708}-\x{270D}' .
'\x{270F}' .
'\x{2712}' .
'\x{2714}' .
'\x{2716}' .
'\x{271D}' .
'\x{2721}' .
'\x{2728}' .
'\x{2733}-\x{2734}' .
'\x{2744}' .
'\x{2747}' .
'\x{2753}' .
'\x{274C}' .
'\x{274E}' .
'\x{2753}-\x{2755}' .
'\x{2757}' .
'\x{2763}-\x{2764}' .
'\x{2795}-\x{2797}' .
'\x{27A1}' .
'\x{27B0}' .
'\x{27BF}' .
'\x{2934}-\x{2935}' .
'\x{2B05}-\x{2B07}' .
'\x{2B1B}-\x{2B1C}' .
Expand Down Expand Up @@ -3183,17 +3188,9 @@ function utf8_regex_joining_type()
'\x{0711}' .
'\x{0730}-\x{074A}',
),
'Adlam' => array(
'Join_Causing' =>
'\x{0640}',
'Dual_Joining' =>
'\x{1E900}-\x{1E943}',
'Transparent' =>
'\x{1E944}-\x{1E94A}' .
'\x{1E94B}',
),
'Tirhuta' => array(
'Dual_Joining' =>
'\x{A840}-\x{A871}' .
'\x{A840}-\x{A871}',
'Transparent' =>
'\x{0951}-\x{0957}' .
Expand All @@ -3202,6 +3199,15 @@ function utf8_regex_joining_type()
'\x{114BF}-\x{114C0}' .
'\x{114C2}-\x{114C3}',
),
'Adlam' => array(
'Join_Causing' =>
'\x{0640}',
'Dual_Joining' =>
'\x{1E900}-\x{1E943}',
'Transparent' =>
'\x{1E944}-\x{1E94A}' .
'\x{1E94B}',
),
'Nko' => array(
'Join_Causing' =>
'\x{07FA}',
Expand Down Expand Up @@ -3536,6 +3542,7 @@ function utf8_regex_indic()
'\x{0D54}-\x{0D63}' .
'\x{0D66}-\x{0D7F}' .
'\x{1CDA}' .
'\x{1CF2}' .
'\x{A838}',
'Letter' =>
'\x{0D04}-\x{0D0C}' .
Expand All @@ -3545,7 +3552,8 @@ function utf8_regex_indic()
'\x{0D4E}' .
'\x{0D54}-\x{0D56}' .
'\x{0D5F}-\x{0D61}' .
'\x{0D7A}-\x{0D7F}',
'\x{0D7A}-\x{0D7F}' .
'\x{1CF2}',
'Nonspacing_Combining_Mark' =>
'\x{0951}-\x{0952}' .
'\x{0D3B}-\x{0D3C}' .
Expand Down Expand Up @@ -3669,13 +3677,15 @@ function utf8_regex_indic()
'\x{0DD8}-\x{0DDF}' .
'\x{0DE6}-\x{0DEF}' .
'\x{0DF2}-\x{0DF4}' .
'\x{1CF2}' .
'\x{111E1}-\x{111F4}',
'Letter' =>
'\x{0D85}-\x{0D96}' .
'\x{0D9A}-\x{0DB1}' .
'\x{0DB3}-\x{0DBB}' .
'\x{0DBD}' .
'\x{0DC0}-\x{0DC6}',
'\x{0DC0}-\x{0DC6}' .
'\x{1CF2}',
'Nonspacing_Combining_Mark' =>
'\x{0DCA}',
'Nonspacing_Mark' =>
Expand Down Expand Up @@ -3970,6 +3980,9 @@ function utf8_regex_indic()
'\x{1CD9}' .
'\x{1CDD}' .
'\x{1CE0}' .
'\x{A838}' .
'\x{A83B}' .
'\x{A83D}' .
'\x{11180}-\x{111DF}',
'Letter' =>
'\x{11183}-\x{111B2}' .
Expand Down Expand Up @@ -4130,8 +4143,8 @@ function utf8_regex_indic()
'\x{0964}-\x{0965}' .
'\x{1CF2}' .
'\x{A838}-\x{A839}' .
'\x{A83D}' .
'\x{A83F}-\x{A840}' .
'\x{A83E}' .
'\x{A840}' .
'\x{11480}-\x{114C7}' .
'\x{114D0}-\x{114D9}',
'Letter' =>
Expand Down Expand Up @@ -4260,8 +4273,8 @@ function utf8_regex_indic()
'All' =>
'\x{0964}-\x{0965}' .
'\x{A838}-\x{A839}' .
'\x{A83C}' .
'\x{A83E}-\x{A83F}' .
'\x{A83D}' .
'\x{A83F}' .
'\x{11680}-\x{116B9}' .
'\x{116C0}-\x{116C9}',
'Letter' =>
Expand Down
8 changes: 4 additions & 4 deletions Sources/tasks/UpdateUnicode.php
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ private function build_regex_variation_selectors()

foreach ($this->funcs['utf8_regex_variation_selectors']['data'] as $variation_selector => $class_string)
{
$this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = preg_split('/(?<=})(?=\\\x{)/', $class_string);
$this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string));
}

krsort($this->funcs['utf8_regex_variation_selectors']['data']);
Expand Down Expand Up @@ -1815,7 +1815,7 @@ private function build_regex_joining_type()
return $a['stats']['age'] - $b['stats']['age'];
}
});
foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => $joining_types)
foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => &$joining_types)
{
unset($this->funcs['utf8_regex_joining_type']['data'][$char_script]['stats'], $joining_types['stats']);

Expand All @@ -1826,7 +1826,7 @@ private function build_regex_joining_type()
continue;
}

foreach ($joining_types as $joining_type => $value)
foreach ($joining_types as $joining_type => &$value)
{
sort($value);
}
Expand Down Expand Up @@ -2001,7 +2001,7 @@ private function build_regex_indic()
}
}

$this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = preg_split('/(?<=})(?=\\\x{)/', $class_string);
$this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string));
}

ksort($this->funcs['utf8_regex_indic']['data'][$char_script]);
Expand Down

0 comments on commit fe7d497

Please sign in to comment.