Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates Unicode data files to version 15.1 #7861

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Sources/Unicode/CaseFold.php
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,12 @@ function utf8_casefold_simple_maps()
"\xE1\xBF\x8A" => "\xE1\xBD\xB4",
"\xE1\xBF\x8B" => "\xE1\xBD\xB5",
"\xE1\xBF\x8C" => "\xE1\xBF\x83",
"\xE1\xBF\x93" => "\xCE\x90",
"\xE1\xBF\x98" => "\xE1\xBF\x90",
"\xE1\xBF\x99" => "\xE1\xBF\x91",
"\xE1\xBF\x9A" => "\xE1\xBD\xB6",
"\xE1\xBF\x9B" => "\xE1\xBD\xB7",
"\xE1\xBF\xA3" => "\xCE\xB0",
"\xE1\xBF\xA8" => "\xE1\xBF\xA0",
"\xE1\xBF\xA9" => "\xE1\xBF\xA1",
"\xE1\xBF\xAA" => "\xE1\xBD\xBA",
Expand Down Expand Up @@ -1195,6 +1197,7 @@ function utf8_casefold_simple_maps()
"\xEA\xAE\xBD" => "\xE1\x8F\xAD",
"\xEA\xAE\xBE" => "\xE1\x8F\xAE",
"\xEA\xAE\xBF" => "\xE1\x8F\xAF",
"\xEF\xAC\x85" => "\xEF\xAC\x86",
"\xEF\xBC\xA1" => "\xEF\xBD\x81",
"\xEF\xBC\xA2" => "\xEF\xBD\x82",
"\xEF\xBC\xA3" => "\xEF\xBD\x83",
Expand Down
10 changes: 5 additions & 5 deletions Sources/Unicode/Idna.php
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ function idna_maps()
"\xE1\xBA\x94" => "\xE1\xBA\x95",
"\xE1\xBA\x9A" => "\x61\xCA\xBE",
"\xE1\xBA\x9B" => "\xE1\xB9\xA1",
"\xE1\xBA\x9E" => "\x73\x73",
"\xE1\xBA\x9E" => "\xC3\x9F",
"\xE1\xBA\xA0" => "\xE1\xBA\xA1",
"\xE1\xBA\xA2" => "\xE1\xBA\xA3",
"\xE1\xBA\xA4" => "\xE1\xBA\xA5",
Expand Down Expand Up @@ -6369,8 +6369,6 @@ function idna_regex()
'\x{2101}' .
'\x{2105}' .
'\x{2106}' .
'\x{2260}' .
'\x{226E}-\x{226F}' .
'\x{2474}' .
'\x{2475}' .
'\x{2476}' .
Expand Down Expand Up @@ -6911,7 +6909,8 @@ function idna_regex()
'\x{3130}' .
'\x{3164}' .
'\x{318F}' .
'\x{31E4}-\x{31EF}' .
'\x{31E4}-\x{31EE}' .
'\x{31EF}' .
'\x{321F}' .
'\x{33C2}' .
'\x{33C7}' .
Expand Down Expand Up @@ -7353,7 +7352,8 @@ function idna_regex()
'\x{2B73A}-\x{2B73F}' .
'\x{2B81E}-\x{2B81F}' .
'\x{2CEA2}-\x{2CEAF}' .
'\x{2EBE1}-\x{2F7FF}' .
'\x{2EBE1}-\x{2EBEF}' .
'\x{2EE5E}-\x{2F7FF}' .
'\x{2F868}' .
'\x{2F874}' .
'\x{2F91F}' .
Expand Down
2 changes: 1 addition & 1 deletion Sources/Unicode/Metadata.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
die('No direct access...');

if (!defined('SMF_UNICODE_VERSION'))
define('SMF_UNICODE_VERSION', '15.0.0.0');
define('SMF_UNICODE_VERSION', '15.1.0.0');

?>
63 changes: 38 additions & 25 deletions Sources/Unicode/RegularExpressions.php
Original file line number Diff line number Diff line change
Expand Up @@ -800,13 +800,12 @@ function utf8_regex_properties()
'\x{2E9A}' .
'\x{2EF4}-\x{2EFF}' .
'\x{2FD6}-\x{2FEF}' .
'\x{2FFC}-\x{2FFF}' .
'\x{3040}' .
'\x{3097}-\x{3098}' .
'\x{3100}-\x{3104}' .
'\x{3130}' .
'\x{318F}' .
'\x{31E4}-\x{31EF}' .
'\x{31E4}-\x{31EE}' .
'\x{321F}' .
'\x{A48D}-\x{A48F}' .
'\x{A4C7}-\x{A4CF}' .
Expand Down Expand Up @@ -1229,7 +1228,8 @@ function utf8_regex_properties()
'\x{2B73A}-\x{2B73F}' .
'\x{2B81E}-\x{2B81F}' .
'\x{2CEA2}-\x{2CEAF}' .
'\x{2EBE1}-\x{2F7FF}' .
'\x{2EBE1}-\x{2EBEF}' .
'\x{2EE5E}-\x{2F7FF}' .
'\x{2FA1E}-\x{2FFFF}' .
'\x{3134B}-\x{3134F}' .
'\x{323B0}-\x{E0000}' .
Expand Down Expand Up @@ -1692,6 +1692,7 @@ function utf8_regex_properties()
'\x{2B740}-\x{2B81D}' .
'\x{2B820}-\x{2CEA1}' .
'\x{2CEB0}-\x{2EBE0}' .
'\x{2EBF0}-\x{2EE5D}' .
'\x{2F800}-\x{2FA1D}' .
'\x{30000}-\x{3134A}' .
'\x{31350}-\x{323AF}',
Expand Down Expand Up @@ -1737,9 +1738,7 @@ function utf8_regex_variation_selectors()
'\x{231A}-\x{231B}' .
'\x{2328}' .
'\x{23CF}' .
'\x{23E9}-\x{23EA}' .
'\x{23ED}-\x{23EF}' .
'\x{23F1}-\x{23F3}' .
'\x{23E9}-\x{23F3}' .
'\x{23F8}-\x{23FA}' .
'\x{24C2}' .
'\x{25AA}-\x{25AB}' .
Expand Down Expand Up @@ -1777,29 +1776,35 @@ function utf8_regex_variation_selectors()
'\x{26BD}-\x{26BE}' .
'\x{26C4}-\x{26C5}' .
'\x{26C8}' .
'\x{26CF}' .
'\x{26CE}-\x{26CF}' .
'\x{26D1}' .
'\x{26D3}-\x{26D4}' .
'\x{26E9}-\x{26EA}' .
'\x{26F0}-\x{26F5}' .
'\x{26F7}-\x{26FA}' .
'\x{26FD}' .
'\x{2702}' .
'\x{2708}-\x{2709}' .
'\x{270C}-\x{270D}' .
'\x{2705}' .
'\x{2708}-\x{270D}' .
'\x{270F}' .
'\x{2712}' .
'\x{2714}' .
'\x{2716}' .
'\x{271D}' .
'\x{2721}' .
'\x{2728}' .
'\x{2733}-\x{2734}' .
'\x{2744}' .
'\x{2747}' .
'\x{2753}' .
'\x{274C}' .
'\x{274E}' .
'\x{2753}-\x{2755}' .
'\x{2757}' .
'\x{2763}-\x{2764}' .
'\x{2795}-\x{2797}' .
'\x{27A1}' .
'\x{27B0}' .
'\x{27BF}' .
'\x{2934}-\x{2935}' .
'\x{2B05}-\x{2B07}' .
'\x{2B1B}-\x{2B1C}' .
Expand Down Expand Up @@ -3183,17 +3188,9 @@ function utf8_regex_joining_type()
'\x{0711}' .
'\x{0730}-\x{074A}',
),
'Adlam' => array(
'Join_Causing' =>
'\x{0640}',
'Dual_Joining' =>
'\x{1E900}-\x{1E943}',
'Transparent' =>
'\x{1E944}-\x{1E94A}' .
'\x{1E94B}',
),
'Tirhuta' => array(
'Dual_Joining' =>
'\x{A840}-\x{A871}' .
'\x{A840}-\x{A871}',
'Transparent' =>
'\x{0951}-\x{0957}' .
Expand All @@ -3202,6 +3199,15 @@ function utf8_regex_joining_type()
'\x{114BF}-\x{114C0}' .
'\x{114C2}-\x{114C3}',
),
'Adlam' => array(
'Join_Causing' =>
'\x{0640}',
'Dual_Joining' =>
'\x{1E900}-\x{1E943}',
'Transparent' =>
'\x{1E944}-\x{1E94A}' .
'\x{1E94B}',
),
'Nko' => array(
'Join_Causing' =>
'\x{07FA}',
Expand Down Expand Up @@ -3536,6 +3542,7 @@ function utf8_regex_indic()
'\x{0D54}-\x{0D63}' .
'\x{0D66}-\x{0D7F}' .
'\x{1CDA}' .
'\x{1CF2}' .
'\x{A838}',
'Letter' =>
'\x{0D04}-\x{0D0C}' .
Expand All @@ -3545,7 +3552,8 @@ function utf8_regex_indic()
'\x{0D4E}' .
'\x{0D54}-\x{0D56}' .
'\x{0D5F}-\x{0D61}' .
'\x{0D7A}-\x{0D7F}',
'\x{0D7A}-\x{0D7F}' .
'\x{1CF2}',
'Nonspacing_Combining_Mark' =>
'\x{0951}-\x{0952}' .
'\x{0D3B}-\x{0D3C}' .
Expand Down Expand Up @@ -3669,13 +3677,15 @@ function utf8_regex_indic()
'\x{0DD8}-\x{0DDF}' .
'\x{0DE6}-\x{0DEF}' .
'\x{0DF2}-\x{0DF4}' .
'\x{1CF2}' .
'\x{111E1}-\x{111F4}',
'Letter' =>
'\x{0D85}-\x{0D96}' .
'\x{0D9A}-\x{0DB1}' .
'\x{0DB3}-\x{0DBB}' .
'\x{0DBD}' .
'\x{0DC0}-\x{0DC6}',
'\x{0DC0}-\x{0DC6}' .
'\x{1CF2}',
'Nonspacing_Combining_Mark' =>
'\x{0DCA}',
'Nonspacing_Mark' =>
Expand Down Expand Up @@ -3970,6 +3980,9 @@ function utf8_regex_indic()
'\x{1CD9}' .
'\x{1CDD}' .
'\x{1CE0}' .
'\x{A838}' .
'\x{A83B}' .
'\x{A83D}' .
'\x{11180}-\x{111DF}',
'Letter' =>
'\x{11183}-\x{111B2}' .
Expand Down Expand Up @@ -4130,8 +4143,8 @@ function utf8_regex_indic()
'\x{0964}-\x{0965}' .
'\x{1CF2}' .
'\x{A838}-\x{A839}' .
'\x{A83D}' .
'\x{A83F}-\x{A840}' .
'\x{A83E}' .
'\x{A840}' .
'\x{11480}-\x{114C7}' .
'\x{114D0}-\x{114D9}',
'Letter' =>
Expand Down Expand Up @@ -4260,8 +4273,8 @@ function utf8_regex_indic()
'All' =>
'\x{0964}-\x{0965}' .
'\x{A838}-\x{A839}' .
'\x{A83C}' .
'\x{A83E}-\x{A83F}' .
'\x{A83D}' .
'\x{A83F}' .
'\x{11680}-\x{116B9}' .
'\x{116C0}-\x{116C9}',
'Letter' =>
Expand Down
8 changes: 4 additions & 4 deletions Sources/tasks/UpdateUnicode.php
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ private function build_regex_variation_selectors()

foreach ($this->funcs['utf8_regex_variation_selectors']['data'] as $variation_selector => $class_string)
{
$this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = preg_split('/(?<=})(?=\\\x{)/', $class_string);
$this->funcs['utf8_regex_variation_selectors']['data'][$variation_selector] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string));
}

krsort($this->funcs['utf8_regex_variation_selectors']['data']);
Expand Down Expand Up @@ -1815,7 +1815,7 @@ private function build_regex_joining_type()
return $a['stats']['age'] - $b['stats']['age'];
}
});
foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => $joining_types)
foreach ($this->funcs['utf8_regex_joining_type']['data'] as $char_script => &$joining_types)
{
unset($this->funcs['utf8_regex_joining_type']['data'][$char_script]['stats'], $joining_types['stats']);

Expand All @@ -1826,7 +1826,7 @@ private function build_regex_joining_type()
continue;
}

foreach ($joining_types as $joining_type => $value)
foreach ($joining_types as $joining_type => &$value)
{
sort($value);
}
Expand Down Expand Up @@ -2001,7 +2001,7 @@ private function build_regex_indic()
}
}

$this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = preg_split('/(?<=})(?=\\\x{)/', $class_string);
$this->funcs['utf8_regex_indic']['data'][$char_script][$insc] = array_unique(preg_split('/(?<=})(?=\\\x{)/', $class_string));
}

ksort($this->funcs['utf8_regex_indic']['data'][$char_script]);
Expand Down
Loading