Skip to content

Commit 6cdcec1

Browse files
authored
is_utf8() for JSON error fixed
1 parent 7c1c88e commit 6cdcec1

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

UTF8.php

+21-10
Original file line numberDiff line numberDiff line change
@@ -2703,6 +2703,27 @@ public static function is_ascii($data, &$error_char_offset = null)
27032703
public static function is_utf8($data, $is_strict = true)
27042704
{
27052705
if (! ReflectionTypeHint::isValid()) return false;
2706+
if (is_string($data))
2707+
{
2708+
if (preg_match('~~suSX', $data) !== 1) return false;
2709+
//if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false;
2710+
//preg_match('~~suSX') is much faster (up to 4 times) than mb_check_encoding($data, 'UTF-8')!
2711+
//if (function_exists('mb_check_encoding') && ! mb_check_encoding($data, 'UTF-8')) return false; #DEPRECATED
2712+
/**
2713+
* Специальные символы по спецификации JSON (http://json.org/)
2714+
* \b represents the backspace character (U+0008)
2715+
* \t represents the character tabulation character (U+0009)
2716+
* \n represents the line feed character (U+000A)
2717+
* \f represents the form feed character (U+000C)
2718+
* \r represents the carriage return character (U+000D)
2719+
*/
2720+
//с данным регулярным выражением preg_match() работает в 2 раза быстрее, чем strpbrk()
2721+
if ($is_strict && preg_match('/[^\x08\x09\x0A\x0C\x0D\x20-\xBF\xC2-\xF7]/sSX', $data)) {
2722+
return false;
2723+
}
2724+
return true;
2725+
}
2726+
if (is_scalar($data) || is_null($data)) return true; #int/float/bool/null
27062727
if (is_array($data))
27072728
{
27082729
foreach ($data as $k => &$v)
@@ -2711,16 +2732,6 @@ public static function is_utf8($data, $is_strict = true)
27112732
}
27122733
return true;
27132734
}
2714-
if (is_string($data))
2715-
{
2716-
if (! preg_match('~~suSX', $data)) return false;
2717-
if (function_exists('preg_last_error') && preg_last_error() !== PREG_NO_ERROR) return false;
2718-
#preg_match('~~suSX') is much faster (up to 4 times) than mb_check_encoding($data, 'UTF-8')!
2719-
#if (function_exists('mb_check_encoding') && ! mb_check_encoding($data, 'UTF-8')) return false; #DEPRECATED
2720-
if ($is_strict && preg_match('/[^\x09\x0A\x0D\x20-\xBF\xC2-\xF7]/sSX', $data)) return false;
2721-
return true;
2722-
}
2723-
if (is_scalar($data) || is_null($data)) return true; #int/float/bool/null
27242735
return false; #object or resource
27252736
}
27262737

0 commit comments

Comments
 (0)