26
26
#include <unicode/ucol.h>
27
27
#include <unicode/ustring.h>
28
28
#include <unicode/ubrk.h>
29
+ #include <unicode/usearch.h>
29
30
30
31
/* }}} */
31
32
@@ -979,10 +980,8 @@ PHP_FUNCTION(grapheme_levenshtein)
979
980
intl_convert_utf8_to_utf16 (& ustring1 , & ustring1_len , pstr1 , ZSTR_LEN (string1 ), & ustatus1 );
980
981
981
982
if (U_FAILURE (ustatus1 )) {
982
- /* Set global error code. */
983
983
intl_error_set_code (NULL , ustatus1 );
984
984
985
- /* Set error messages. */
986
985
intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
987
986
if (ustring1 ) {
988
987
efree (ustring1 );
@@ -993,10 +992,8 @@ PHP_FUNCTION(grapheme_levenshtein)
993
992
intl_convert_utf8_to_utf16 (& ustring2 , & ustring2_len , pstr2 , ZSTR_LEN (string2 ), & ustatus2 );
994
993
995
994
if (U_FAILURE (ustatus2 )) {
996
- /* Set global error code. */
997
995
intl_error_set_code (NULL , ustatus2 );
998
996
999
- /* Set error messages. */
1000
997
intl_error_set_custom_msg (NULL , "Error converting input string to UTF-16" , 0 );
1001
998
if (ustring2 ) {
1002
999
efree (ustring2 );
@@ -1007,8 +1004,6 @@ PHP_FUNCTION(grapheme_levenshtein)
1007
1004
RETURN_FALSE ;
1008
1005
}
1009
1006
1010
- UText * ut1 = NULL ;
1011
- UText * ut2 = NULL ;
1012
1007
UBreakIterator * bi1 , * bi2 ;
1013
1008
1014
1009
int32_t strlen_1 , strlen_2 ;
@@ -1031,10 +1026,28 @@ PHP_FUNCTION(grapheme_levenshtein)
1031
1026
bi1 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer1 , & ustatus1 );
1032
1027
bi2 = grapheme_get_break_iterator ((void * )u_break_iterator_buffer2 , & ustatus2 );
1033
1028
1034
- ut1 = utext_openUTF8 (ut1 , pstr1 , ZSTR_LEN (string1 ), & ustatus1 );
1035
- ubrk_setUText (bi1 , ut1 , & ustatus1 );
1036
- ut2 = utext_openUTF8 (ut2 , pstr2 , ZSTR_LEN (string2 ), & ustatus2 );
1037
- ubrk_setUText (bi2 , ut2 , & ustatus2 );
1029
+ ubrk_setText (bi1 , ustring1 , ustring1_len , & ustatus1 );
1030
+
1031
+ if (U_FAILURE (ustatus1 )) {
1032
+ intl_error_set_code (NULL , ustatus1 );
1033
+
1034
+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring1" , 0 );
1035
+ if (ustring1 ) {
1036
+ efree (ustring1 );
1037
+ }
1038
+ RETURN_FALSE ;
1039
+ }
1040
+
1041
+ ubrk_setText (bi2 , ustring2 , ustring2_len , & ustatus2 );
1042
+ if (U_FAILURE (ustatus2 )) {
1043
+ intl_error_set_code (NULL , ustatus2 );
1044
+
1045
+ intl_error_set_custom_msg (NULL , "Error on ubrk_setText on ustring2" , 0 );
1046
+ if (ustring2 ) {
1047
+ efree (ustring2 );
1048
+ }
1049
+ RETURN_FALSE ;
1050
+ }
1038
1051
1039
1052
p1 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
1040
1053
p2 = safe_emalloc (strlen_2 + 1 , sizeof (zend_long ), 0 );
@@ -1048,6 +1061,7 @@ PHP_FUNCTION(grapheme_levenshtein)
1048
1061
int32_t pos1 = 0 ;
1049
1062
int32_t pos2 = 0 ;
1050
1063
int32_t usrch_pos = 0 ;
1064
+
1051
1065
while (pos1 != UBRK_DONE ) {
1052
1066
current1 = ubrk_current (bi1 );
1053
1067
pos1 = ubrk_next (bi1 );
@@ -1061,8 +1075,19 @@ PHP_FUNCTION(grapheme_levenshtein)
1061
1075
if (pos2 == UBRK_DONE ) {
1062
1076
break ;
1063
1077
}
1064
- usrch_pos = grapheme_strpos_utf16 (pstr1 + current1 , pos1 - current1 , pstr2 + current2 , pos2 - current2 , 0 , NULL , 0 , 0 );
1065
- if (usrch_pos == 0 ) {
1078
+ UStringSearch * srch = usearch_open (ustring1 + current1 , pos1 - current1 , ustring2 + current2 , pos2 - current2 , "" , NULL , & ustatus2 );
1079
+ if (U_FAILURE (ustatus2 )) {
1080
+ intl_error_set_code (NULL , ustatus2 );
1081
+ intl_error_set_custom_msg (NULL , "Error usearch_open" , 0 );
1082
+ }
1083
+ usrch_pos = usearch_first (srch , & ustatus2 );
1084
+ if (U_FAILURE (ustatus2 )) {
1085
+ intl_error_set_code (NULL , ustatus2 );
1086
+ intl_error_set_custom_msg (NULL , "Error usearch_first" , 0 );
1087
+ }
1088
+ usearch_close (srch );
1089
+
1090
+ if (usrch_pos != USEARCH_DONE ) {
1066
1091
c0 = p1 [i2 ];
1067
1092
} else {
1068
1093
c0 = p1 [i2 ] + cost_rep ;
@@ -1083,9 +1108,6 @@ PHP_FUNCTION(grapheme_levenshtein)
1083
1108
p2 = tmp ;
1084
1109
}
1085
1110
1086
- utext_close (ut1 );
1087
- utext_close (ut2 );
1088
-
1089
1111
ubrk_close (bi1 );
1090
1112
ubrk_close (bi2 );
1091
1113
0 commit comments