Skip to content

Commit 3baf919

Browse files
committed
grapheme_levenshtein: unify the internal character representation to UTF-16
1 parent 5b29834 commit 3baf919

File tree

1 file changed

+37
-15
lines changed

1 file changed

+37
-15
lines changed

ext/intl/grapheme/grapheme_string.c

+37-15
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include <unicode/ucol.h>
2727
#include <unicode/ustring.h>
2828
#include <unicode/ubrk.h>
29+
#include <unicode/usearch.h>
2930

3031
/* }}} */
3132

@@ -979,10 +980,8 @@ PHP_FUNCTION(grapheme_levenshtein)
979980
intl_convert_utf8_to_utf16(&ustring1, &ustring1_len, pstr1, ZSTR_LEN(string1), &ustatus1);
980981

981982
if (U_FAILURE(ustatus1)) {
982-
/* Set global error code. */
983983
intl_error_set_code(NULL, ustatus1);
984984

985-
/* Set error messages. */
986985
intl_error_set_custom_msg(NULL, "Error converting input string to UTF-16", 0);
987986
if (ustring1) {
988987
efree(ustring1);
@@ -993,10 +992,8 @@ PHP_FUNCTION(grapheme_levenshtein)
993992
intl_convert_utf8_to_utf16(&ustring2, &ustring2_len, pstr2, ZSTR_LEN(string2), &ustatus2);
994993

995994
if (U_FAILURE(ustatus2)) {
996-
/* Set global error code. */
997995
intl_error_set_code(NULL, ustatus2);
998996

999-
/* Set error messages. */
1000997
intl_error_set_custom_msg(NULL, "Error converting input string to UTF-16", 0);
1001998
if (ustring2) {
1002999
efree(ustring2);
@@ -1007,8 +1004,6 @@ PHP_FUNCTION(grapheme_levenshtein)
10071004
RETURN_FALSE;
10081005
}
10091006

1010-
UText *ut1 = NULL;
1011-
UText *ut2 = NULL;
10121007
UBreakIterator *bi1, *bi2;
10131008

10141009
int32_t strlen_1, strlen_2;
@@ -1031,10 +1026,28 @@ PHP_FUNCTION(grapheme_levenshtein)
10311026
bi1 = grapheme_get_break_iterator((void*)u_break_iterator_buffer1, &ustatus1);
10321027
bi2 = grapheme_get_break_iterator((void*)u_break_iterator_buffer2, &ustatus2);
10331028

1034-
ut1 = utext_openUTF8(ut1, pstr1, ZSTR_LEN(string1), &ustatus1);
1035-
ubrk_setUText(bi1, ut1, &ustatus1);
1036-
ut2 = utext_openUTF8(ut2, pstr2, ZSTR_LEN(string2), &ustatus2);
1037-
ubrk_setUText(bi2, ut2, &ustatus2);
1029+
ubrk_setText(bi1, ustring1, ustring1_len, &ustatus1);
1030+
1031+
if (U_FAILURE(ustatus1)) {
1032+
intl_error_set_code(NULL, ustatus1);
1033+
1034+
intl_error_set_custom_msg(NULL, "Error on ubrk_setText on ustring1", 0);
1035+
if (ustring1) {
1036+
efree(ustring1);
1037+
}
1038+
RETURN_FALSE;
1039+
}
1040+
1041+
ubrk_setText(bi2, ustring2, ustring2_len, &ustatus2);
1042+
if (U_FAILURE(ustatus2)) {
1043+
intl_error_set_code(NULL, ustatus2);
1044+
1045+
intl_error_set_custom_msg(NULL, "Error on ubrk_setText on ustring2", 0);
1046+
if (ustring2) {
1047+
efree(ustring2);
1048+
}
1049+
RETURN_FALSE;
1050+
}
10381051

10391052
p1 = safe_emalloc(strlen_2 + 1, sizeof(zend_long), 0);
10401053
p2 = safe_emalloc(strlen_2 + 1, sizeof(zend_long), 0);
@@ -1048,6 +1061,7 @@ PHP_FUNCTION(grapheme_levenshtein)
10481061
int32_t pos1 = 0;
10491062
int32_t pos2 = 0;
10501063
int32_t usrch_pos = 0;
1064+
10511065
while (pos1 != UBRK_DONE) {
10521066
current1 = ubrk_current(bi1);
10531067
pos1 = ubrk_next(bi1);
@@ -1061,8 +1075,19 @@ PHP_FUNCTION(grapheme_levenshtein)
10611075
if (pos2 == UBRK_DONE) {
10621076
break;
10631077
}
1064-
usrch_pos = grapheme_strpos_utf16(pstr1 + current1, pos1 - current1, pstr2 + current2, pos2 - current2, 0, NULL, 0, 0);
1065-
if (usrch_pos == 0) {
1078+
UStringSearch *srch = usearch_open(ustring1 + current1, pos1 - current1, ustring2 + current2, pos2 - current2, "", NULL, &ustatus2);
1079+
if (U_FAILURE(ustatus2)) {
1080+
intl_error_set_code(NULL, ustatus2);
1081+
intl_error_set_custom_msg(NULL, "Error usearch_open", 0);
1082+
}
1083+
usrch_pos = usearch_first(srch, &ustatus2);
1084+
if (U_FAILURE(ustatus2)) {
1085+
intl_error_set_code(NULL, ustatus2);
1086+
intl_error_set_custom_msg(NULL, "Error usearch_first", 0);
1087+
}
1088+
usearch_close(srch);
1089+
1090+
if (usrch_pos != USEARCH_DONE) {
10661091
c0 = p1[i2];
10671092
} else {
10681093
c0 = p1[i2] + cost_rep;
@@ -1083,9 +1108,6 @@ PHP_FUNCTION(grapheme_levenshtein)
10831108
p2 = tmp;
10841109
}
10851110

1086-
utext_close(ut1);
1087-
utext_close(ut2);
1088-
10891111
ubrk_close(bi1);
10901112
ubrk_close(bi2);
10911113

0 commit comments

Comments
 (0)