Skip to content

Commit

Permalink
unicode: Preserve modifiers in AppendReverse() and ReverseString()
Browse files Browse the repository at this point in the history
Current implementation does not check for modifier runes, which causes modifiers to be applied to wrong characters after string is reversed. Unicode Character Categories M and Sk are considered as modifiers.

Fixes golang/go#50633
elliotwutingfeng committed Jun 23, 2023

Verified

This commit was signed with the committer’s verified signature.
elliotwutingfeng Wu Tingfeng
1 parent 2df65d7 commit 58b4a27
Showing 2 changed files with 56 additions and 15 deletions.
46 changes: 37 additions & 9 deletions unicode/bidi/bidi.go
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@ package bidi // import "golang.org/x/text/unicode/bidi"

import (
"bytes"
"unicode"
)

// This API tries to avoid dealing with embedding levels for now. Under the hood
@@ -321,23 +322,36 @@ func (r *Run) Pos() (start, end int) {
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
func AppendReverse(out, in []byte) []byte {
ret := make([]byte, len(in)+len(out))
copy(ret, out)
inRunes := bytes.Runes(in)

li := len(inRunes)
ret := make([]rune, li)
modifiers := make([]rune, 0, li)
for i, r := range inRunes {
if unicode.In(r, unicode.M, unicode.Sk) {
modifiers = append(modifiers, r)
continue
}
if len(modifiers) > 0 {
ret[li-i] = ret[li-i+len(modifiers)]
copy(ret[li-i+1:li-i+1+len(modifiers)], modifiers)
modifiers = nil
}
prop, _ := LookupRune(r)
if prop.IsBracket() {
inRunes[i] = prop.reverseBracket(r)
ret[li-i-1] = prop.reverseBracket(r)
} else {
ret[li-i-1] = r
}
}

for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
if len(modifiers) > 0 {
ret[0] = ret[len(modifiers)]
copy(ret[1:1+len(modifiers)], modifiers)
}
copy(ret[len(out):], string(inRunes))

return ret
res := make([]byte, len(in)+len(out))
copy(res, out)
copy(res[len(out):], string(ret))
return res
}

// ReverseString reverses the order of characters in s and returns a new string.
@@ -347,13 +361,27 @@ func ReverseString(s string) string {
input := []rune(s)
li := len(input)
ret := make([]rune, li)
modifiers := make([]rune, 0, li)
for i, r := range input {
if unicode.In(r, unicode.M, unicode.Sk) {
modifiers = append(modifiers, r)
continue
}
if len(modifiers) > 0 {
ret[li-i] = ret[li-i+len(modifiers)]
copy(ret[li-i+1:li-i+1+len(modifiers)], modifiers)
modifiers = nil
}
prop, _ := LookupRune(r)
if prop.IsBracket() {
ret[li-i-1] = prop.reverseBracket(r)
} else {
ret[li-i-1] = r
}
}
if len(modifiers) > 0 {
ret[0] = ret[len(modifiers)]
copy(ret[1:1+len(modifiers)], modifiers)
}
return string(ret)
}
25 changes: 19 additions & 6 deletions unicode/bidi/bidi_test.go
Original file line number Diff line number Diff line change
@@ -321,10 +321,22 @@ func TestDoubleSetString(t *testing.T) {
}

func TestReverseString(t *testing.T) {
input := "(Hello)"
want := "(olleH)"
if str := ReverseString(input); str != want {
t.Errorf("ReverseString(%s) = %q; want %q", input, str, want)
testcase := []struct {
input string
want string
}{
{"(Hello)", "(olleH)"},
{"nice (world) placé́́", "é́́calp (dlrow) ecin"},
{"äu", "uä"},
{"uä", "äu"},
{"é́́abé́́é́́", "é́́é́́baé́́"},
{"é́́é́́baé́́", "é́́abé́́é́́"},
{"✍🏾✍🏾ab✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾"},
}
for _, tc := range testcase {
if str := ReverseString(tc.input); str != tc.want {
t.Errorf("ReverseString(%s) = %q; want %q", tc.input, str, tc.want)
}
}
}

@@ -335,8 +347,9 @@ func TestAppendReverse(t *testing.T) {
want string
}{
{"", "Hëllo", "Hëllo"},
{"nice (wörld)", "", "(dlröw) ecin"},
{"nice (wörld)", "Hëllo", "Hëllo(dlröw) ecin"},
{"nice (wörld) placé́́", "", "é́́calp (dlröw) ecin"},
{"nice (wörld) placé́́", "Hëllo", "Hëlloé́́calp (dlröw) ecin"},
{"✍🏾✍🏾ab✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾", "✍🏾✍🏾ba✍🏾✍🏾✍🏾✍🏾ba✍🏾✍🏾"},
}
for _, tc := range testcase {
if r := AppendReverse([]byte(tc.outString), []byte(tc.inString)); string(r) != tc.want {

0 comments on commit 58b4a27

Please sign in to comment.