@@ -3,7 +3,6 @@ package gchatmeow
3
3
import (
4
4
"context"
5
5
"encoding/base64"
6
- "encoding/binary"
7
6
"encoding/json"
8
7
"errors"
9
8
"fmt"
@@ -17,7 +16,7 @@ import (
17
16
"strconv"
18
17
"strings"
19
18
"time"
20
- "unicode/utf8 "
19
+ "unicode/utf16 "
21
20
22
21
"go.mau.fi/util/pblite"
23
22
@@ -62,6 +61,16 @@ type Channel struct {
62
61
OnReceiveArray * Event
63
62
}
64
63
64
// UTF16String is text held as a sequence of UTF-16 code units, so that len
// and slice indices count UTF-16 code units rather than UTF-8 bytes.
type UTF16String []uint16

// NewUTF16String converts s into its UTF-16 code unit representation.
//
// Runes outside the Basic Multilingual Plane are encoded as surrogate pairs
// (two code units). Invalid UTF-8 bytes in s become U+FFFD, because the
// string is first converted to a rune slice.
func NewUTF16String(s string) UTF16String {
	return utf16.Encode([]rune(s))
}

// String decodes u back into a regular (UTF-8) Go string.
//
// A slice boundary that splits a surrogate pair leaves an unpaired surrogate,
// which utf16.Decode turns into U+FFFD.
func (u UTF16String) String() string {
	return string(utf16.Decode(u))
}
73
+
65
74
// ChunkParser keeps the received data that has not yet been returned as a
// chunk, so that GetChunks can be called repeatedly with successive pieces of
// a stream.
type ChunkParser struct {
66
75
// buf is the pending data: GetChunks appends each new slice of bytes to it
// and, after extracting a chunk, replaces it with the leftover remainder.
buf []byte
67
76
}
@@ -72,91 +81,24 @@ func NewChunkParser() *ChunkParser {
72
81
}
73
82
}
74
83
75
- // bestEffortDecode attempts to decode as much UTF-8 data as possible from the buffer
76
- func bestEffortDecode (data []byte ) string {
77
- valid := make ([]byte , 0 , len (data ))
78
- for len (data ) > 0 {
79
- r , size := utf8 .DecodeRune (data )
80
- if r == utf8 .RuneError {
81
- break
82
- }
83
- valid = append (valid , data [:size ]... )
84
- data = data [size :]
85
- }
86
- return string (valid )
87
- }
88
-
89
- // GetChunks yields chunks generated from received data.
90
- // The buffer may not be decodable as UTF-8 if there's a split multi-byte
91
- // character at the end. To handle this, we do a "best effort" decode of the
92
- // buffer to decode as much of it as possible.
93
84
func (p * ChunkParser ) GetChunks (newDataBytes []byte ) []string {
94
85
var chunks []string
95
86
p .buf = append (p .buf , newDataBytes ... )
96
87
97
88
for {
98
- // Decode buffer with best effort
99
- bufDecoded := bestEffortDecode (p .buf )
100
-
101
- // Convert to UTF-16 (removing BOM)
102
- var bufUtf16 []byte
103
- for _ , r := range bufDecoded {
104
- // Convert each rune to UTF-16
105
- buf := make ([]byte , 2 )
106
- binary .BigEndian .PutUint16 (buf , uint16 (r ))
107
- bufUtf16 = append (bufUtf16 , buf ... )
108
- }
109
-
110
- // Find length string match
111
- matches := lenRegex .FindStringSubmatch (bufDecoded )
112
- if matches == nil {
113
- break
114
- }
115
-
116
- lengthStr := matches [1 ]
117
- // Both lengths are in number of bytes in UTF-16 encoding
89
+ bufStr := string (p .buf )
90
+ lengthStr , after , _ := strings .Cut (bufStr , "\n " )
118
91
length , err := strconv .Atoi (lengthStr )
119
92
if err != nil {
120
93
break
121
94
}
122
- length *= 2 // Convert to UTF-16 byte count
123
-
124
- // Calculate length of the submission length and newline in UTF-16
125
- lenStrAndNewline := lengthStr + "\n "
126
- var lenLength int
127
- for _ , r := range lenStrAndNewline {
128
- lenLength += 2 // Each UTF-16 character is 2 bytes
129
- _ = r
130
- }
131
-
132
- if len (bufUtf16 )- lenLength < length {
95
+ utf16Str := NewUTF16String (after )
96
+ if len (utf16Str ) < length {
133
97
break
134
98
}
135
99
136
- // Extract submission
137
- submission := bufUtf16 [lenLength : lenLength + length ]
138
-
139
- // Convert UTF-16 bytes back to string
140
- var result string
141
- for i := 0 ; i < len (submission ); i += 2 {
142
- if i + 1 >= len (submission ) {
143
- break
144
- }
145
- char := binary .BigEndian .Uint16 (submission [i : i + 2 ])
146
- result += string (rune (char ))
147
- }
148
-
149
- chunks = append (chunks , result )
150
-
151
- // Calculate how many bytes to drop from the buffer
152
- dropLength := len (matches [0 ]) // length of the length string and newline
153
- dropLength += len (result ) // length of the actual content in UTF-8
154
-
155
- if dropLength <= len (p .buf ) {
156
- p .buf = p .buf [dropLength :]
157
- } else {
158
- p .buf = p .buf [:0 ]
159
- }
100
+ chunks = append (chunks , utf16Str [0 :length ].String ())
101
+ p .buf = []byte (utf16Str [length :].String ())
160
102
}
161
103
162
104
return chunks
0 commit comments