@@ -133,40 +133,35 @@ impl<'a> Iterator<char> for Chars<'a> {
133
133
#[ inline]
134
134
fn next ( & mut self ) -> Option < char > {
135
135
// Decode the next char from the UTF-8 byte iterator, relying on the invariant that the bytes are valid UTF-8
136
- #[ inline]
137
- fn decode_multibyte < ' a > ( x : u8 , it : & mut slice:: Items < ' a , u8 > ) -> char {
138
- // NOTE: Performance is very sensitive to the exact formulation here
139
- // Decode from a byte combination out of: [[[x y] z] w]
140
- let init = utf8_first_byte ! ( x, 2 ) ;
141
- let y = unwrap_or_0 ( it. next ( ) ) ;
142
- let mut ch = utf8_acc_cont_byte ! ( init, y) ;
143
- if x >= 0xE0 {
144
- /* [[x y z] w] case
145
- * 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */
146
- let z = unwrap_or_0 ( it. next ( ) ) ;
147
- let y_z = utf8_acc_cont_byte ! ( ( y & CONT_MASK ) as u32 , z) ;
148
- ch = init << 12 | y_z;
149
- if x >= 0xF0 {
150
- /* [x y z w] case
151
- * use only the lower 3 bits of `init` */
152
- let w = unwrap_or_0 ( it. next ( ) ) ;
153
- ch = ( init & 7 ) << 18 | utf8_acc_cont_byte ! ( y_z, w) ;
154
- }
155
- }
156
- unsafe {
157
- mem:: transmute ( ch)
136
+ let x = match self . iter . next ( ) {
137
+ None => return None ,
138
+ Some ( & next_byte) if next_byte < 128 => return Some ( next_byte as char ) ,
139
+ Some ( & next_byte) => next_byte,
140
+ } ;
141
+
142
+ // Multibyte case follows: `x` is >= 128 here, i.e. the first byte of a multibyte sequence
143
+ // Decode from a byte combination out of: [[[x y] z] w]
144
+ // NOTE: Performance is sensitive to the exact formulation here
145
+ let init = utf8_first_byte ! ( x, 2 ) ;
146
+ let y = unwrap_or_0 ( self . iter . next ( ) ) ;
147
+ let mut ch = utf8_acc_cont_byte ! ( init, y) ;
148
+ if x >= 0xE0 {
149
+ // [[x y z] w] case
150
+ // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
151
+ let z = unwrap_or_0 ( self . iter . next ( ) ) ;
152
+ let y_z = utf8_acc_cont_byte ! ( ( y & CONT_MASK ) as u32 , z) ;
153
+ ch = init << 12 | y_z;
154
+ if x >= 0xF0 {
155
+ // [x y z w] case
156
+ // use only the lower 3 bits of `init`
157
+ let w = unwrap_or_0 ( self . iter . next ( ) ) ;
158
+ ch = ( init & 7 ) << 18 | utf8_acc_cont_byte ! ( y_z, w) ;
158
159
}
159
160
}
160
161
161
- match self . iter . next ( ) {
162
- None => None ,
163
- Some ( & next_byte) => {
164
- if next_byte < 128 {
165
- Some ( next_byte as char )
166
- } else {
167
- Some ( decode_multibyte ( next_byte, & mut self . iter ) )
168
- }
169
- }
162
+ // SAFETY: the str invariant guarantees that `ch` is a valid Unicode Scalar Value, so the transmute to char is sound
163
+ unsafe {
164
+ Some ( mem:: transmute ( ch) )
170
165
}
171
166
}
172
167
@@ -180,38 +175,32 @@ impl<'a> Iterator<char> for Chars<'a> {
180
175
impl < ' a > DoubleEndedIterator < char > for Chars < ' a > {
181
176
#[ inline]
182
177
fn next_back ( & mut self ) -> Option < char > {
183
- #[ inline]
184
- fn decode_multibyte_back < ' a > ( w : u8 , it : & mut slice:: Items < ' a , u8 > ) -> char {
185
- // Decode from a byte combination out of: [x [y [z w]]]
186
- let mut ch;
187
- let z = unwrap_or_0 ( it. next_back ( ) ) ;
188
- ch = utf8_first_byte ! ( z, 2 ) ;
189
- if utf8_is_cont_byte ! ( z) {
190
- let y = unwrap_or_0 ( it. next_back ( ) ) ;
191
- ch = utf8_first_byte ! ( y, 3 ) ;
192
- if utf8_is_cont_byte ! ( y) {
193
- let x = unwrap_or_0 ( it. next_back ( ) ) ;
194
- ch = utf8_first_byte ! ( x, 4 ) ;
195
- ch = utf8_acc_cont_byte ! ( ch, y) ;
196
- }
197
- ch = utf8_acc_cont_byte ! ( ch, z) ;
198
- }
199
- ch = utf8_acc_cont_byte ! ( ch, w) ;
178
+ let w = match self . iter . next_back ( ) {
179
+ None => return None ,
180
+ Some ( & back_byte) if back_byte < 128 => return Some ( back_byte as char ) ,
181
+ Some ( & back_byte) => back_byte,
182
+ } ;
200
183
201
- unsafe {
202
- mem:: transmute ( ch)
184
+ // Multibyte case follows: `w` is >= 128 here, i.e. the last byte of a multibyte sequence
185
+ // Decode from a byte combination out of: [x [y [z w]]], reading backwards while utf8_is_cont_byte! reports a continuation byte
186
+ let mut ch;
187
+ let z = unwrap_or_0 ( self . iter . next_back ( ) ) ;
188
+ ch = utf8_first_byte ! ( z, 2 ) ;
189
+ if utf8_is_cont_byte ! ( z) {
190
+ let y = unwrap_or_0 ( self . iter . next_back ( ) ) ;
191
+ ch = utf8_first_byte ! ( y, 3 ) ;
192
+ if utf8_is_cont_byte ! ( y) {
193
+ let x = unwrap_or_0 ( self . iter . next_back ( ) ) ;
194
+ ch = utf8_first_byte ! ( x, 4 ) ;
195
+ ch = utf8_acc_cont_byte ! ( ch, y) ;
203
196
}
197
+ ch = utf8_acc_cont_byte ! ( ch, z) ;
204
198
}
199
+ ch = utf8_acc_cont_byte ! ( ch, w) ;
205
200
206
- match self . iter . next_back ( ) {
207
- None => None ,
208
- Some ( & back_byte) => {
209
- if back_byte < 128 {
210
- Some ( back_byte as char )
211
- } else {
212
- Some ( decode_multibyte_back ( back_byte, & mut self . iter ) )
213
- }
214
- }
201
+ // SAFETY: the str invariant guarantees that `ch` is a valid Unicode Scalar Value, so the transmute to char is sound
202
+ unsafe {
203
+ Some ( mem:: transmute ( ch) )
215
204
}
216
205
}
217
206
}
0 commit comments