@@ -26,8 +26,7 @@ pub fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error
26
26
/// available.
27
27
#[ cfg( feature = "public_imp" ) ]
28
28
pub struct Utf8ValidatorImp {
    // Zero when no multi-byte sequence is pending. Non-zero while a sequence
    // that started in an earlier chunk still awaits continuation bytes;
    // `finalize` reports an error if this is still non-zero.
    expected_cont_bytes: u8,
    // Sticky failure flag: once set, `update` returns immediately and
    // `finalize` reports an error.
    err: bool,
}
33
32
@@ -39,65 +38,59 @@ pub use Utf8ValidatorImp as ChunkedUtf8ValidatorImp;
39
38
#[ cfg( feature = "public_imp" ) ]
40
39
impl Utf8ValidatorImp {
41
40
#[ inline]
42
- #[ expect( clippy:: cast_possible_truncation) ]
43
41
fn update ( & mut self , mut input : & [ u8 ] ) {
44
42
if self . err {
45
43
return ;
46
44
}
47
- if self . incomplete_len > 0 {
48
- let total_bytes_needed: usize = match self . incomplete_data [ 0 ] {
49
- 0 ..0b1000_0000 => {
50
- panic ! ( "ASCII data should never be incomplete" ) ;
51
- }
52
- 0b1000_0000 ..0b1100_0000 => {
53
- // first byte cannot be a continuation byte
45
+ if self . expected_cont_bytes > 0 {
46
+ let to_check = ( self . expected_cont_bytes as usize ) . min ( input. len ( ) ) ;
47
+ for b in & input[ ..to_check] {
48
+ if b & 0b1100_0000 != 0b1000_0000 {
49
+ // not a continuation byte
54
50
self . err = true ;
55
51
return ;
56
52
}
57
- 0b1100_0000 ..0b1110_0000 => 2 ,
58
- 0b1110_0000 ..0b1111_0000 => 3 ,
59
- 0b1111_0000 ..0b1111_1000 => 4 ,
53
+ self . expected_cont_bytes -= 1 ;
54
+ }
55
+ if self . expected_cont_bytes > 0 {
56
+ // not enough continuation bytes
57
+ return ;
58
+ }
59
+ input = & input[ to_check..] ;
60
+ }
61
+ if let Err ( e) = core:: str:: from_utf8 ( input) {
62
+ // cannot wrap, since there is at least one byte left which is not valid UTF-8
63
+ // by itself
64
+ self . expected_cont_bytes = match input[ e. valid_up_to ( ) ] {
65
+ 0b1100_0000 ..0b1110_0000 => 1 ,
66
+ 0b1110_0000 ..0b1111_0000 => 2 ,
67
+ 0b1111_0000 ..0b1111_1000 => 3 ,
60
68
_ => {
61
69
// invalid byte for starting sequence
62
70
self . err = true ;
63
71
return ;
64
72
}
65
73
} ;
66
- if self . incomplete_len as usize >= total_bytes_needed {
67
- // actually errored on previous update
68
- self . err = true ;
69
- return ;
70
- }
71
- let bytes_needed = total_bytes_needed - self . incomplete_len as usize ;
72
- let to_copy = core:: cmp:: min ( bytes_needed, input. len ( ) ) ;
73
- self . incomplete_data
74
- [ self . incomplete_len as usize ..self . incomplete_len as usize + to_copy]
75
- . copy_from_slice ( & input[ ..to_copy] ) ;
76
- if to_copy < bytes_needed {
77
- self . incomplete_len += to_copy as u8 ;
78
- return ;
79
- }
80
- if core:: str:: from_utf8 ( & self . incomplete_data [ ..total_bytes_needed] ) . is_err ( ) {
74
+ let rem_input = input. len ( ) - e. valid_up_to ( ) - 1 ;
75
+ if rem_input >= self . expected_cont_bytes as usize {
76
+ // too many continuation bytes so they are not valid
81
77
self . err = true ;
82
78
return ;
83
79
}
84
- self . incomplete_len = 0 ;
85
- input = & input[ to_copy.. ] ;
86
- }
87
- if let Err ( e ) = core :: str :: from_utf8 ( input ) {
88
- if input . len ( ) - e . valid_up_to ( ) > 3 {
89
- self . err = true ;
90
- return ;
80
+ for i in 0 ..rem_input {
81
+ if input[ e . valid_up_to ( ) + i + 1 ] & 0b1100_0000 != 0b1000_0000 {
82
+ // not a continuation byte
83
+ self . err = true ;
84
+ return ;
85
+ }
86
+ self . expected_cont_bytes -= 1 ;
91
87
}
92
- self . incomplete_len = ( input. len ( ) - e. valid_up_to ( ) ) as u8 ;
93
- self . incomplete_data [ ..self . incomplete_len as usize ]
94
- . copy_from_slice ( & input[ e. valid_up_to ( ) ..] ) ;
95
88
}
96
89
}
97
90
98
91
#[ inline]
99
92
const fn finalize ( self ) -> core:: result:: Result < ( ) , crate :: basic:: Utf8Error > {
100
- if self . err || self . incomplete_len > 0 {
93
+ if self . err || self . expected_cont_bytes > 0 {
101
94
Err ( crate :: basic:: Utf8Error { } )
102
95
} else {
103
96
Ok ( ( ) )
@@ -111,8 +104,7 @@ impl crate::basic::imp::Utf8Validator for Utf8ValidatorImp {
111
104
#[ must_use]
112
105
fn new ( ) -> Self {
113
106
Self {
114
- incomplete_data : [ 0 ; 4 ] ,
115
- incomplete_len : 0 ,
107
+ expected_cont_bytes : 0 ,
116
108
err : false ,
117
109
}
118
110
}
@@ -137,8 +129,7 @@ impl crate::basic::imp::ChunkedUtf8Validator for Utf8ValidatorImp {
137
129
#[ must_use]
138
130
fn new ( ) -> Self {
139
131
Self {
140
- incomplete_data : [ 0 ; 4 ] ,
141
- incomplete_len : 0 ,
132
+ expected_cont_bytes : 0 ,
142
133
err : false ,
143
134
}
144
135
}
0 commit comments