1
+
2
+ pub use crate :: error:: { Error , ErrorType } ;
3
+ pub use crate :: Deserializer ;
4
+ pub use crate :: Result ;
5
+ pub use crate :: neon:: stage1:: * ;
6
+ pub use crate :: neon:: utf8check:: * ;
7
+ pub use crate :: neon:: intrinsics:: * ;
8
+ pub use crate :: stringparse:: * ;
9
+
10
+ impl < ' de > Deserializer < ' de > {
11
+ #[ cfg_attr( not( feature = "no-inline" ) , inline( always) ) ]
12
+ pub fn parse_str_ ( & mut self ) -> Result < & ' de str > {
13
+ // Add 1 to skip the initial "
14
+ let idx = self . iidx + 1 ;
15
+ let mut padding = [ 0u8 ; 32 ] ;
16
+ //let mut read: usize = 0;
17
+
18
+ // we include the terminal '"' so we know where to end
19
+ // This is safe since we check sub's lenght in the range access above and only
20
+ // create sub sliced form sub to `sub.len()`.
21
+
22
+ let src: & [ u8 ] = unsafe { & self . input . get_unchecked ( idx..) } ;
23
+ let mut src_i: usize = 0 ;
24
+ let mut len = src_i;
25
+ loop {
26
+ // store to dest unconditionally - we can overwrite the bits we don't like
27
+ // later
28
+
29
+ let ( v0, v1) = if src. len ( ) >= src_i + 32 {
30
+ // This is safe since we ensure src is at least 16 wide
31
+ #[ allow( clippy:: cast_ptr_alignment) ]
32
+ unsafe {
33
+ (
34
+ vld1q_u8 ( src. get_unchecked ( src_i..src_i + 16 ) . as_ptr ( ) ) ,
35
+ vld1q_u8 ( src. get_unchecked ( src_i + 16 ..src_i + 32 ) . as_ptr ( ) ) ,
36
+ )
37
+ }
38
+ } else {
39
+ unsafe {
40
+ padding
41
+ . get_unchecked_mut ( ..src. len ( ) - src_i)
42
+ . clone_from_slice ( src. get_unchecked ( src_i..) ) ;
43
+ // This is safe since we ensure src is at least 32 wide
44
+ (
45
+ vld1q_u8 ( padding. get_unchecked ( 0 ..16 ) . as_ptr ( ) ) ,
46
+ vld1q_u8 ( padding. get_unchecked ( 16 ..32 ) . as_ptr ( ) ) ,
47
+ )
48
+ }
49
+ } ;
50
+
51
+ let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits ( v0, v1) ;
52
+
53
+ if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
54
+ // we encountered quotes first. Move dst to point to quotes and exit
55
+ // find out where the quote is...
56
+ let quote_dist: u32 = quote_bits. trailing_zeros ( ) ;
57
+
58
+ ///////////////////////
59
+ // Above, check for overflow in case someone has a crazy string (>=4GB?)
60
+ // But only add the overflow check when the document itself exceeds 4GB
61
+ // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
62
+ ////////////////////////
63
+
64
+ // we advance the point, accounting for the fact that we have a NULl termination
65
+
66
+ len += quote_dist as usize ;
67
+ unsafe {
68
+ let v = self . input . get_unchecked ( idx..idx + len) as * const [ u8 ] as * const str ;
69
+ return Ok ( & * v) ;
70
+ }
71
+
72
+ // we compare the pointers since we care if they are 'at the same spot'
73
+ // not if they are the same value
74
+ }
75
+ if ( quote_bits. wrapping_sub ( 1 ) & bs_bits) != 0 {
76
+ // Move to the 'bad' character
77
+ let bs_dist: u32 = bs_bits. trailing_zeros ( ) ;
78
+ len += bs_dist as usize ;
79
+ src_i += bs_dist as usize ;
80
+ break ;
81
+ } else {
82
+ // they are the same. Since they can't co-occur, it means we encountered
83
+ // neither.
84
+ src_i += 32 ;
85
+ len += 32 ;
86
+ }
87
+ }
88
+
89
+ let mut dst_i: usize = 0 ;
90
+ let dst: & mut [ u8 ] = self . strings . as_mut_slice ( ) ;
91
+
92
+ loop {
93
+ let ( v0, v1) = if src. len ( ) >= src_i + 32 {
94
+ // This is safe since we ensure src is at least 16 wide
95
+ #[ allow( clippy:: cast_ptr_alignment) ]
96
+ unsafe {
97
+ (
98
+ vld1q_u8 ( src. get_unchecked ( src_i..src_i + 16 ) . as_ptr ( ) ) ,
99
+ vld1q_u8 ( src. get_unchecked ( src_i + 16 ..src_i + 32 ) . as_ptr ( ) ) ,
100
+ )
101
+ }
102
+ } else {
103
+ unsafe {
104
+ padding
105
+ . get_unchecked_mut ( ..src. len ( ) - src_i)
106
+ . clone_from_slice ( src. get_unchecked ( src_i..) ) ;
107
+ // This is safe since we ensure src is at least 32 wide
108
+ (
109
+ vld1q_u8 ( padding. get_unchecked ( 0 ..16 ) . as_ptr ( ) ) ,
110
+ vld1q_u8 ( padding. get_unchecked ( 16 ..32 ) . as_ptr ( ) ) ,
111
+ )
112
+ }
113
+ } ;
114
+
115
+ unsafe {
116
+ dst. get_unchecked_mut ( dst_i..dst_i + 32 ) . copy_from_slice ( src. get_unchecked ( src_i..src_i + 32 ) ) ;
117
+ }
118
+
119
+ // store to dest unconditionally - we can overwrite the bits we don't like
120
+ // later
121
+ let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits ( v0, v1) ;
122
+
123
+ if ( bs_bits. wrapping_sub ( 1 ) & quote_bits) != 0 {
124
+ // we encountered quotes first. Move dst to point to quotes and exit
125
+ // find out where the quote is...
126
+ let quote_dist: u32 = quote_bits. trailing_zeros ( ) ;
127
+
128
+ ///////////////////////
129
+ // Above, check for overflow in case someone has a crazy string (>=4GB?)
130
+ // But only add the overflow check when the document itself exceeds 4GB
131
+ // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
132
+ ////////////////////////
133
+
134
+ // we advance the point, accounting for the fact that we have a NULl termination
135
+
136
+ dst_i += quote_dist as usize ;
137
+ unsafe {
138
+ self . input
139
+ . get_unchecked_mut ( idx + len..idx + len + dst_i)
140
+ . clone_from_slice ( & self . strings . get_unchecked ( ..dst_i) ) ;
141
+ let v = self . input . get_unchecked ( idx..idx + len + dst_i) as * const [ u8 ]
142
+ as * const str ;
143
+ self . str_offset += dst_i as usize ;
144
+ return Ok ( & * v) ;
145
+ }
146
+
147
+ // we compare the pointers since we care if they are 'at the same spot'
148
+ // not if they are the same value
149
+ }
150
+ if ( quote_bits. wrapping_sub ( 1 ) & bs_bits) != 0 {
151
+ // find out where the backspace is
152
+ let bs_dist: u32 = bs_bits. trailing_zeros ( ) ;
153
+ let escape_char: u8 = unsafe { * src. get_unchecked ( src_i + bs_dist as usize + 1 ) } ;
154
+ // we encountered backslash first. Handle backslash
155
+ if escape_char == b'u' {
156
+ // move src/dst up to the start; they will be further adjusted
157
+ // within the unicode codepoint handling code.
158
+ src_i += bs_dist as usize ;
159
+ dst_i += bs_dist as usize ;
160
+ let ( o, s) = if let Ok ( r) = handle_unicode_codepoint (
161
+ unsafe { src. get_unchecked ( src_i..) } ,
162
+ unsafe { dst. get_unchecked_mut ( dst_i..) }
163
+ )
164
+ {
165
+ r
166
+ } else {
167
+ return Err ( self . error ( ErrorType :: InvlaidUnicodeCodepoint ) ) ;
168
+ } ;
169
+ if o == 0 {
170
+ return Err ( self . error ( ErrorType :: InvlaidUnicodeCodepoint ) ) ;
171
+ } ;
172
+ // We moved o steps forword at the destiation and 6 on the source
173
+ src_i += s;
174
+ dst_i += o;
175
+ } else {
176
+ // simple 1:1 conversion. Will eat bs_dist+2 characters in input and
177
+ // write bs_dist+1 characters to output
178
+ // note this may reach beyond the part of the buffer we've actually
179
+ // seen. I think this is ok
180
+ let escape_result: u8 =
181
+ unsafe { * ESCAPE_MAP . get_unchecked ( escape_char as usize ) } ;
182
+ if escape_result == 0 {
183
+ return Err ( self . error ( ErrorType :: InvalidEscape ) ) ;
184
+ }
185
+ unsafe {
186
+ * dst. get_unchecked_mut ( dst_i + bs_dist as usize ) = escape_result;
187
+ }
188
+ src_i += bs_dist as usize + 2 ;
189
+ dst_i += bs_dist as usize + 1 ;
190
+ }
191
+ } else {
192
+ // they are the same. Since they can't co-occur, it means we encountered
193
+ // neither.
194
+ src_i += 32 ;
195
+ dst_i += 32 ;
196
+ }
197
+ }
198
+ }
199
+ }
0 commit comments