Skip to content

Commit 9f7b8a9

Browse files
authored
RFC: Neon support (pretty much working) (#35)
* feat: neon support * feat: temp stub replacements for neon intrinsics (pending rust-lang/stdarch#792) * fix: drone CI rustup nightly * feat: fix guards, use rust stdlib for bit count operations * fix: remove double semicolon * feat: fancy generic generator functions, thanks @Licenser
1 parent 50ba38a commit 9f7b8a9

20 files changed

+2393
-100
lines changed

.drone.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,7 @@ steps:
5858
- name: test
5959
image: rust:1
6060
commands:
61-
- cargo build --verbose --all
62-
- cargo test --verbose --all
61+
- rustup default nightly
62+
- rustup update
63+
- cargo clean && cargo +nightly build --verbose --all
64+
- cargo +nightly test --verbose --all

src/avx2/generator.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#[cfg(target_arch = "x86")]
2+
use std::arch::x86::*;
3+
#[cfg(target_arch = "x86_64")]
4+
use std::arch::x86_64::*;
5+
6+
use crate::value::generator::ESCAPED;
7+
use std::io;
8+
9+
#[inline(always)]
10+
pub unsafe fn write_str_simd<W>(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write {
11+
let zero = _mm256_set1_epi8(0);
12+
let lower_quote_range = _mm256_set1_epi8(0x1F as i8);
13+
let quote = _mm256_set1_epi8(b'"' as i8);
14+
let backslash = _mm256_set1_epi8(b'\\' as i8);
15+
while *len - *idx >= 32 {
16+
// Load 32 bytes of data;
17+
#[allow(clippy::cast_ptr_alignment)]
18+
let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i);
19+
// Test the data against being backslash and quote.
20+
let bs_or_quote = _mm256_or_si256(
21+
_mm256_cmpeq_epi8(data, backslash),
22+
_mm256_cmpeq_epi8(data, quote),
23+
);
24+
// Now mask the data with the quote range (0x1F).
25+
let in_quote_range = _mm256_and_si256(data, lower_quote_range);
26+
// then test of the data is unchanged. aka: xor it with the
27+
// Any field that was inside the quote range it will be zero
28+
// now.
29+
let is_unchanged = _mm256_xor_si256(data, in_quote_range);
30+
let in_range = _mm256_cmpeq_epi8(is_unchanged, zero);
31+
let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range));
32+
if quote_bits != 0 {
33+
let quote_dist = quote_bits.trailing_zeros() as usize;
34+
stry!(writer.write_all(&string[0..*idx + quote_dist]));
35+
let ch = string[*idx + quote_dist];
36+
match ESCAPED[ch as usize] {
37+
b'u' => stry!(write!(writer, "\\u{:04x}", ch)),
38+
39+
escape => stry!(writer.write_all(&[b'\\', escape])),
40+
};
41+
*string = &string[*idx + quote_dist + 1..];
42+
*idx = 0;
43+
*len = string.len();
44+
} else {
45+
*idx += 32;
46+
}
47+
}
48+
stry!(writer.write_all(&string[0..*idx]));
49+
*string = &string[*idx..];
50+
Ok(())
51+
}

src/avx2/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pub mod deser;
22
pub mod stage1;
3-
pub mod utf8check;
3+
pub mod utf8check;
4+
pub mod generator;

src/lib.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
11
#![deny(warnings)]
2+
3+
#![cfg_attr(target_feature = "neon", feature(
4+
asm,
5+
stdsimd,
6+
repr_simd,
7+
custom_inner_attributes,
8+
aarch64_target_feature,
9+
platform_intrinsics,
10+
stmt_expr_attributes,
11+
simd_ffi,
12+
link_llvm_intrinsics
13+
)
14+
)]
15+
216
#![cfg_attr(feature = "hints", feature(core_intrinsics))]
317
//! simdjson-rs is a rust port of the simejson c++ library. It follows
418
//! most of the design closely with a few exceptions to make it better
@@ -89,17 +103,25 @@ pub use crate::avx2::deser::*;
89103
#[cfg(target_feature = "avx2")]
90104
use crate::avx2::stage1::SIMDJSON_PADDING;
91105

92-
#[cfg(not(target_feature = "avx2"))]
106+
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
93107
mod sse42;
94-
#[cfg(not(target_feature = "avx2"))]
108+
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
95109
pub use crate::sse42::deser::*;
96-
#[cfg(not(target_feature = "avx2"))]
110+
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))]
97111
use crate::sse42::stage1::SIMDJSON_PADDING;
98112

113+
#[cfg(target_feature = "neon")]
114+
mod neon;
115+
#[cfg(target_feature = "neon")]
116+
pub use crate::neon::deser::*;
117+
#[cfg(target_feature = "neon")]
118+
use crate::neon::stage1::SIMDJSON_PADDING;
119+
99120
mod stage2;
100121
pub mod value;
101122

102123
use crate::numberparse::Number;
124+
#[cfg(not(target_feature = "neon"))]
103125
use std::mem;
104126
use std::str;
105127

@@ -163,7 +185,11 @@ impl<'de> Deserializer<'de> {
163185

164186
let counts = Deserializer::validate(input, &structural_indexes)?;
165187

166-
let strings = Vec::with_capacity(len + SIMDJSON_PADDING);
188+
// Set length to allow slice access in ARM code
189+
let mut strings = Vec::with_capacity(len + SIMDJSON_PADDING);
190+
unsafe {
191+
strings.set_len(len + SIMDJSON_PADDING);
192+
}
167193

168194
Ok(Deserializer {
169195
counts,

src/neon/deser.rs

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
2+
pub use crate::error::{Error, ErrorType};
3+
pub use crate::Deserializer;
4+
pub use crate::Result;
5+
pub use crate::neon::stage1::*;
6+
pub use crate::neon::utf8check::*;
7+
pub use crate::neon::intrinsics::*;
8+
pub use crate::stringparse::*;
9+
10+
impl<'de> Deserializer<'de> {
11+
#[cfg_attr(not(feature = "no-inline"), inline(always))]
12+
pub fn parse_str_(&mut self) -> Result<&'de str> {
13+
// Add 1 to skip the initial "
14+
let idx = self.iidx + 1;
15+
let mut padding = [0u8; 32];
16+
//let mut read: usize = 0;
17+
18+
// we include the terminal '"' so we know where to end
19+
// This is safe since we check sub's lenght in the range access above and only
20+
// create sub sliced form sub to `sub.len()`.
21+
22+
let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) };
23+
let mut src_i: usize = 0;
24+
let mut len = src_i;
25+
loop {
26+
// store to dest unconditionally - we can overwrite the bits we don't like
27+
// later
28+
29+
let (v0, v1) = if src.len() >= src_i + 32 {
30+
// This is safe since we ensure src is at least 16 wide
31+
#[allow(clippy::cast_ptr_alignment)]
32+
unsafe {
33+
(
34+
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()),
35+
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()),
36+
)
37+
}
38+
} else {
39+
unsafe {
40+
padding
41+
.get_unchecked_mut(..src.len() - src_i)
42+
.clone_from_slice(src.get_unchecked(src_i..));
43+
// This is safe since we ensure src is at least 32 wide
44+
(
45+
vld1q_u8(padding.get_unchecked(0..16).as_ptr()),
46+
vld1q_u8(padding.get_unchecked(16..32).as_ptr()),
47+
)
48+
}
49+
};
50+
51+
let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1);
52+
53+
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
54+
// we encountered quotes first. Move dst to point to quotes and exit
55+
// find out where the quote is...
56+
let quote_dist: u32 = quote_bits.trailing_zeros();
57+
58+
///////////////////////
59+
// Above, check for overflow in case someone has a crazy string (>=4GB?)
60+
// But only add the overflow check when the document itself exceeds 4GB
61+
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
62+
////////////////////////
63+
64+
// we advance the point, accounting for the fact that we have a NULl termination
65+
66+
len += quote_dist as usize;
67+
unsafe {
68+
let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str;
69+
return Ok(&*v);
70+
}
71+
72+
// we compare the pointers since we care if they are 'at the same spot'
73+
// not if they are the same value
74+
}
75+
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 {
76+
// Move to the 'bad' character
77+
let bs_dist: u32 = bs_bits.trailing_zeros();
78+
len += bs_dist as usize;
79+
src_i += bs_dist as usize;
80+
break;
81+
} else {
82+
// they are the same. Since they can't co-occur, it means we encountered
83+
// neither.
84+
src_i += 32;
85+
len += 32;
86+
}
87+
}
88+
89+
let mut dst_i: usize = 0;
90+
let dst: &mut [u8] = self.strings.as_mut_slice();
91+
92+
loop {
93+
let (v0, v1) = if src.len() >= src_i + 32 {
94+
// This is safe since we ensure src is at least 16 wide
95+
#[allow(clippy::cast_ptr_alignment)]
96+
unsafe {
97+
(
98+
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()),
99+
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()),
100+
)
101+
}
102+
} else {
103+
unsafe {
104+
padding
105+
.get_unchecked_mut(..src.len() - src_i)
106+
.clone_from_slice(src.get_unchecked(src_i..));
107+
// This is safe since we ensure src is at least 32 wide
108+
(
109+
vld1q_u8(padding.get_unchecked(0..16).as_ptr()),
110+
vld1q_u8(padding.get_unchecked(16..32).as_ptr()),
111+
)
112+
}
113+
};
114+
115+
unsafe {
116+
dst.get_unchecked_mut(dst_i..dst_i + 32).copy_from_slice(src.get_unchecked(src_i..src_i + 32));
117+
}
118+
119+
// store to dest unconditionally - we can overwrite the bits we don't like
120+
// later
121+
let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1);
122+
123+
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
124+
// we encountered quotes first. Move dst to point to quotes and exit
125+
// find out where the quote is...
126+
let quote_dist: u32 = quote_bits.trailing_zeros();
127+
128+
///////////////////////
129+
// Above, check for overflow in case someone has a crazy string (>=4GB?)
130+
// But only add the overflow check when the document itself exceeds 4GB
131+
// Currently unneeded because we refuse to parse docs larger or equal to 4GB.
132+
////////////////////////
133+
134+
// we advance the point, accounting for the fact that we have a NULl termination
135+
136+
dst_i += quote_dist as usize;
137+
unsafe {
138+
self.input
139+
.get_unchecked_mut(idx + len..idx + len + dst_i)
140+
.clone_from_slice(&self.strings.get_unchecked(..dst_i));
141+
let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8]
142+
as *const str;
143+
self.str_offset += dst_i as usize;
144+
return Ok(&*v);
145+
}
146+
147+
// we compare the pointers since we care if they are 'at the same spot'
148+
// not if they are the same value
149+
}
150+
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 {
151+
// find out where the backspace is
152+
let bs_dist: u32 = bs_bits.trailing_zeros();
153+
let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) };
154+
// we encountered backslash first. Handle backslash
155+
if escape_char == b'u' {
156+
// move src/dst up to the start; they will be further adjusted
157+
// within the unicode codepoint handling code.
158+
src_i += bs_dist as usize;
159+
dst_i += bs_dist as usize;
160+
let (o, s) = if let Ok(r) = handle_unicode_codepoint(
161+
unsafe { src.get_unchecked(src_i..) },
162+
unsafe { dst.get_unchecked_mut(dst_i..) }
163+
)
164+
{
165+
r
166+
} else {
167+
return Err(self.error(ErrorType::InvlaidUnicodeCodepoint));
168+
};
169+
if o == 0 {
170+
return Err(self.error(ErrorType::InvlaidUnicodeCodepoint));
171+
};
172+
// We moved o steps forword at the destiation and 6 on the source
173+
src_i += s;
174+
dst_i += o;
175+
} else {
176+
// simple 1:1 conversion. Will eat bs_dist+2 characters in input and
177+
// write bs_dist+1 characters to output
178+
// note this may reach beyond the part of the buffer we've actually
179+
// seen. I think this is ok
180+
let escape_result: u8 =
181+
unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) };
182+
if escape_result == 0 {
183+
return Err(self.error(ErrorType::InvalidEscape));
184+
}
185+
unsafe {
186+
*dst.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result;
187+
}
188+
src_i += bs_dist as usize + 2;
189+
dst_i += bs_dist as usize + 1;
190+
}
191+
} else {
192+
// they are the same. Since they can't co-occur, it means we encountered
193+
// neither.
194+
src_i += 32;
195+
dst_i += 32;
196+
}
197+
}
198+
}
199+
}

src/neon/generator.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use crate::value::generator::ESCAPED;
2+
use std::io;
3+
use crate::neon::intrinsics::*;
4+
use crate::neon::stage1::neon_movemask;
5+
6+
#[inline(always)]
7+
pub unsafe fn write_str_simd<W>(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write {
8+
// The case where we have a 16+ byte block
9+
// we repeate the same logic as above but with
10+
// only 16 bytes
11+
let zero = vdupq_n_u8(0);
12+
let lower_quote_range = vdupq_n_u8(0x1F);
13+
let quote = vdupq_n_u8(b'"');
14+
let backslash = vdupq_n_u8(b'\\');
15+
while *len - *idx > 16 {
16+
// Load 16 bytes of data;
17+
let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx));
18+
// Test the data against being backslash and quote.
19+
let bs_or_quote =
20+
vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote));
21+
// Now mask the data with the quote range (0x1F).
22+
let in_quote_range = vandq_u8(data, lower_quote_range);
23+
// then test of the data is unchanged. aka: xor it with the
24+
// Any field that was inside the quote range it will be zero
25+
// now.
26+
let is_unchanged = vxorrq_u8(data, in_quote_range);
27+
let in_range = vceqq_u8(is_unchanged, zero);
28+
let quote_bits = neon_movemask(vorrq_u8(bs_or_quote, in_range));
29+
if quote_bits != 0 {
30+
let quote_dist = quote_bits.trailing_zeros() as usize;
31+
stry!(writer.write_all(&string[0..*idx + quote_dist]));
32+
let ch = string[*idx + quote_dist];
33+
match ESCAPED[ch as usize] {
34+
b'u' => stry!(write!(writer, "\\u{:04x}", ch)),
35+
36+
escape => stry!(writer.write_all(&[b'\\', escape])),
37+
};
38+
*string = &string[*idx + quote_dist + 1..];
39+
*idx = 0;
40+
*len = string.len();
41+
} else {
42+
*idx += 16;
43+
}
44+
}
45+
stry!(writer.write_all(&string[0..*idx]));
46+
*string = &string[*idx..];
47+
Ok(())
48+
}

0 commit comments

Comments
 (0)