From a65ad0d7dd46f7b61205687cba5050ac35ddaff0 Mon Sep 17 00:00:00 2001 From: Elijah Mirecki Date: Fri, 17 Dec 2021 20:05:47 -0500 Subject: [PATCH] Improve ws lexing SIMD --- src/tokenizer.rs | 108 +++++++++++++++++++++++++++++---------------- src/util.rs | 14 ++++++ test/dereference.b | 2 + test/fib_rec.b | 6 +-- 4 files changed, 89 insertions(+), 41 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 66d44d2..b80d748 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,5 +1,6 @@ use crate::parser::ParseContext; use crate::ast::{Pos, CompErr}; +use crate::util::lsb_number; #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] use std::arch::x86_64::*; @@ -349,73 +350,103 @@ fn get_tok_word(c: &mut ParseContext) -> Result<(Pos, Token), CompErr> { } /** - * Extract an alphanumeric slice at the given offset + * Extract an alphanumeric (and underscore) slice at the given offset * @return An empty slice if the offset is out of bounds, * or if there are no alphanumeric characters at that position */ fn alphanumeric_slice<'a>( pos: &Pos, slice: &'a [u8], offset: usize ) -> Result<&'a str, CompErr> { + let len = alphanumeric_len(slice, offset); + match std::str::from_utf8(&slice[offset..offset + len]) { + Ok(s) => Ok(s), + _ => CompErr::err(pos, "Only ASCII is supported".to_string()), + } +} + +pub fn is_alpha(c: u8) -> bool { + (c >= 97 && c <= 122) | (c >= 65 && c <= 90) | (c >= 48 && c <= 57) | (c == 95) +} + +fn alphanumeric_len( + slice: &[u8], offset: usize +) -> usize { let mut len = 0; - // TODO: SIMD + while offset + len < slice.len() { - let c = slice[offset + len] as char; - if c.is_alphanumeric() || c == '_' { + if is_alpha(slice[offset + len]) { len += 1; } else { break; } } - match std::str::from_utf8(&slice[offset..offset + len]) { - Ok(s) => Ok(s), - _ => CompErr::err(pos, "Only ASCII is supported".to_string()), - } + len } +/// Returns true when it stopped at a non-whitespace byte, false when 16 or fewer bytes remain unscanned #[cfg(all(target_arch = "x86_64", target_feature = 
"avx2"))] -unsafe fn simd_consume_ws(c: &mut ParseContext) { - let space = ' ' as i8; - let space_vec = _mm_set_epi8( - space, space, space, space, - space, space, space, space, - space, space, space, space, - space, space, space, space - ); - // Bitmask that covers both newlines & tabs. - // It also covers a bunch of other chars that we don't care about - let nl_tab = 0b00001000i8; - let nl_tab_vec = _mm_set_epi8( - nl_tab, nl_tab, nl_tab, nl_tab, - nl_tab, nl_tab, nl_tab, nl_tab, - nl_tab, nl_tab, nl_tab, nl_tab, - nl_tab, nl_tab, nl_tab, nl_tab - ); +#[allow(overflowing_literals)] +unsafe fn simd_consume_ws(c: &mut ParseContext) -> bool { + let space_vec = _mm_set1_epi8(' ' as i8); + let tab_nl_vec = _mm_set1_epi8(0b11111000); + let tab_nl_stat_vec = _mm_set1_epi8(0b00001000); + while c.offset + 16 < c.content.len() { let values = _mm_loadu_si128(&c.content[c.offset] as *const u8 as *const _); - let result = _mm_or_si128( + + // Values will be 255 if they're NOT whitespace (0 if they are) + // andnot(a, b) does ((NOT a) AND b) + let result = _mm_andnot_si128( _mm_cmpeq_epi8(values, space_vec), - _mm_cmpeq_epi8(values, nl_tab_vec) + // Negated since it's gt instead of eq + _mm_cmpgt_epi8( + _mm_and_si128(values, tab_nl_vec), + tab_nl_stat_vec + ) ); - let p = &result as *const _ as *const u8; + // Compute bitmask of which values are 255 + // Mask bits are zero for whitespace, read from right (bit 0 = first byte) to left + let mask = _mm_movemask_epi8(result) as u32; - // TODO: Is there a better way than a filthy for loop? 
- for i in 0..16 { - if *p.add(i) == 0 { - // We aren't at a whitespace char anymore - return; - } else { - c.offset += 1; - } + //println!("mask: {:016b}, {}", mask, std::str::from_utf8(&c.content[c.offset..c.offset + 10]).unwrap()); + //println!("values: {:016b}", mask); + + if mask == 0 { + c.offset += 16; + } else { + let lsb = lsb_number(mask); + //println!("lsb: {}", lsb); + + c.offset += lsb as usize; + return true; } } + false } // Parse any amount of whitespace, including comments fn consume_ws(c: &mut ParseContext) { #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] unsafe { - simd_consume_ws(c); + if c.offset < c.content.len() { + match c.content[c.offset] as char { + ' ' | '\n' | '\t' => {}, + '/' => { + consume_comment(c); + }, + _ => return, + }; + } + + while simd_consume_ws(c) { + match c.content[c.offset] as char { + '/' => if !consume_comment(c) { + return + }, + _ => return, + } + } } while c.offset < c.content.len() { @@ -443,10 +474,11 @@ fn consume_comment(c: &mut ParseContext) -> bool { } c.offset += 2; + // TODO: SIMD goes here + let mut one; let mut two = 0; - // TODO: SIMD to search for */ while c.offset < c.content.len() { one = two; two = c.content[c.offset]; diff --git a/src/util.rs b/src/util.rs index 1cf1106..7197941 100644 --- a/src/util.rs +++ b/src/util.rs @@ -19,3 +19,17 @@ fn try_logical_cpu_count() -> Result { } Ok(logical_cpus) } + +const DE_BRUIJN_LBS_BIT_POS: [u32; 32] = [ + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 +]; +/// Fast de Bruijn-sequence lookup of the position of the least significant set bit. +/// It will find the value in 4 arithmetic operations and a memory lookup. 
+/// # References +/// http://supertech.csail.mit.edu/papers/debruijn.pdf +pub fn lsb_number(v: u32) -> u32 { + let vi = v as i32; + let index = ((((vi & -vi) as u32).wrapping_mul(0x077CB531u32))).wrapping_shr(27); + return DE_BRUIJN_LBS_BIT_POS[index as usize]; +} diff --git a/test/dereference.b b/test/dereference.b index 4819d20..5c001cf 100644 --- a/test/dereference.b +++ b/test/dereference.b @@ -1,3 +1,5 @@ + + /* test comment */ doubleplus1(xref) { *xref = *xref + *xref; *xref = 1 + **&xref; diff --git a/test/fib_rec.b b/test/fib_rec.b index 73e5dba..c8ab94d 100644 --- a/test/fib_rec.b +++ b/test/fib_rec.b @@ -1,6 +1,3 @@ - - - /* Returns nonsense for n < 0 */ fib(n) { return( @@ -11,6 +8,9 @@ fib(n) { } main() { + auto x; + x = 2; + /* Should return "55" */ return(fib(10)); }