Skip to content

Commit

Permalink
Begin working on an AVX2 parser
Browse files Browse the repository at this point in the history
I've implemented the parser thus far as an example program so that I can
execute it directly.  At the moment, it operates on a 64K chunk, splits
it into four segments, and finds every word and line boundary within it.
I will benchmark it soon to evaluate how effective this paradigm can be.
  • Loading branch information
arya dradjica committed Sep 26, 2024
1 parent 9d94f9f commit c15230e
Showing 1 changed file with 393 additions and 0 deletions.
393 changes: 393 additions & 0 deletions examples/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,393 @@
use core::arch::x86_64::*;

Check failure on line 1 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

unresolved import `core::arch::x86_64`

Check failure on line 1 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

unresolved import `core::arch::x86_64`

Check failure on line 1 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

unresolved import `core::arch::x86_64`

Check failure on line 1 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

unresolved import `core::arch::x86_64`
use std::io::{self, Read};
use std::{env, fs::File};

#[derive(Debug)]
struct Frame<'a> {
/// The input data for a frame.
///
/// There must be at least 8 bytes of additional memory lying beyond the end
/// of this array that are safe to read.
input: &'a [u8; 65536],

/// Entries parsed from this frame.
///
/// A frame can contain at most 8K entries, since entries with fewer than
/// 8 bytes are virtually impossible. Nicely, this means another 64KiB of
/// data storage.
entries: &'a mut [Entry; 8192],

Check failure on line 18 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, beta)

field `entries` is never read

Check failure on line 18 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 1.76.0)

field `entries` is never read

Check failure on line 18 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, nightly)

field `entries` is never read

Check failure on line 18 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, stable)

field `entries` is never read

Check failure on line 18 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / Check minimal versions

field `entries` is never read
}

impl<'a> Frame<'a> {
/// Lex the frame into entries.
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 24 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 24 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 24 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn lex(self) -> Result<(), Error> {
// The start and limit of the segment.
let (start, limit) = self.segment()?;

// The current offset in the segment.
let mut offset = start;

// Whether the segment is still working.
let mut working = _mm256_set1_epi64x(!0);

let mut count = 0usize;

loop {
// Load the next 8 bytes of the segment.
let view = self.view(offset, working);

// Move to the next view.
offset = self.next_offset(view, offset, working);

// Stop processing if the segment has ended.
working = _mm256_cmpgt_epi64(limit, offset);

count += 1;

// If all processing is finished, stop.
if _mm256_testz_si256(working, working) != 0 {
break;
}
}

println!("Performed {count} iterations");

Ok(())
}

/// The offset of the next view.
///
/// The next view will begin at a new word (if one was found within the
/// current view), or at the next 8 bytes of the current word.
///
/// Latency (view -> return): 17c
/// Latency (offset -> return): 1c
/// Latency (working -> return): 2c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 69 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 69 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 69 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn next_offset(
&self,
view: __m256i,

Check failure on line 72 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope
offset: __m256i,

Check failure on line 73 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope
working: __m256i,

Check failure on line 74 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope
) -> __m256i {

Check failure on line 75 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope
// Mark bytes in the current word.
let w = self.word_mask(view);

// Increment the number of set bytes, saturating to 8.
let w = _mm256_or_si256(w, _mm256_set1_epi64x((0xFFu64 << 56) as _));

// Count the number of set bytes.
let c = Self::count_bytes(w);

// Zero the count if the segment is finished working.
let c = _mm256_and_si256(c, working);

// Add the count to the current offset.
_mm256_add_epi64(offset, c)
}

/// Whether this is the last token on the line.
///
/// This is all-ones if the whitespace terminating the current token is a
/// newline (so that the next token will follow it).
///
/// Latency (view -> return): 7c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 99 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 99 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 99 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn end_of_line(&self, view: __m256i) -> __m256i {

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, beta)

methods `end_of_line` and `end_of_word` are never used

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, 1.76.0)

methods `end_of_line` and `end_of_word` are never used

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, nightly)

methods `end_of_line` and `end_of_word` are never used

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest, stable)

methods `end_of_line` and `end_of_word` are never used

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope

Check failure on line 100 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / Check minimal versions

associated function `end_of_line` is never used
// If the mid-line word mask contains bits that the line mask does not,
// then the line mask is shorter than the mid-line word mask, and thus
// the current word terminates on that newline. If any such bits exist,
// the MSB will be one of them, thus it is a negative integer.

// Mark bytes in the same word (ignoring newlines).
let mlw = self.mid_line_word_mask(view);

// Mark bytes in the same line.
let l = self.line_mask(view);

// Mark bytes that are not on this line (including line terminator).
let nl = _mm256_andnot_si256(l, mlw);

// Check that at least one byte is not on this line.
// If any bytes are not on this line, the MSB will be set.
_mm256_cmpgt_epi64(_mm256_setzero_si256(), nl)
}

/// Whether the end of the word was found.
///
/// This is all-ones if whitespace was found in the current block, as this
/// delimits the current token.
///
/// Latency (view -> return): 7c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 127 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 127 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 127 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn end_of_word(&self, view: __m256i) -> __m256i {

Check failure on line 128 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope

Check failure on line 128 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope

Check failure on line 128 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / Check minimal versions

associated function `end_of_word` is never used
// If the MSB is not set, the word mask did not include the last byte.
_mm256_cmpgt_epi64(self.word_mask(view), _mm256_set1_epi64x(-1))
}

/// A mask of the current word.
///
/// Bytes are all-ones if they are within the current word (excluding the
/// terminating whitespace).
///
/// Latency (view -> return): 6c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 140 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 140 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 140 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn word_mask(&self, view: __m256i) -> __m256i {

Check failure on line 141 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, 1.76.0)

cannot find type `__m256i` in this scope
// Truncate the mid-line word mask if a newline is within it.
_mm256_and_si256(self.mid_line_word_mask(view), self.line_mask(view))
}

/// A mask of the current line.
///
/// Bytes are all-ones if they are within the current line (excluding the
/// line terminator itself).
///
/// Latency (view -> return): 4c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 153 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 153 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 153 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn line_mask(&self, view: __m256i) -> __m256i {
// Mark ASCII newlines (0x0A).
let lf = _mm256_cmpeq_epi8(view, _mm256_set1_epi8(b'\n' as i8));

// Mark newline bytes by their LSB.
let lf = _mm256_and_si256(lf, _mm256_set1_epi8(1));

// Toggle bytes up to (and including) the first newline.
let mask = _mm256_sub_epi64(lf, _mm256_set1_epi64x(1));

// Mark all bytes up to (but excluding) the first newline.
_mm256_andnot_si256(lf, mask)
}

/// A mask of the current word, assuming newlines are absent.
///
/// Bytes are all-ones if they are within the current word (excluding the
/// terminating whitespace). Newlines are not considered as terminators,
/// instead being treated as parts of words.
///
/// Latency (view -> return): 5c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 176 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 176 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 176 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn mid_line_word_mask(&self, view: __m256i) -> __m256i {
// Mark mid-line spaces.
let sp = self.mid_line_space(view);

// Mark mid-line bytes by their LSB.
let sp = _mm256_and_si256(sp, _mm256_set1_epi8(1));

// Toggle bytes up to (and including) the first mid-line space.
let mask = _mm256_sub_epi64(sp, _mm256_set1_epi64x(1));

// Mark all bytes up to (but excluding) the first mid-line space.
_mm256_andnot_si256(sp, mask)
}

/// A mask of mid-line whitespace (spaces and tabs).
///
/// Bytes are all-ones if they are ASCII spaces ('0x20') or horizontal tabs
/// ('0x09'), and are otherwise all-zeros.
///
/// Latency (view -> return): 2c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 198 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 198 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 198 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn mid_line_space(&self, view: __m256i) -> __m256i {
// Mark ASCII spaces (0x20).
let sp = _mm256_cmpeq_epi8(view, _mm256_set1_epi8(b'=' as i8));

// Mark ASCII horizontal tabs (0x09).
let ht = _mm256_cmpeq_epi8(view, _mm256_set1_epi8(b'\t' as i8));

// Combine marks.
_mm256_or_si256(sp, ht)
}

/// The current view into the segment.
///
/// The view consists of the next 8 bytes of the segment. If the segment's
/// work has been completed, all-zeros is returned.
///
/// Latency (offset -> return): 16c
/// Latency (working -> return): 15c
#[inline]
#[target_feature(enable = "avx2")]

Check failure on line 218 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, nightly)

the feature named `avx2` is not valid for this target

Check failure on line 218 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, beta)

the feature named `avx2` is not valid for this target

Check failure on line 218 in examples/parser.rs

View workflow job for this annotation

GitHub Actions / test (macOS-latest, stable)

the feature named `avx2` is not valid for this target
unsafe fn view(&self, offset: __m256i, working: __m256i) -> __m256i {
// The value returned if the segment is not working.
let fallback = _mm256_setzero_si256();

// The base address the segment is loaded from.
let addr = self.input.as_ptr() as *const i64;

// Load the segment's bytes if it is working.
_mm256_mask_i64gather_epi64(fallback, addr, offset, working, 1)
}

/// Divide the frame into segments.
///
/// A segment is a slice of the frame that begins with a newline. This is
/// important since lexing cannot start in the middle of a record.
///
/// Initially, 4 segments are drawn by dividing the frame equally. Each of
/// the frames is then skipped forward until a newline.
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn segment(&self) -> Result<(__m256i, __m256i), Error> {
// The offset of the segment within the frame.
let mut offset = _mm256_set_epi64x(4, 3, 2, 1);
offset = _mm256_slli_epi64(offset, 16 - 2 - 3);
offset = _mm256_sub_epi64(offset, _mm256_set1_epi64x(1));

// The first 8 bytes in the segment.
let mut input = _mm256_setzero_si256();

// A mask of newlines in the input.
let mut lines = _mm256_setzero_si256();

// Whether a newline needs to be searched for.
let mut search = _mm256_set1_epi64x(!0);

// Search for a newline.
//
// Latency (offset -> input): 16c
// Latency (search -> input): 15c
// Latency (input -> search): 4c
// Latency (input -> next offset): 4c
// Latency (offset -> next offset): 1c
// Latency (input -> break): 14c
for _ in 0..2048 {
// Load the next 8 bytes of the segment.
input = _mm256_mask_i64gather_epi64(
input,
self.input.as_ptr() as *const _,
offset,
search,
8,
);

// Mark newline bytes (all bits set if byte is equal).
lines = _mm256_cmpeq_epi8(input, _mm256_set1_epi8(b'\n' as _));

// Check whether the search needs to continue.
search = _mm256_cmpeq_epi64(lines, _mm256_setzero_si256());

// Push the offset backward if the search needs to continue.
offset = _mm256_add_epi64(offset, search);

// If all searches are finished, stop.
if _mm256_testz_si256(search, search) != 0 {
break;
}
}

// If a newline wasn't found, return an error.
if _mm256_testz_si256(search, search) == 0 {
return Err(Error);
}

// Mark newline bytes (MSB is set for newlines).
lines = _mm256_and_si256(lines, _mm256_set1_epi8(!0x7F));

// Switch the offsets to units of bytes.
offset = _mm256_slli_epi64(offset, 3);

// Select all bytes up to (and including) the first newline byte.
let to_skip = _mm256_sub_epi64(lines, _mm256_set1_epi64x(1));
let to_skip = _mm256_xor_si256(lines, to_skip);

// Update the offset to skip past the newline.
offset = _mm256_add_epi64(offset, Self::count_bytes(to_skip));

// Calculate the start of each segment.
let start = _mm256_permute4x64_epi64(offset, 0b10010011);
let start = _mm256_and_si256(start, _mm256_set_epi64x(!0, !0, !0, 0));

Ok((start, offset))
}

/// Count the number of set bytes.
///
/// The input must contain bytes that are all-ones or all-zeros. The total
/// number of all-ones bytes (between 0 and 8, inclusive) is returned.
///
/// Latency (bytes -> result): 8c
#[inline]
#[target_feature(enable = "avx2")]
unsafe fn count_bytes(mut bytes: __m256i) -> __m256i {
// Negate bytes so that each one is 0 or 1.
bytes = _mm256_sub_epi8(_mm256_setzero_si256(), bytes);

// Pair bytes in the low and high dwords and add them.
bytes = _mm256_add_epi8(bytes, _mm256_srli_epi64(bytes, 32));

// Pair bytes in the low and high words and add them.
bytes = _mm256_add_epi8(bytes, _mm256_srli_epi64(bytes, 16));

// Pair the low and high bytes and add them.
bytes = _mm256_add_epi8(bytes, _mm256_srli_epi64(bytes, 8));

// Mask out intermediate results.
bytes = _mm256_and_si256(bytes, _mm256_set1_epi64x(0xFF));

bytes
}
}

/// A lexical entry.
///
/// Entries are lines in a zone file providing DNS information. This type is an
/// efficient and compact representation for an entry that can be produced by a
/// vectorized lexer.
#[derive(Copy, Clone, Debug, Default)]
#[repr(C)]
struct Entry {
/// The address of this record.
///
/// This is an offset in bytes relative to the frame.
addr: u32,

/// The location of the record data.
///
/// The most-significant bit is set if an explicit domain name was present
/// in the record.
///
/// The remaining bits are an offset in bytes from the start of the record.
data: u16,

/// The location of the TTL.
///
/// This is an offset in bytes from the start of the record. If it is zero,
/// an implicit TTL was used.
ttl: u8,

/// The record type.
///
/// This is a unique identifier for a record or control entry type, derived
/// by a perfect hash of the first few bytes of its name.
hash: u8,
}

#[derive(Copy, Clone, Debug)]
struct Error;

fn main() -> io::Result<()> {
let path = env::args().nth(1).unwrap();
let mut input = Vec::with_capacity(4096 * 16 + 8);
File::open(path)?.take(4096 * 16).read_to_end(&mut input)?;
input.resize(input.capacity(), 0u8);

let mut entries = vec![Entry::default(); 512 * 16];

let frame = Frame {
input: (&input[..65536]).try_into().unwrap(),
entries: (&mut entries[..8192]).try_into().unwrap(),
};

unsafe { frame.lex() }.unwrap();

Ok(())
}

0 comments on commit c15230e

Please sign in to comment.