Skip to content

Commit

Permalink
make xof_many fall back to compress_xof instead of portable code
Browse files Browse the repository at this point in the history
  • Loading branch information
oconnor663 committed Aug 18, 2024
1 parent 4386d7f commit 5c4c351
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 3 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,26 @@ jobs:
- name: cargo test C bindings intrinsics
run: cargo test --features=prefer_intrinsics
working-directory: ./c/blake3_c_rust_bindings
- name: cargo test C bindings no AVX-512
run: cargo test
working-directory: ./c/blake3_c_rust_bindings
env:
CFLAGS: -DBLAKE3_NO_AVX512
- name: cargo test C bindings no AVX2
run: cargo test
working-directory: ./c/blake3_c_rust_bindings
env:
CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2
- name: cargo test C bindings no SSE41
run: cargo test
working-directory: ./c/blake3_c_rust_bindings
env:
CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_SSE41
- name: cargo test C bindings no SSE2
run: cargo test
working-directory: ./c/blake3_c_rust_bindings
env:
CFLAGS: -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_SSE2
# Reference impl doc test.
- name: reference impl doc test
run: cargo test
Expand Down
4 changes: 3 additions & 1 deletion c/blake3_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,9 @@ void blake3_xof_many(const uint32_t cv[8],
}
#endif
#endif
blake3_xof_many_portable(cv, block, block_len, counter, flags, out, outblocks);
for(size_t i = 0; i < outblocks; ++i) {
blake3_compress_xof(cv, block, block_len, counter + i, flags, out + 64*i);
}
}

void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
Expand Down
3 changes: 3 additions & 0 deletions c/blake3_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ void blake3_compress_xof_portable(const uint32_t cv[8],
uint8_t block_len, uint64_t counter,
uint8_t flags, uint8_t out[64]);

// This function is test-only. When blake3_xof_many doesn't have an optimized implementation,
// it loops over blake3_compress_xof instead of falling back to this, so it still benefits
// from compress optimizations.
void blake3_xof_many_portable(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter, uint8_t flags,
Expand Down
13 changes: 11 additions & 2 deletions src/platform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ impl Platform {
cv: &CVWords,
block: &[u8; BLOCK_LEN],
block_len: u8,
counter: u64,
mut counter: u64,
flags: u8,
out: &mut [u8],
) {
Expand All @@ -299,7 +299,16 @@ impl Platform {
Platform::AVX512 => unsafe {
crate::avx512::xof_many(cv, block, block_len, counter, flags, out)
},
_ => crate::portable::xof_many(cv, block, block_len, counter, flags, out),
_ => {
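// For platforms without an optimized xof_many, fall back to a loop over
// compress_xof, producing one output block per call and incrementing the
// counter each time. This is still faster than the portable xof_many,
// because compress_xof benefits from whatever compress optimizations this
// platform has.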
for out_block in out.chunks_exact_mut(BLOCK_LEN) {
// TODO: Use array_chunks_mut here once that's stable.
let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap();
*out_array = self.compress_xof(cv, block, block_len, counter, flags);
counter += 1;
}
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/portable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ pub fn hash_many<const N: usize>(
}
}

// This function is test-only. When platform::xof_many() doesn't have an optimized implementation,
// it loops over platform::compress_xof() instead of falling back to this, so it still benefits
// from compress optimizations.
#[cfg(test)]
pub fn xof_many(
cv: &CVWords,
block: &[u8; BLOCK_LEN],
Expand Down

0 comments on commit 5c4c351

Please sign in to comment.