Skip to content

feat(sha2): implement intrinsic based version for sha256 #90

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions sha2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ opaque-debug = "0.2"
sha2-asm = { version = "0.5", optional = true }
libc = { version = "0.2.68", optional = true }

[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies]
raw-cpuid = "7.0.3"

[dependencies.lazy_static]
version = "1.4.0"
default-features = false
# no_std feature is an anti-pattern. Why, lazy_static, why?
# See https://github.com/rust-lang-nursery/lazy-static.rs/issues/150
features = ["spin_no_std"]

[dev-dependencies]
digest = { version = "0.9", features = ["dev"] }
hex-literal = "0.2"
Expand Down
4 changes: 3 additions & 1 deletion sha2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ extern crate std;
#[cfg(feature = "asm-aarch64")]
mod aarch64;
mod consts;
mod platform;
mod sha256;
#[cfg(any(not(feature = "asm"), feature = "asm-aarch64", feature = "compress"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod sha256_intrinsics;
mod sha256_utils;
mod sha512;
#[cfg(any(not(feature = "asm"), target_arch = "aarch64", feature = "compress"))]
Expand Down
89 changes: 89 additions & 0 deletions sha2/src/platform.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/// Identifies which SHA-256 block-compression backend a call is dispatched to.
// Depending on the target and enabled features some variants are never
// constructed, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Platform {
    /// Pure-Rust fallback; present in every build configuration.
    Portable,
    /// Backend from the `sha2_asm` crate; only exists with the `asm` feature.
    #[cfg(feature = "asm")]
    Asm,
    /// Backend built on the x86 SHA intrinsics; only exists on x86 / x86_64 targets.
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    Sha,
}

/// Opaque handle naming the backend that `compress256` dispatches to.
///
/// The wrapped `Platform` tag is private, so values can only be obtained
/// through the constructor functions on this type.
#[derive(Clone, Copy, Debug)]
pub struct Implementation(Platform);

impl Implementation {
    /// Selects the fastest backend usable on the current machine.
    ///
    /// Candidates are probed in this order: x86 SHA extensions (x86/x86_64
    /// targets only), the `sha2_asm` backend (only when the `asm` /
    /// `asm-aarch64` features are enabled), and finally the portable
    /// implementation, which is always available.
    pub fn detect() -> Self {
        // Try the different implementations in order of how fast/modern they are.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if let Some(sha_impl) = Self::sha_if_supported() {
                return sha_impl;
            }
        }
        #[cfg(any(feature = "asm", feature = "asm-aarch64"))]
        {
            if let Some(asm_impl) = Self::asm_if_supported() {
                return asm_impl;
            }
        }

        Self::portable()
    }

    /// Returns the portable backend, which works on every target.
    pub fn portable() -> Self {
        Implementation(Platform::Portable)
    }

    /// Returns the SHA-intrinsics backend if CPUID reports the SHA extension
    /// at runtime, otherwise `None`.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    pub fn sha_if_supported() -> Option<Self> {
        use raw_cpuid::CpuId;

        // Use raw_cpuid instead of is_x86_feature_detected, to ensure the check
        // never happens at compile time.
        let has_sha = CpuId::new()
            .get_extended_feature_info()
            // No extended feature leaf means the extension cannot be present.
            .map_or(false, |info| info.has_sha());

        // Make sure this computer actually supports it.
        if has_sha {
            Some(Implementation(Platform::Sha))
        } else {
            None
        }
    }

    /// Returns the `sha2_asm` backend if it can be used on this machine,
    /// otherwise `None`.
    ///
    /// With `asm-aarch64` the decision is delegated to the runtime probe in
    /// `crate::aarch64`. Without it the assembly backend is used
    /// unconditionally, matching how `asm` builds called
    /// `sha2_asm::compress256` directly before this dispatch layer existed.
    // NOTE(review): `Platform::Asm` is gated on `asm` alone; this assumes the
    // `asm-aarch64` feature also enables `asm` - confirm in Cargo.toml.
    #[cfg(any(feature = "asm", feature = "asm-aarch64"))]
    pub fn asm_if_supported() -> Option<Self> {
        // `crate::` is required here: a leading `::` would look for an
        // external crate named `aarch64` instead of the local module.
        #[cfg(feature = "asm-aarch64")]
        let supported = crate::aarch64::sha2_supported();
        #[cfg(not(feature = "asm-aarch64"))]
        let supported = true;

        if supported {
            Some(Implementation(Platform::Asm))
        } else {
            None
        }
    }

    /// Runs the selected backend's compression function on one 64-byte
    /// `block`, handing it `state` (eight 32-bit words) by mutable reference.
    #[inline]
    pub fn compress256(&self, state: &mut [u32; 8], block: &[u8; 64]) {
        match self.0 {
            Platform::Portable => {
                use crate::sha256_utils;
                sha256_utils::compress256(state, block);
            }
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            Platform::Sha => {
                use crate::sha256_intrinsics;
                // SAFETY: `Platform::Sha` is only ever constructed by
                // `sha_if_supported`, after CPUID reported the SHA extension.
                // NOTE(review): if the intrinsics routine also relies on
                // SSSE3/SSE4.1 instructions, confirm those are implied or
                // add them to the runtime check.
                unsafe { sha256_intrinsics::compress256(state, block) };
            }
            #[cfg(feature = "asm")]
            Platform::Asm => {
                sha2_asm::compress256(state, block);
            }
        }
    }
}
37 changes: 12 additions & 25 deletions sha2/src/sha256.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! SHA-256

use crate::consts::{H224, H256, STATE_LEN};
use crate::platform::Implementation;
use block_buffer::BlockBuffer;
use digest::impl_write;
use digest::{
Expand All @@ -9,15 +10,13 @@ use digest::{
};
use digest::{BlockInput, FixedOutputDirty, Reset, Update};

#[cfg(not(feature = "asm"))]
use crate::sha256_utils::compress256;

#[cfg(feature = "asm")]
use sha2_asm::compress256;

type BlockSize = U64;
type Block = GenericArray<u8, BlockSize>;

// Backend used by `Engine256State::process_block`; the value comes from
// `Implementation::detect()` and is shared by every hasher instance.
lazy_static::lazy_static! {
static ref IMPL: Implementation = Implementation::detect();
}

/// A structure that represents the state of a digest computation for the
/// SHA-256 family of digest functions
#[derive(Clone)]
Expand All @@ -30,23 +29,9 @@ impl Engine256State {
Engine256State { h: *h }
}

#[cfg(not(feature = "asm-aarch64"))]
pub fn process_block(&mut self, block: &Block) {
let block = unsafe { &*(block.as_ptr() as *const [u8; 64]) };
compress256(&mut self.h, block);
}

#[cfg(feature = "asm-aarch64")]
pub fn process_block(&mut self, block: &Block) {
let block = unsafe { &*(block.as_ptr() as *const [u8; 64]) };
// TODO: Replace this platform-specific call with is_aarch64_feature_detected!("sha2") once
// that macro is stabilised and https://github.com/rust-lang/rfcs/pull/2725 is implemented
// to let us use it on no_std.
if ::aarch64::sha2_supported() {
compress256(&mut self.h, block);
} else {
::sha256_utils::compress256(&mut self.h, block);
}
IMPL.compress256(&mut self.h, block);
}
}

Expand All @@ -71,14 +56,16 @@ impl Engine256 {
fn update(&mut self, input: &[u8]) {
// Assumes that input.len() can be converted to u64 without overflow
self.len += (input.len() as u64) << 3;
let s = &mut self.state;
self.buffer.input_block(input, |b| s.process_block(b));
let self_state = &mut self.state;
self.buffer
.input_block(input, |input| self_state.process_block(input));
}

fn finish(&mut self) {
let s = &mut self.state;
let self_state = &mut self.state;
let l = self.len;
self.buffer.len64_padding_be(l, |b| s.process_block(b));
self.buffer
.len64_padding_be(l, |b| self_state.process_block(b));
}

fn reset(&mut self, h: &[u32; STATE_LEN]) {
Expand Down
Loading