Skip to content

Commit

Permalink
Move capability checks in chacha-x86.pl to C
Browse files Browse the repository at this point in the history
Bug: 673
Change-Id: I7e213dc1bbb62553499666c1b271d97f8c43a3ce
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65870
Reviewed-by: Bob Beck <[email protected]>
Commit-Queue: David Benjamin <[email protected]>
(cherry picked from commit 6d0caa1a0aad0b035ff1a63f9e292fec45ad3b35)
  • Loading branch information
davidben authored and andrewhop committed Sep 17, 2024
1 parent c9110db commit 3a53a70
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 41 deletions.
30 changes: 8 additions & 22 deletions crypto/chacha/asm/chacha-x86.pl
Original file line number Diff line number Diff line change
Expand Up @@ -118,26 +118,10 @@ sub QUARTERROUND {
($d,$d_)=($d_,$d);
}

&static_label("ssse3_shortcut");
&static_label("ssse3_data");
&static_label("pic_point");

&function_begin("ChaCha20_ctr32");
&xor ("eax","eax");
&cmp ("eax",&wparam(2)); # len==0?
&je (&label("no_data"));
if ($xmm) {
&call (&label("pic_point"));
&set_label("pic_point");
&blindpop("eax");
&picmeup("ebp","OPENSSL_ia32cap_P","eax",&label("pic_point"));
&test (&DWP(0,"ebp"),1<<24); # test FXSR bit
&jz (&label("x86"));
&test (&DWP(4,"ebp"),1<<9); # test SSSE3 bit
&jz (&label("x86"));
&jmp (&label("ssse3_shortcut"));
&set_label("x86");
}
&function_begin("ChaCha20_ctr32_nohw");
&mov ("esi",&wparam(3)); # key
&mov ("edi",&wparam(4)); # counter and nonce

Expand Down Expand Up @@ -359,8 +343,7 @@ sub QUARTERROUND {

&set_label("done");
&stack_pop(33);
&set_label("no_data");
&function_end("ChaCha20_ctr32");
&function_end("ChaCha20_ctr32_nohw");

if ($xmm) {
my ($xa,$xa_,$xb,$xb_,$xc,$xc_,$xd,$xd_)=map("xmm$_",(0..7));
Expand Down Expand Up @@ -432,8 +415,11 @@ sub QUARTERROUND_SSSE3 {
($xd,$xd_)=($xd_,$xd);
}

&function_begin("ChaCha20_ssse3");
&set_label("ssse3_shortcut");
&function_begin("ChaCha20_ctr32_ssse3");
&call (&label("pic_point"));
&set_label("pic_point");
&blindpop("eax");

&mov ($out,&wparam(0));
&mov ($inp,&wparam(1));
&mov ($len,&wparam(2));
Expand Down Expand Up @@ -755,7 +741,7 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
}
&set_label("done");
&mov ("esp",&DWP(512,"esp"));
&function_end("ChaCha20_ssse3");
&function_end("ChaCha20_ctr32_ssse3");

&align (64);
&set_label("ssse3_data");
Expand Down
2 changes: 1 addition & 1 deletion crypto/chacha/chacha.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ static void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
}
#endif

#if defined(CHACHA20_ASM) || defined(CHACHA20_ASM_NOHW)
#if defined(CHACHA20_ASM_NOHW)

void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
const uint8_t key[32], const uint8_t nonce[12],
Expand Down
3 changes: 0 additions & 3 deletions crypto/chacha/chacha_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,6 @@ TEST(ChaChaTest, CounterOverflow) {

static void check_abi(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]) {
#if defined(CHACHA20_ASM)
CHECK_ABI(ChaCha20_ctr32, out, in, in_len, key, counter);
#endif
#if defined(CHACHA20_ASM_NEON)
if (ChaCha20_ctr32_neon_capable(in_len)) {
CHECK_ABI(ChaCha20_ctr32_neon, out, in, in_len, key, counter);
Expand Down
34 changes: 19 additions & 15 deletions crypto/chacha/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,16 @@ void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86)

#define CHACHA20_ASM
#define CHACHA20_ASM_NOHW

#define CHACHA20_ASM_SSSE3
// ChaCha20_ctr32_ssse3_capable returns non-zero if and only if |len| is
// non-zero and both |CRYPTO_is_SSSE3_capable| and |CRYPTO_is_FXSR_capable|
// report true. It is the C-side gate for |ChaCha20_ctr32_ssse3| on 32-bit x86.
// NOTE(review): callers are presumably expected to fall back to
// |ChaCha20_ctr32_nohw| when this returns zero -- confirm against the
// dispatch code in chacha.c.
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
// Unlike the x86_64 version, the x86 SSSE3 routine runs for all non-zero
// lengths.
// The |len > 0| test is evaluated first, so a zero-length request is rejected
// without consulting the CPU capability helpers.
return len > 0 && CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable();
}
void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);

#elif !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
Expand All @@ -41,7 +50,7 @@ void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],

#define CHACHA20_ASM_NEON
OPENSSL_INLINE int ChaCha20_ctr32_neon_capable(size_t len) {
return (len >= 192) && CRYPTO_is_NEON_capable();
return len >= 192 && CRYPTO_is_NEON_capable();
}
void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
Expand All @@ -50,43 +59,38 @@ void ChaCha20_ctr32_neon(uint8_t *out, const uint8_t *in, size_t in_len,

#define CHACHA20_ASM_AVX2
OPENSSL_INLINE int ChaCha20_ctr32_avx2_capable(size_t len) {
return (len > 128) && CRYPTO_is_AVX2_capable();
return len > 128 && CRYPTO_is_AVX2_capable();
}
void ChaCha20_ctr32_avx2(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);

#define CHACHA20_ASM_SSSE3_4X
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_4x_capable(size_t len) {
int capable = (len > 128) && CRYPTO_is_SSSE3_capable();
int faster = (len > 192) || !CRYPTO_cpu_perf_is_like_silvermont();
int capable = len > 128 && CRYPTO_is_SSSE3_capable();
int faster = len > 192 || !CRYPTO_cpu_perf_is_like_silvermont();
return capable && faster;
}
void ChaCha20_ctr32_ssse3_4x(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);

#define CHACHA20_ASM_SSSE3
OPENSSL_INLINE int ChaCha20_ctr32_ssse3_capable(size_t len) {
return (len > 128) && CRYPTO_is_SSSE3_capable();
return len > 128 && CRYPTO_is_SSSE3_capable();
}
void ChaCha20_ctr32_ssse3(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif

#if defined(CHACHA20_ASM)
// ChaCha20_ctr32 encrypts |in_len| bytes from |in| and writes the result to
// |out|. If |in| and |out| alias, they must be equal.
#if defined(CHACHA20_ASM_NOHW)
// ChaCha20_ctr32_nohw encrypts |in_len| bytes from |in| and writes the result
// to |out|. If |in| and |out| alias, they must be equal. |in_len| may not be
// zero.
//
// |counter[0]| is the initial 32-bit block counter, and the remainder is the
// 96-bit nonce. If the counter overflows, the output is undefined. The function
// will produce output, but the output may vary by machine and may not be
// self-consistent. (On some architectures, the assembly implements a mix of
// 64-bit and 32-bit counters.)
void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif

#if defined(CHACHA20_ASM_NOHW)
// ChaCha20_ctr32_nohw is like |ChaCha20_ctr32| except |in_len| must be nonzero.
void ChaCha20_ctr32_nohw(uint8_t *out, const uint8_t *in, size_t in_len,
const uint32_t key[8], const uint32_t counter[4]);
#endif
Expand Down

0 comments on commit 3a53a70

Please sign in to comment.