Skip to content

Commit 977c638

Browse files
committed
Backport AVX2 AES-GCM implementation from BoringSSL
This uses the AVX2 versions of the AESENC and PCLMULQDQ instructions (VAES and VPCLMULQDQ); on Zen 3 this provides up to an 80% performance improvement. Original source: https://github.com/google/boringssl/blob/13840dd094f9e9c1b00a7368aa25e656554221f1/gen/bcm/aes-gcm-avx2-x86_64-linux.S (see also the original BoringSSL commit at google/boringssl@3b6e1be).
1 parent 68473c4 commit 977c638

File tree

8 files changed

+1609
-22
lines changed

8 files changed

+1609
-22
lines changed

config/toolchain-simd.m4

+42
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
2424
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
2525
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
2626
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
27+
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES
28+
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ
2729
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
2830
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
2931
ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
@@ -426,6 +428,46 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
426428
])
427429
])
428430

431+
dnl #
432+
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES
433+
dnl #
434+
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VAES], [
435+
AC_MSG_CHECKING([whether host toolchain supports VAES])
436+
dnl # Link a one-instruction test program that issues vaesenc on ymm registers.
dnl # HAVE_VAES is defined to 1 only when that link succeeds.
437+
AC_LINK_IFELSE([AC_LANG_SOURCE([
438+
[
439+
void main()
440+
{
441+
__asm__ __volatile__("vaesenc %ymm0, %ymm1, %ymm0");
442+
}
443+
]])], [
444+
AC_MSG_RESULT([yes])
445+
AC_DEFINE([HAVE_VAES], 1, [Define if host toolchain supports VAES])
446+
], [
447+
AC_MSG_RESULT([no])
448+
])
449+
])
450+
451+
dnl #
452+
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ
453+
dnl #
454+
AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_VPCLMULQDQ], [
455+
AC_MSG_CHECKING([whether host toolchain supports VPCLMULQDQ])
456+
dnl # Link a one-instruction test program that issues vpclmulqdq on ymm
dnl # registers with an immediate operand of 0. The statement uses operands,
dnl # so register names are written with a doubled percent sign.
dnl # HAVE_VPCLMULQDQ is defined to 1 only when that link succeeds.
457+
AC_LINK_IFELSE([AC_LANG_SOURCE([
458+
[
459+
void main()
460+
{
461+
__asm__ __volatile__("vpclmulqdq %0, %%ymm4, %%ymm3, %%ymm5" :: "i"(0));
462+
}
463+
]])], [
464+
AC_MSG_RESULT([yes])
465+
AC_DEFINE([HAVE_VPCLMULQDQ], 1, [Define if host toolchain supports VPCLMULQDQ])
466+
], [
467+
AC_MSG_RESULT([no])
468+
])
469+
])
470+
429471
dnl #
430472
dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
431473
dnl #

include/os/linux/kernel/linux/simd_x86.h

+26
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,32 @@ zfs_movbe_available(void)
597597
#endif
598598
}
599599

600+
/*
601+
* Check if VAES instruction set is available
602+
*/
603+
static inline boolean_t
604+
zfs_vaes_available(void)
605+
{
606+
#if defined(X86_FEATURE_VAES)
607+
/* X86_FEATURE_VAES is defined: ask boot_cpu_has() for the flag; !! collapses the result to 0 or 1. */
return (!!boot_cpu_has(X86_FEATURE_VAES));
608+
#else
609+
/* X86_FEATURE_VAES is not defined in this build, so VAES is reported as unavailable. */
return (B_FALSE);
610+
#endif
611+
}
612+
613+
/*
614+
* Check if VPCLMULQDQ instruction set is available
615+
*/
616+
static inline boolean_t
617+
zfs_vpclmulqdq_available(void)
618+
{
619+
#if defined(X86_FEATURE_VPCLMULQDQ)
620+
/* X86_FEATURE_VPCLMULQDQ is defined: ask boot_cpu_has() for the flag; !! collapses the result to 0 or 1. */
return (!!boot_cpu_has(X86_FEATURE_VPCLMULQDQ));
621+
#else
622+
/* X86_FEATURE_VPCLMULQDQ is not defined in this build, so VPCLMULQDQ is reported as unavailable. */
return (B_FALSE);
623+
#endif
624+
}
625+
600626
/*
601627
* Check if SHA_NI instruction set is available
602628
*/

lib/libicp/Makefile.am

+1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ nodist_libicp_la_SOURCES += \
6969
module/icp/asm-x86_64/aes/aes_aesni.S \
7070
module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \
7171
module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \
72+
module/icp/asm-x86_64/modes/aesni-gcm-avx2.S \
7273
module/icp/asm-x86_64/modes/ghash-x86_64.S \
7374
module/icp/asm-x86_64/sha2/sha256-x86_64.S \
7475
module/icp/asm-x86_64/sha2/sha512-x86_64.S \

lib/libspl/include/sys/simd.h

+27-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,9 @@ typedef enum cpuid_inst_sets {
101101
AES,
102102
PCLMULQDQ,
103103
MOVBE,
104-
SHA_NI
104+
SHA_NI,
105+
VAES,
106+
VPCLMULQDQ
105107
} cpuid_inst_sets_t;
106108

107109
/*
@@ -126,6 +128,8 @@ typedef struct cpuid_feature_desc {
126128
#define _AES_BIT (1U << 25)
127129
#define _PCLMULQDQ_BIT (1U << 1)
128130
#define _MOVBE_BIT (1U << 22)
131+
#define _VAES_BIT (1U << 9)
132+
#define _VPCLMULQDQ_BIT (1U << 10)
129133
#define _SHA_NI_BIT (1U << 29)
130134

131135
/*
@@ -156,6 +160,8 @@ static const cpuid_feature_desc_t cpuid_features[] = {
156160
[PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
157161
[MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
158162
[SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX },
163+
[VAES] = {7U, 0U, _VAES_BIT, ECX },
164+
[VPCLMULQDQ] = {7U, 0U, _VPCLMULQDQ_BIT, ECX },
159165
};
160166

161167
/*
@@ -230,6 +236,8 @@ CPUID_FEATURE_CHECK(aes, AES);
230236
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
231237
CPUID_FEATURE_CHECK(movbe, MOVBE);
232238
CPUID_FEATURE_CHECK(shani, SHA_NI);
239+
CPUID_FEATURE_CHECK(vaes, VAES);
240+
CPUID_FEATURE_CHECK(vpclmulqdq, VPCLMULQDQ);
233241

234242
/*
235243
* Detect register set support
@@ -380,6 +388,24 @@ zfs_shani_available(void)
380388
return (__cpuid_has_shani());
381389
}
382390

391+
/*
392+
* Check if VAES instruction set is available
393+
*/
394+
static inline boolean_t
395+
zfs_vaes_available(void)
396+
{
397+
/*
 * __cpuid_has_vaes() is presumably the helper generated by
 * CPUID_FEATURE_CHECK(vaes, VAES) in this header -- TODO confirm against
 * the macro definition, which is not part of this hunk.
 */
return (__cpuid_has_vaes());
398+
}
399+
400+
/*
401+
* Check if VPCLMULQDQ instruction set is available
402+
*/
403+
static inline boolean_t
404+
zfs_vpclmulqdq_available(void)
405+
{
406+
/*
 * __cpuid_has_vpclmulqdq() is presumably the helper generated by
 * CPUID_FEATURE_CHECK(vpclmulqdq, VPCLMULQDQ) in this header -- TODO
 * confirm against the macro definition, which is not part of this hunk.
 */
return (__cpuid_has_vpclmulqdq());
407+
}
408+
383409
/*
384410
* AVX-512 family of instruction sets:
385411
*

module/Kbuild.in

+2
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ ICP_OBJS_X86_64 := \
135135
asm-x86_64/sha2/sha256-x86_64.o \
136136
asm-x86_64/sha2/sha512-x86_64.o \
137137
asm-x86_64/modes/aesni-gcm-x86_64.o \
138+
asm-x86_64/modes/aesni-gcm-avx2.o \
138139
asm-x86_64/modes/gcm_pclmulqdq.o \
139140
asm-x86_64/modes/ghash-x86_64.o
140141

@@ -178,6 +179,7 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
178179

179180
# Suppress objtool "return with modified stack frame" warnings.
180181
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
182+
OBJECT_FILES_NON_STANDARD_aesni-gcm-avx2.o := y
181183

182184
# Suppress objtool "unsupported stack pointer realignment" warnings.
183185
# See #6950 for the reasoning.

0 commit comments

Comments
 (0)