Skip to content

Commit ea62118

Browse files
mabrahamal42and
authored andcommitted
Remove support for Intel Xeon Phi
AVX-512 SIMD had preprocessing to permit the KNL SIMD to over-ride it, which is no longer needed. Fixes #4740 Closes #4740
1 parent ca9d1ee commit ea62118

28 files changed

+27
-693
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ gmx_option_multichoice(
277277
GMX_SIMD
278278
"SIMD instruction set for CPU kernels and compiler optimization"
279279
"AUTO"
280-
AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL ARM_NEON_ASIMD ARM_SVE IBM_VSX Reference)
280+
AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 ARM_NEON_ASIMD ARM_SVE IBM_VSX Reference)
281281

282282
if (GMX_INTEL_LLVM)
283283
set(GMX_FFT_LIBRARY_DEFAULT "mkl")

cmake/gmxDetectSimd.cmake

+1-3
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@ function(gmx_suggest_simd _suggested_simd)
7676

7777
if(GMX_TARGET_X86)
7878
gmx_run_cpu_detection(brand)
79-
if(CPU_DETECTION_FEATURES MATCHES " avx512er ")
80-
set(OUTPUT_SIMD "AVX_512_KNL")
81-
elseif(CPU_DETECTION_FEATURES MATCHES " avx512f ")
79+
if(CPU_DETECTION_FEATURES MATCHES " avx512f ")
8280
if(CPU_DETECTION_BRAND MATCHES "Intel")
8381
gmx_detect_avx_512_fma_units(NUMBER_OF_AVX_512_FMA_UNITS)
8482
if(NUMBER_OF_AVX_512_FMA_UNITS EQUAL 2)

cmake/gmxManageSimd.cmake

-16
Original file line numberDiff line numberDiff line change
@@ -190,22 +190,6 @@ elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_512")
190190
set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
191191
set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512 SIMD instructions using CXX flags: ${SIMD_AVX_512_CXX_FLAGS}")
192192

193-
elseif(GMX_SIMD_ACTIVE STREQUAL "AVX_512_KNL")
194-
message(WARNING "Intel Xeon Phi support is deprecated and will be removed in the future versions.") # See #4740
195-
196-
gmx_find_simd_avx_512_knl_flags(SIMD_AVX_512_KNL_C_SUPPORTED SIMD_AVX_512_KNL_CXX_SUPPORTED
197-
SIMD_AVX_512_KNL_C_FLAGS SIMD_AVX_512_KNL_CXX_FLAGS)
198-
199-
if(NOT SIMD_AVX_512_KNL_C_SUPPORTED OR NOT SIMD_AVX_512_KNL_CXX_SUPPORTED)
200-
gmx_give_fatal_error_when_simd_support_not_found("AVX 512ER" "choose a lower level of SIMD (slower)" "${SUGGEST_BINUTILS_UPDATE}")
201-
endif()
202-
203-
# If multiple flags are neeed, make them into a list
204-
string(REPLACE " " ";" SIMD_C_FLAGS ${SIMD_AVX_512_KNL_C_FLAGS})
205-
string(REPLACE " " ";" SIMD_CXX_FLAGS ${SIMD_AVX_512_KNL_CXX_FLAGS})
206-
set(GMX_SIMD_X86_${GMX_SIMD_ACTIVE} 1)
207-
set(SIMD_STATUS_MESSAGE "Enabling 512-bit AVX-512-KNL SIMD instructions using CXX flags: ${SIMD_AVX_512_KNL_CXX_FLAGS}")
208-
209193
elseif(GMX_SIMD_ACTIVE STREQUAL "ARM_NEON_ASIMD")
210194

211195
gmx_find_simd_arm_neon_asimd_flags(SIMD_ARM_NEON_ASIMD_C_SUPPORTED SIMD_ARM_NEON_ASIMD_CXX_SUPPORTED

cmake/gmxSimdFlags.cmake

-22
Original file line numberDiff line numberDiff line change
@@ -263,28 +263,6 @@ function(gmx_find_simd_avx_512_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VAR
263263
endfunction()
264264

265265

266-
# AVX-512ER (KNL)
267-
function(gmx_find_simd_avx_512_knl_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
268-
find_x86_toolchain_flags(TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS)
269-
270-
gmx_find_flags(SIMD_AVX_512_KNL_C_FLAGS_RESULT SIMD_AVX_512_KNL_CXX_FLAGS_RESULT
271-
"#include<immintrin.h>
272-
int main(){__m512 y,x=_mm512_set1_ps(0.5);y=_mm512_rsqrt28_ps(x);return (int)_mm512_cmp_ps_mask(x,y,_CMP_LT_OS);}"
273-
TOOLCHAIN_C_FLAGS TOOLCHAIN_CXX_FLAGS
274-
SIMD_AVX_512_KNL_C_FLAGS SIMD_AVX_512_KNL_CXX_FLAGS
275-
"-xMIC-AVX512" "-mavx512er -mfma" "-mavx512er" "/arch:AVX" "-hgnu") # no AVX_512ER flags known for MSVC yet
276-
277-
if(${SIMD_AVX_512_KNL_C_FLAGS_RESULT})
278-
set(${C_FLAGS_VARIABLE} "${TOOLCHAIN_C_FLAGS} ${SIMD_AVX_512_KNL_C_FLAGS}" CACHE INTERNAL "C flags required for AVX-512 for KNL instructions")
279-
endif()
280-
if(${SIMD_AVX_512_KNL_CXX_FLAGS_RESULT})
281-
set(${CXX_FLAGS_VARIABLE} "${TOOLCHAIN_CXX_FLAGS} ${SIMD_AVX_512_KNL_CXX_FLAGS}" CACHE INTERNAL "C++ flags required for AVX-512 for KNL instructions")
282-
endif()
283-
set(${C_FLAGS_RESULT} ${SIMD_AVX_512_KNL_C_FLAGS_RESULT} CACHE INTERNAL "Result of test for AVX-512 for KNL C flags" FORCE)
284-
set(${CXX_FLAGS_RESULT} ${SIMD_AVX_512_KNL_CXX_FLAGS_RESULT} CACHE INTERNAL "Result of test for AVX-512 for KNL C++ flags" FORCE)
285-
endfunction()
286-
287-
288266
# Arm Neon Asimd (64-bit ARM)
289267
function(gmx_find_simd_arm_neon_asimd_flags C_FLAGS_RESULT CXX_FLAGS_RESULT C_FLAGS_VARIABLE CXX_FLAGS_VARIABLE)
290268

docs/install-guide/index.rst

+4-25
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ use mixed or double precision for |Gromacs|. There is no need to
429429
compile FFTW with threading or MPI support, but it does no harm. On
430430
x86 hardware, compile with all of ``--enable-sse2``, ``--enable-avx``,
431431
and ``--enable-avx2`` flags. On Intel processors supporting
432-
512-wide AVX, including KNL, add ``--enable-avx512`` too.
432+
512-wide AVX, add ``--enable-avx512`` too.
433433
FFTW will create a fat library with codelets for all different instruction sets,
434434
and pick the fastest supported one at runtime.
435435
On ARM architectures with SIMD support use ``--enable-neon`` flag;
@@ -833,11 +833,10 @@ lead to performance loss, e.g. on Intel Skylake-X/SP and AMD Zen (first generati
833833
On AMD it is beneficial to use starting with Zen4.
834834
Additionally, with GPU accelerated runs ``AVX2_256`` can also be
835835
faster on high-end Skylake CPUs with both 512-bit FMA units enabled.
836-
9. ``AVX_512_KNL`` Knights Landing Xeon Phi processors.
837-
10. ``IBM_VSX`` Power7, Power8, Power9 and later have this.
838-
11. ``ARM_NEON_ASIMD`` 64-bit ARMv8 and later. For maximum performance on NVIDIA
836+
9. ``IBM_VSX`` Power7, Power8, Power9 and later have this.
837+
10. ``ARM_NEON_ASIMD`` 64-bit ARMv8 and later. For maximum performance on NVIDIA
839838
Grace (ARMv9), we strongly suggest at least GNU >= 13, LLVM >= 16.
840-
12. ``ARM_SVE`` 64-bit ARMv8 and later with the Scalable Vector Extensions (SVE).
839+
11. ``ARM_SVE`` 64-bit ARMv8 and later with the Scalable Vector Extensions (SVE).
841840
The SVE vector length is fixed at CMake configure time. The default vector
842841
length is automatically detected, and this can be changed via the
843842
``GMX_SIMD_ARM_SVE_LENGTH`` CMake variable. If compiling for a different
@@ -1730,26 +1729,6 @@ The ARM ThunderX2 Cray XC50 machines differ only in that the recommended
17301729
compiler is the ARM HPC Compiler (``armclang``).
17311730

17321731

1733-
Intel Xeon Phi
1734-
^^^^^^^^^^^^^^
1735-
1736-
1737-
Xeon Phi processors, hosted or self-hosted, are supported.
1738-
The Knights Landing-based Xeon Phi processors behave like standard x86 nodes,
1739-
but support a special SIMD instruction set. When cross-compiling for such nodes,
1740-
use the ``AVX_512_KNL`` SIMD flavor.
1741-
Knights Landing processors support so-called "clustering modes" which
1742-
allow reconfiguring the memory subsystem for lower latency. |Gromacs| can
1743-
benefit from the quadrant or SNC clustering modes.
1744-
Care needs to be taken to correctly pin threads. In particular, threads of
1745-
an MPI rank should not cross cluster and NUMA boundaries.
1746-
In addition to the main DRAM memory, Knights Landing has a high-bandwidth
1747-
stacked memory called MCDRAM. Using it offers performance benefits if
1748-
it is ensured that ``mdrun`` runs entirely from this memory; to do so
1749-
it is recommended that MCDRAM is configured in "Flat mode" and ``mdrun`` is
1750-
bound to the appropriate NUMA node (use e.g. ``numactl --membind 1`` with
1751-
quadrant clustering mode).
1752-
17531732
NVIDIA Grace
17541733
^^^^^^^^^^^^
17551734

docs/release-notes/2026/major/removed-functionality.rst

+8
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,11 @@ Removed functionality
77
Also, please use the syntax :issue:`number` to reference issues on GitLab, without
88
a space between the colon and number!
99
10+
Support for Xeon Phi processors is removed
11+
""""""""""""""""""""""""""""""""""""""""""
12+
13+
Remaining support for Knights Landing (KNL) hardware and SIMD
14+
instructions have been removed.
15+
16+
:issue:`4740`
17+

docs/user-guide/mdrun-performance.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1432,7 +1432,7 @@ Known limitations of the OpenCL support
14321432

14331433
Limitations in the current OpenCL support of interest to |Gromacs| users:
14341434

1435-
- Intel integrated GPUs are supported. Intel CPUs and Xeon Phi are not supported.
1435+
- Intel integrated GPUs are supported. Intel CPUs are not supported.
14361436
Set ``-DGMX_GPU_NB_CLUSTER_SIZE=4`` when compiling |Gromacs| to run on consumer
14371437
Intel GPUs (as opposed to Ponte Vecchio / Data Center Max GPUs).
14381438
- Due to blocking behavior of some asynchronous task enqueuing functions

src/config.h.cmakein

-3
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,6 @@
126126
/* AVX-512F foundation level instruction SIMD */
127127
#cmakedefine01 GMX_SIMD_X86_AVX_512
128128

129-
/* AVX-512ER foundation level instruction SIMD */
130-
#cmakedefine01 GMX_SIMD_X86_AVX_512_KNL
131-
132129
/* ARM (AArch64) NEON Advanced SIMD instruction set level was selected */
133130
#cmakedefine01 GMX_SIMD_ARM_NEON_ASIMD
134131

src/gromacs/CMakeLists.txt

+1-3
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,8 @@ endif()
345345
# TODO Perhaps generalize this for all headers from src/external
346346
target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
347347

348-
if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
348+
if(SIMD_AVX_512_CXX_SUPPORTED)
349349
# Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file.
350-
# On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions
351-
# with the SIMD_AVX_512_CXX_FLAGS flags.
352350
set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}")
353351
endif()
354352

src/gromacs/hardware/simd_support.cpp

+2-9
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ static const std::string& simdString(SimdType s)
7979
{ SimdType::X86_Avx2, "AVX2_256" },
8080
{ SimdType::X86_Avx2_128, "AVX2_128" },
8181
{ SimdType::X86_Avx512, "AVX_512" },
82-
{ SimdType::X86_Avx512Knl, "AVX_512_KNL" },
8382
{ SimdType::Arm_NeonAsimd,
8483
"ARM_NEON_ASIMD" },
8584
{ SimdType::Arm_Sve, "ARM_SVE" },
@@ -97,11 +96,7 @@ SimdType simdSuggested(const CpuInfo& c)
9796
switch (c.vendor())
9897
{
9998
case CpuInfo::Vendor::Intel:
100-
if (c.feature(CpuInfo::Feature::X86_Avx512ER))
101-
{
102-
suggested = SimdType::X86_Avx512Knl;
103-
}
104-
else if (c.feature(CpuInfo::Feature::X86_Avx512F))
99+
if (c.feature(CpuInfo::Feature::X86_Avx512F))
105100
{
106101
// If we could not identify the number of AVX512 FMA units we assume 2
107102
suggested = (identifyAvx512FmaUnits() == 1) ? SimdType::X86_Avx2 : SimdType::X86_Avx512;
@@ -198,9 +193,7 @@ SimdType simdSuggested(const CpuInfo& c)
198193

199194
static SimdType simdCompiled()
200195
{
201-
#if GMX_SIMD_X86_AVX_512_KNL
202-
return SimdType::X86_Avx512Knl;
203-
#elif GMX_SIMD_X86_AVX_512
196+
#if GMX_SIMD_X86_AVX_512
204197
return SimdType::X86_Avx512;
205198
#elif GMX_SIMD_X86_AVX2_256
206199
return SimdType::X86_Avx2;

src/gromacs/hardware/simd_support.h

-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ enum class SimdType
6767
X86_Avx2, //!< AVX2
6868
X86_Avx2_128, //!< 128-bit AVX2, better than 256-bit for AMD Ryzen
6969
X86_Avx512, //!< AVX_512
70-
X86_Avx512Knl, //!< AVX_512_KNL
7170
Arm_NeonAsimd, //!< 64-bit ARM AArch64 Advanced SIMD
7271
Arm_Sve, //!< ARM Scalable Vector Extensions
7372
Ibm_Vsx //!< IBM VSX SIMD (Power7 and later)

src/gromacs/mdrun/runner.cpp

+4-7
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@
8787
#include "gromacs/gpu_utils/gpueventsynchronizer_helpers.h"
8888
#include "gromacs/gpu_utils/hostallocator.h"
8989
#include "gromacs/gpu_utils/nvshmem_utils.h"
90-
#include "gromacs/hardware/cpuinfo.h"
9190
#include "gromacs/hardware/detecthardware.h"
9291
#include "gromacs/hardware/device_management.h"
9392
#include "gromacs/hardware/hardwaretopology.h"
@@ -474,8 +473,7 @@ static void prepare_verlet_scheme(FILE* fplog,
474473
const gmx_mtop_t& mtop,
475474
gmx::ArrayRef<const gmx::RVec> coordinates,
476475
const matrix box,
477-
bool makeGpuPairList,
478-
const gmx::CpuInfo& cpuinfo)
476+
bool makeGpuPairList)
479477
{
480478
// We checked the cut-offs in grompp, but double-check here.
481479
// We have PME+LJcutoff kernels for rcoulomb>rvdw.
@@ -545,8 +543,8 @@ static void prepare_verlet_scheme(FILE* fplog,
545543
if (EI_DYNAMICS(ir->eI))
546544
{
547545
/* Set or try nstlist values */
548-
increaseNstlist(
549-
fplog, cr, ir, nstlist_cmdline, &mtop, box, effectiveAtomDensity.value(), makeGpuPairList, cpuinfo);
546+
gmx::increaseNstlist(
547+
fplog, cr, ir, nstlist_cmdline, &mtop, box, effectiveAtomDensity.value(), makeGpuPairList);
550548
}
551549
}
552550

@@ -1337,8 +1335,7 @@ int Mdrunner::mdrunner()
13371335
mtop,
13381336
MAIN(cr) ? globalState->x : gmx::ArrayRef<const gmx::RVec>(),
13391337
box,
1340-
useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
1341-
*hwinfo_->cpuInfo);
1338+
useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes));
13421339

13431340
// We need to decide on update groups early, as this affects
13441341
// inter-domain communication distances.

src/gromacs/nbnxm/pairlist_tuning.cpp

+4-11
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
#include <string>
5454

5555
#include "gromacs/domdec/domdec.h"
56-
#include "gromacs/hardware/cpuinfo.h"
5756
#include "gromacs/math/functions.h"
5857
#include "gromacs/math/vec.h"
5958
#include "gromacs/mdlib/calc_verletbuf.h"
@@ -122,9 +121,6 @@ const int nstlist_try[] = { 20, 25, 40, 50, 80, 100 };
122121
// CPU: pair-search is a factor ~1.5 slower than the non-bonded kernel.
123122
//! Target pair-list size increase ratio for CPU
124123
static const float c_nbnxnListSizeFactorCpu = 1.25;
125-
// Intel KNL: pair-search is a factor ~2-3 slower than the non-bonded kernel.
126-
//! Target pair-list size increase ratio for Intel KNL
127-
static const float c_nbnxnListSizeFactorIntelXeonPhi = 1.4;
128124
// GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel.
129125
//! Target pair-list size increase ratio for GPU
130126
static const float c_nbnxnListSizeFactorGPU = 1.4;
@@ -166,8 +162,7 @@ void increaseNstlist(FILE* fp,
166162
const gmx_mtop_t* mtop,
167163
const matrix box,
168164
const real effectiveAtomDensity,
169-
bool useOrEmulateGpuForNonbondeds,
170-
const CpuInfo& cpuinfo)
165+
bool useOrEmulateGpuForNonbondeds)
171166
{
172167
if (!EI_DYNAMICS(ir->eI))
173168
{
@@ -263,11 +258,9 @@ void increaseNstlist(FILE* fp,
263258
"In all cases that do not support dynamic nstlist, we should have returned "
264259
"with an appropriate message above");
265260

266-
const bool runningOnXeonPhi = (cpuinfo.brandString().find("Xeon Phi") != std::string::npos);
267-
const float listfac_ok = useOrEmulateGpuForNonbondeds ? c_nbnxnListSizeFactorGPU
268-
: runningOnXeonPhi ? c_nbnxnListSizeFactorIntelXeonPhi
269-
: c_nbnxnListSizeFactorCpu;
270-
float listfac_max = listfac_ok + c_nbnxnListSizeFactorMargin;
261+
const float listfac_ok =
262+
useOrEmulateGpuForNonbondeds ? c_nbnxnListSizeFactorGPU : c_nbnxnListSizeFactorCpu;
263+
float listfac_max = listfac_ok + c_nbnxnListSizeFactorMargin;
271264

272265
const int nstlist_orig = ir->nstlist;
273266
if (nstlist_cmdline > 0)

src/gromacs/nbnxm/pairlist_tuning.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ struct t_inputrec;
5858
namespace gmx
5959
{
6060
struct PairlistParams;
61-
class CpuInfo;
6261
class MDLogger;
6362

6463
/*! \brief Try to increase nstlist when using the Verlet cut-off scheme
@@ -71,7 +70,6 @@ class MDLogger;
7170
* \param[in] box The unit cell
7271
* \param[in] effectiveAtomDensity The effective atom density
7372
* \param[in] useOrEmulateGpuForNonbondeds Tells if we are using a GPU for non-bondeds
74-
* \param[in] cpuinfo Information about the CPU(s)
7573
*/
7674
void increaseNstlist(FILE* fplog,
7775
t_commrec* cr,
@@ -80,8 +78,7 @@ void increaseNstlist(FILE* fplog,
8078
const gmx_mtop_t* mtop,
8179
const matrix box,
8280
real effectiveAtomDensity,
83-
bool useOrEmulateGpuForNonbondeds,
84-
const CpuInfo& cpuinfo);
81+
bool useOrEmulateGpuForNonbondeds);
8582

8683
/*! \brief Set up the dynamic pairlist pruning
8784
*

src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd4_double.h

-3
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,10 @@ static inline Simd4Double gmx_simdcall fnms(Simd4Double a, Simd4Double b, Simd4D
158158
return { _mm256_fnmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
159159
}
160160

161-
// Override for AVX-512-KNL
162-
#if GMX_SIMD_X86_AVX_512
163161
static inline Simd4Double gmx_simdcall rsqrt(Simd4Double x)
164162
{
165163
return { _mm512_castpd512_pd256(_mm512_rsqrt14_pd(_mm512_castpd256_pd512(x.simdInternal_))) };
166164
}
167-
#endif
168165

169166
static inline Simd4Double gmx_simdcall abs(Simd4Double x)
170167
{

src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd4_float.h

-3
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,10 @@ static inline Simd4Float gmx_simdcall fnms(Simd4Float a, Simd4Float b, Simd4Floa
158158
return { _mm_fnmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
159159
}
160160

161-
// Override for AVX-512-KNL
162-
#if GMX_SIMD_X86_AVX_512
163161
static inline Simd4Float gmx_simdcall rsqrt(Simd4Float x)
164162
{
165163
return { _mm512_castps512_ps128(_mm512_rsqrt14_ps(_mm512_castps128_ps512(x.simdInternal_))) };
166164
}
167-
#endif
168165

169166
static inline Simd4Float gmx_simdcall abs(Simd4Float x)
170167
{

src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_double.h

-6
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,6 @@ static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDoubl
217217
return { _mm512_fnmsub_pd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
218218
}
219219

220-
// Override for AVX-512-KNL
221-
#if GMX_SIMD_X86_AVX_512
222220
static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
223221
{
224222
return { _mm512_rsqrt14_pd(x.simdInternal_) };
@@ -228,7 +226,6 @@ static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
228226
{
229227
return { _mm512_rcp14_pd(x.simdInternal_) };
230228
}
231-
#endif
232229

233230
static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
234231
{
@@ -245,8 +242,6 @@ static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdD
245242
return { _mm512_maskz_fmadd_pd(m.simdInternal_, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
246243
}
247244

248-
// Override for AVX-512-KNL
249-
#if GMX_SIMD_X86_AVX_512
250245
static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
251246
{
252247
return { _mm512_maskz_rsqrt14_pd(m.simdInternal_, x.simdInternal_) };
@@ -256,7 +251,6 @@ static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
256251
{
257252
return { _mm512_maskz_rcp14_pd(m.simdInternal_, x.simdInternal_) };
258253
}
259-
#endif
260254

261255
static inline SimdDouble gmx_simdcall abs(SimdDouble x)
262256
{

0 commit comments

Comments
 (0)