@@ -330,7 +330,7 @@ struct zmm_vector<double> {
330
330
* https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg
331
331
*/
332
332
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
333
- X86_SIMD_SORT_FINLINE zmm_t sort_zmm_64bit (zmm_t zmm)
333
+ X86_SIMD_SORT_INLINE zmm_t sort_zmm_64bit (zmm_t zmm)
334
334
{
335
335
const __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
336
336
zmm = cmp_merge<vtype>(
@@ -353,7 +353,7 @@ X86_SIMD_SORT_FINLINE zmm_t sort_zmm_64bit(zmm_t zmm)
353
353
354
354
// Assumes zmm is bitonic and performs a recursive half cleaner
355
355
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
356
- X86_SIMD_SORT_FINLINE zmm_t bitonic_merge_zmm_64bit (zmm_t zmm)
356
+ X86_SIMD_SORT_INLINE zmm_t bitonic_merge_zmm_64bit (zmm_t zmm)
357
357
{
358
358
359
359
// 1) half_cleaner[8]: compare 0-4, 1-5, 2-6, 3-7
@@ -374,7 +374,7 @@ X86_SIMD_SORT_FINLINE zmm_t bitonic_merge_zmm_64bit(zmm_t zmm)
374
374
375
375
// Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner
376
376
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
377
- X86_SIMD_SORT_FINLINE void bitonic_merge_two_zmm_64bit (zmm_t &zmm1, zmm_t &zmm2)
377
+ X86_SIMD_SORT_INLINE void bitonic_merge_two_zmm_64bit (zmm_t &zmm1, zmm_t &zmm2)
378
378
{
379
379
const __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
380
380
// 1) First step of a merging network: coex of zmm1 and zmm2 reversed
@@ -389,7 +389,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_two_zmm_64bit(zmm_t &zmm1, zmm_t &zmm2)
389
389
// Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive
390
390
// half cleaner
391
391
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
392
- X86_SIMD_SORT_FINLINE void bitonic_merge_four_zmm_64bit (zmm_t *zmm)
392
+ X86_SIMD_SORT_INLINE void bitonic_merge_four_zmm_64bit (zmm_t *zmm)
393
393
{
394
394
const __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
395
395
// 1) First step of a merging network
@@ -411,7 +411,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_four_zmm_64bit(zmm_t *zmm)
411
411
}
412
412
413
413
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
414
- X86_SIMD_SORT_FINLINE void bitonic_merge_eight_zmm_64bit (zmm_t *zmm)
414
+ X86_SIMD_SORT_INLINE void bitonic_merge_eight_zmm_64bit (zmm_t *zmm)
415
415
{
416
416
const __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
417
417
zmm_t zmm4r = vtype::permutexvar (rev_index, zmm[4 ]);
@@ -445,7 +445,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_eight_zmm_64bit(zmm_t *zmm)
445
445
}
446
446
447
447
template <typename vtype, typename zmm_t = typename vtype::zmm_t >
448
- X86_SIMD_SORT_FINLINE void bitonic_merge_sixteen_zmm_64bit (zmm_t *zmm)
448
+ X86_SIMD_SORT_INLINE void bitonic_merge_sixteen_zmm_64bit (zmm_t *zmm)
449
449
{
450
450
const __m512i rev_index = _mm512_set_epi64 (NETWORK_64BIT_2);
451
451
zmm_t zmm8r = vtype::permutexvar (rev_index, zmm[8 ]);
@@ -519,7 +519,7 @@ X86_SIMD_SORT_FINLINE void bitonic_merge_sixteen_zmm_64bit(zmm_t *zmm)
519
519
}
520
520
521
521
template <typename vtype, typename type_t >
522
- X86_SIMD_SORT_FINLINE void sort_8_64bit (type_t *arr, int32_t N)
522
+ X86_SIMD_SORT_INLINE void sort_8_64bit (type_t *arr, int32_t N)
523
523
{
524
524
typename vtype::opmask_t load_mask = (0x01 << N) - 0x01 ;
525
525
typename vtype::zmm_t zmm
@@ -528,7 +528,7 @@ X86_SIMD_SORT_FINLINE void sort_8_64bit(type_t *arr, int32_t N)
528
528
}
529
529
530
530
template <typename vtype, typename type_t >
531
- X86_SIMD_SORT_FINLINE void sort_16_64bit (type_t *arr, int32_t N)
531
+ X86_SIMD_SORT_INLINE void sort_16_64bit (type_t *arr, int32_t N)
532
532
{
533
533
if (N <= 8 ) {
534
534
sort_8_64bit<vtype>(arr, N);
@@ -546,7 +546,7 @@ X86_SIMD_SORT_FINLINE void sort_16_64bit(type_t *arr, int32_t N)
546
546
}
547
547
548
548
template <typename vtype, typename type_t >
549
- X86_SIMD_SORT_FINLINE void sort_32_64bit (type_t *arr, int32_t N)
549
+ X86_SIMD_SORT_INLINE void sort_32_64bit (type_t *arr, int32_t N)
550
550
{
551
551
if (N <= 16 ) {
552
552
sort_16_64bit<vtype>(arr, N);
@@ -577,7 +577,7 @@ X86_SIMD_SORT_FINLINE void sort_32_64bit(type_t *arr, int32_t N)
577
577
}
578
578
579
579
template <typename vtype, typename type_t >
580
- X86_SIMD_SORT_FINLINE void sort_64_64bit (type_t *arr, int32_t N)
580
+ X86_SIMD_SORT_INLINE void sort_64_64bit (type_t *arr, int32_t N)
581
581
{
582
582
if (N <= 32 ) {
583
583
sort_32_64bit<vtype>(arr, N);
@@ -628,7 +628,7 @@ X86_SIMD_SORT_FINLINE void sort_64_64bit(type_t *arr, int32_t N)
628
628
}
629
629
630
630
template <typename vtype, typename type_t >
631
- X86_SIMD_SORT_FINLINE void sort_128_64bit (type_t *arr, int32_t N)
631
+ X86_SIMD_SORT_INLINE void sort_128_64bit (type_t *arr, int32_t N)
632
632
{
633
633
if (N <= 64 ) {
634
634
sort_64_64bit<vtype>(arr, N);
@@ -718,9 +718,9 @@ X86_SIMD_SORT_FINLINE void sort_128_64bit(type_t *arr, int32_t N)
718
718
}
719
719
720
720
template <typename vtype, typename type_t >
721
- X86_SIMD_SORT_FINLINE type_t get_pivot_64bit (type_t *arr,
722
- const int64_t left,
723
- const int64_t right)
721
+ X86_SIMD_SORT_INLINE type_t get_pivot_64bit (type_t *arr,
722
+ const int64_t left,
723
+ const int64_t right)
724
724
{
725
725
// median of 8
726
726
int64_t size = (right - left) / 8 ;
@@ -769,7 +769,7 @@ qsort_64bit_(type_t *arr, int64_t left, int64_t right, int64_t max_iters)
769
769
qsort_64bit_<vtype>(arr, pivot_index, right, max_iters - 1 );
770
770
}
771
771
772
- X86_SIMD_SORT_FINLINE int64_t replace_nan_with_inf (double *arr, int64_t arrsize)
772
+ X86_SIMD_SORT_INLINE int64_t replace_nan_with_inf (double *arr, int64_t arrsize)
773
773
{
774
774
int64_t nan_count = 0 ;
775
775
__mmask8 loadmask = 0xFF ;
@@ -785,7 +785,7 @@ X86_SIMD_SORT_FINLINE int64_t replace_nan_with_inf(double *arr, int64_t arrsize)
785
785
return nan_count;
786
786
}
787
787
788
- X86_SIMD_SORT_FINLINE void
788
+ X86_SIMD_SORT_INLINE void
789
789
replace_inf_with_nan (double *arr, int64_t arrsize, int64_t nan_count)
790
790
{
791
791
for (int64_t ii = arrsize - 1 ; nan_count > 0 ; --ii) {
0 commit comments