From ddc882389a14afee5d33153300e4f305e45def47 Mon Sep 17 00:00:00 2001 From: Gopal Srinivasa Date: Mon, 25 Nov 2024 13:50:27 +0530 Subject: [PATCH] Pre-evaluation --- include/distance.h | 32 ++++++++++++++++--------------- src/distance.cpp | 48 ++++++++++++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/include/distance.h b/include/distance.h index a8f409767..a499e6164 100644 --- a/include/distance.h +++ b/include/distance.h @@ -22,7 +22,7 @@ template class Distance } // distance comparison function - DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, uint32_t length, float threshold = FLT_MAX) const = 0; + DISKANN_DLLEXPORT virtual float compare(const T * __restrict a, const T * __restrict b, uint32_t length, float threshold = FLT_MAX) const = 0; // Needed only for COSINE-BYTE and INNER_PRODUCT-BYTE DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, const float normA, const float normB, @@ -79,7 +79,7 @@ class DistanceCosineInt8 : public Distance DistanceCosineInt8() : Distance(diskann::Metric::COSINE) { } - DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const int8_t * __restrict a, const int8_t * __restrict b, uint32_t length, float threshold = FLT_MAX) const; }; @@ -89,7 +89,7 @@ class DistanceL2Int8 : public Distance DistanceL2Int8() : Distance(diskann::Metric::L2) { } - DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t size, + DISKANN_DLLEXPORT virtual float compare(const int8_t *__restrict a, const int8_t *__restrict b, uint32_t size, float threshold = FLT_MAX) const; }; @@ -101,7 +101,7 @@ class AVXDistanceL2Int8 : public Distance AVXDistanceL2Int8() : Distance(diskann::Metric::L2) { } - DISKANN_DLLEXPORT virtual float compare(const int8_t *a, const int8_t *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const int8_t *__restrict a, const int8_t 
*__restrict b, uint32_t length, float threshold = FLT_MAX) const; }; @@ -111,7 +111,7 @@ class DistanceCosineFloat : public Distance DistanceCosineFloat() : Distance(diskann::Metric::COSINE) { } - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t length, float threshold = FLT_MAX) const; }; @@ -123,10 +123,10 @@ class DistanceL2Float : public Distance } #ifdef _WINDOWS - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t size, + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t size, float threshold = FLT_MAX) const; #else - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t size, + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t size, float threshold = FLT_MAX) const __attribute__((hot)); #endif }; @@ -137,7 +137,7 @@ class AVXDistanceL2Float : public Distance AVXDistanceL2Float() : Distance(diskann::Metric::L2) { } - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t length, float threshold = FLT_MAX) const; }; @@ -147,7 +147,8 @@ template class SlowDistanceL2 : public Distance SlowDistanceL2() : Distance(diskann::Metric::L2) { } - DISKANN_DLLEXPORT virtual float compare(const T *a, const T *b, uint32_t length, float threshold = FLT_MAX) const; + DISKANN_DLLEXPORT virtual float compare(const T *__restrict a, const T *__restrict b, uint32_t length, + float threshold = FLT_MAX) const; }; class SlowDistanceCosineUInt8 : public Distance @@ -156,7 +157,7 @@ class SlowDistanceCosineUInt8 : public Distance SlowDistanceCosineUInt8() : Distance(diskann::Metric::COSINE) { } - DISKANN_DLLEXPORT virtual float compare(const 
uint8_t *a, const uint8_t *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const uint8_t *__restrict a, const uint8_t *__restrict b, uint32_t length, float threshold = FLT_MAX) const; }; @@ -166,7 +167,7 @@ class DistanceL2UInt8 : public Distance DistanceL2UInt8() : Distance(diskann::Metric::L2) { } - DISKANN_DLLEXPORT virtual float compare(const uint8_t *a, const uint8_t *b, uint32_t size, + DISKANN_DLLEXPORT virtual float compare(const uint8_t *__restrict a, const uint8_t *__restrict b, uint32_t size, float threshold = FLT_MAX) const; }; @@ -182,7 +183,7 @@ template class DistanceInnerProduct : public Distance } inline float inner_product(const T *a, const T *b, unsigned size) const; - inline float compare(const T *a, const T *b, unsigned size, float threshold = FLT_MAX) const + inline float compare(const T *__restrict a, const T *__restrict b, unsigned size, float threshold = FLT_MAX) const { float result = inner_product(a, b, size); // if (result < 0) @@ -201,7 +202,7 @@ template class DistanceFastL2 : public DistanceInnerProduct { } float norm(const T *a, unsigned size) const; - float compare(const T *a, const T *b, float norm, unsigned size, float threshold = FLT_MAX) const; + float compare(const T *__restrict a, const T *__restrict b, float norm, unsigned size, float threshold = FLT_MAX) const; }; class AVXDistanceInnerProductFloat : public Distance @@ -210,7 +211,8 @@ class AVXDistanceInnerProductFloat : public Distance AVXDistanceInnerProductFloat() : Distance(diskann::Metric::INNER_PRODUCT) { } - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length, float threshold = FLT_MAX) const; + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t length, + float threshold = FLT_MAX) const; }; class AVXNormalizedCosineDistanceFloat : public Distance @@ -225,7 +227,7 @@ class AVXNormalizedCosineDistanceFloat : public Distance AVXNormalizedCosineDistanceFloat() : 
Distance(diskann::Metric::COSINE) { } - DISKANN_DLLEXPORT virtual float compare(const float *a, const float *b, uint32_t length, + DISKANN_DLLEXPORT virtual float compare(const float *__restrict a, const float *__restrict b, uint32_t length, float threshold = FLT_MAX) const { // Inner product returns negative values to indicate distance. diff --git a/src/distance.cpp b/src/distance.cpp index 563ede547..213c8f320 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -65,7 +65,8 @@ template size_t Distance::get_required_alignment() const // Cosine distance functions. // -float DistanceCosineInt8::compare(const int8_t *a, const int8_t *b, uint32_t length, float threshold) const +float DistanceCosineInt8::compare(const int8_t *__restrict a, const int8_t *__restrict b, uint32_t length, + float threshold) const { #ifdef _WINDOWS return diskann::CosineSimilarity2(a, b, length); @@ -82,7 +83,8 @@ float DistanceCosineInt8::compare(const int8_t *a, const int8_t *b, uint32_t len #endif } -float DistanceCosineFloat::compare(const float *a, const float *b, uint32_t length, float threshold) const +float DistanceCosineFloat::compare(const float *__restrict a, const float *__restrict b, uint32_t length, + float threshold) const { #ifdef _WINDOWS return diskann::CosineSimilarity2(a, b, length); @@ -99,7 +101,7 @@ float DistanceCosineFloat::compare(const float *a, const float *b, uint32_t leng #endif } -float SlowDistanceCosineUInt8::compare(const uint8_t *a, const uint8_t *b, uint32_t length, +float SlowDistanceCosineUInt8::compare(const uint8_t *__restrict a, const uint8_t *__restrict b, uint32_t length, float threshold) const { int magA = 0, magB = 0, scalarProduct = 0; @@ -117,7 +119,7 @@ float SlowDistanceCosineUInt8::compare(const uint8_t *a, const uint8_t *b, uint3 // L2 distance functions. 
// -float DistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t size, float threshold) const +float DistanceL2Int8::compare(const int8_t *__restrict a, const int8_t *__restrict b, uint32_t size, float threshold) const { #ifdef _WINDOWS #ifdef USE_AVX2 @@ -131,7 +133,7 @@ float DistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t size, f pY += 32; size -= 32; if (_mm256_reduce_add_ps(r) > threshold) { - diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; + //diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; return FLT_MAX; } } @@ -144,7 +146,7 @@ float DistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t size, f size -= 4; if (_mm256_reduce_add_ps(r) > threshold) { - diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; + //diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; return FLT_MAX; } @@ -171,7 +173,8 @@ float DistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t size, f #endif } -float DistanceL2UInt8::compare(const uint8_t *a, const uint8_t *b, uint32_t size, float threshold) const +float DistanceL2UInt8::compare(const uint8_t *__restrict a, const uint8_t *__restrict b, uint32_t size, + float threshold) const { uint32_t result = 0; #ifndef _WINDOWS @@ -185,12 +188,12 @@ float DistanceL2UInt8::compare(const uint8_t *a, const uint8_t *b, uint32_t size } #ifndef _WINDOWS -float DistanceL2Float::compare(const float *a, const float *b, uint32_t size, float threshold) const +float DistanceL2Float::compare(const float *__restrict a, const float *__restrict b, uint32_t size, float threshold) const { a = (const float *)__builtin_assume_aligned(a, 32); b = (const float *)__builtin_assume_aligned(b, 32); #else -float DistanceL2Float::compare(const float *a, const float *b, uint32_t size, float threshold) const +float DistanceL2Float::compare(const float *__restrict a, 
const float *__restrict b, uint32_t size, float threshold) const { #endif @@ -216,9 +219,13 @@ float DistanceL2Float::compare(const float *a, const float *b, uint32_t size, fl sum = _mm256_fmadd_ps(tmp_vec, tmp_vec, sum); - if (_mm256_reduce_add_ps(sum) > threshold) { - //diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; - return FLT_MAX; + if (j == (niters/2) || j == (3* niters/4) || j == (7* niters/8) ) + { + if (_mm256_reduce_add_ps(sum) > threshold) + { + //diskann::cout << "Breaking because sum exceeded threshold: " << threshold << std::endl; + return FLT_MAX; + } } } @@ -237,7 +244,7 @@ float DistanceL2Float::compare(const float *a, const float *b, uint32_t size, fl } template -float SlowDistanceL2::compare(const T *a, const T *b, uint32_t length, float threshold) const +float SlowDistanceL2::compare(const T *__restrict a, const T *__restrict b, uint32_t length, float threshold) const { float result = 0.0f; for (uint32_t i = 0; i < length; i++) @@ -248,7 +255,8 @@ float SlowDistanceL2::compare(const T *a, const T *b, uint32_t length, float } #ifdef _WINDOWS -float AVXDistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t length, float threshold) const +float AVXDistanceL2Int8::compare(const int8_t *__restrict a, const int8_t *__restrict b, uint32_t length, + float threshold) const { __m128 r = _mm_setzero_ps(); __m128i r1; @@ -286,7 +294,8 @@ float AVXDistanceL2Int8::compare(const int8_t *a, const int8_t *b, uint32_t leng return res; } -float AVXDistanceL2Float::compare(const float *a, const float *b, uint32_t length, float threshold) const +float AVXDistanceL2Float::compare(const float *__restrict a, const float *__restrict b, uint32_t length, + float threshold) const { __m128 diff, v1, v2; __m128 sum = _mm_set1_ps(0); @@ -305,11 +314,11 @@ float AVXDistanceL2Float::compare(const float *a, const float *b, uint32_t lengt return sum.m128_f32[0] + sum.m128_f32[1] + sum.m128_f32[2] + sum.m128_f32[3]; } #else 
-float AVXDistanceL2Int8::compare(const int8_t *, const int8_t *, uint32_t, float threshold) const +float AVXDistanceL2Int8::compare(const int8_t *__restrict, const int8_t *__restrict, uint32_t, float threshold) const { return 0; } -float AVXDistanceL2Float::compare(const float *, const float *, uint32_t, float threshold) const +float AVXDistanceL2Float::compare(const float *__restrict, const float *__restrict, uint32_t, float threshold) const { return 0; } @@ -429,7 +438,8 @@ template float DistanceInnerProduct::inner_product(const T *a, c } template -float DistanceFastL2::compare(const T *a, const T *b, float norm, uint32_t size, float threshold) const +float DistanceFastL2::compare(const T *__restrict a, const T *__restrict b, float norm, uint32_t size, + float threshold) const { float result = -2 * DistanceInnerProduct::inner_product(a, b, size); result += norm; @@ -537,7 +547,7 @@ template float DistanceFastL2::norm(const T *a, uint32_t size) c return result; } -float AVXDistanceInnerProductFloat::compare(const float *a, const float *b, uint32_t size, +float AVXDistanceInnerProductFloat::compare(const float *__restrict a, const float *__restrict b, uint32_t size, float threshold) const { float result = 0.0f;