diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 04ec15fd4..b689e1021 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -45,6 +45,10 @@ jobs: cd jni/external/faiss git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch rm ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch + git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch + rm ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch + git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch + rm ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch working-directory: ${{ github.workspace }} - name: Setup Java ${{ matrix.java }} @@ -56,7 +60,15 @@ jobs: # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip. 
run: | chown -R 1000:1000 `pwd` - su `id -un 1000` -c "whoami && java -version && ./gradlew build" + if lscpu | grep -i avx2 + then + echo "avx2 available on system" + su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dsimd.enabled=true" + else + echo "avx2 not available on system" + su `id -un 1000` -c "whoami && java -version && ./gradlew build" + fi + - name: Upload Coverage Report uses: codecov/codecov-action@v1 @@ -88,7 +100,14 @@ jobs: - name: Run build run: | - ./gradlew build + if sysctl -n machdep.cpu.features machdep.cpu.leaf7_features | grep -i AVX2 + then + echo "avx2 available on system" + ./gradlew build -Dsimd.enabled=true + else + echo "avx2 not available on system" + ./gradlew build + fi Build-k-NN-Windows: strategy: diff --git a/.github/workflows/test_security.yml b/.github/workflows/test_security.yml index 783b4399c..cf83185f6 100644 --- a/.github/workflows/test_security.yml +++ b/.github/workflows/test_security.yml @@ -45,6 +45,10 @@ jobs: cd jni/external/faiss git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch rm ../../patches/faiss/0001-Custom-patch-to-support-multi-vector.patch + git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch + rm ../../patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch + git apply --ignore-space-change --ignore-whitespace --3way ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch + rm ../../patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch working-directory: ${{ github.workspace }} - name: Setup Java ${{ matrix.java }} @@ -56,4 +60,4 @@ jobs: # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip. 
run: | chown -R 1000:1000 `pwd` - su `id -un 1000` -c "whoami && java -version && ./gradlew integTest -Dsecurity.enabled=true" + su `id -un 1000` -c "whoami && java -version && ./gradlew integTest -Dsecurity.enabled=true -Dsimd.enabled=true" diff --git a/CHANGELOG.md b/CHANGELOG.md index b0d366420..4d5796d4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features * Add parent join support for lucene knn [#1182](https://github.com/opensearch-project/k-NN/pull/1182) * Add parent join support for faiss hnsw [#1398](https://github.com/opensearch-project/k-NN/pull/1398) +* Add Support for Faiss SQFP16 and enable Faiss AVX2 Optimization [#1421](https://github.com/opensearch-project/k-NN/pull/1421) ### Enhancements * Increase Lucene max dimension limit to 16,000 [#1346](https://github.com/opensearch-project/k-NN/pull/1346) * Tuned default values for ef_search and ef_construction for better indexing and search performance for vector search [#1353](https://github.com/opensearch-project/k-NN/pull/1353) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index 073a8cbea..c232e75a9 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -11,6 +11,7 @@ - [Build](#build) - [JNI Library](#jni-library) - [JNI Library Artifacts](#jni-library-artifacts) + - [Enable SIMD Optimization](#enable-simd-optimization) - [Run OpenSearch k-NN](#run-opensearch-k-nn) - [Run Single-node Cluster Locally](#run-single-node-cluster-locally) - [Run Multi-node Cluster Locally](#run-multi-node-cluster-locally) @@ -236,6 +237,23 @@ If you want to make a custom patch on JNI library 3. Place the patch file under `jni/patches` 4. 
Make a change in `jni/CmakeLists.txt`, `.github/workflows/CI.yml` to apply the patch during build +### Enable SIMD Optimization +SIMD (Single Instruction, Multiple Data) optimization can be enabled by setting the optional parameter `simd.enabled` to `true`, which boosts performance +by enabling `AVX2` on the x86 architecture and `NEON` on the ARM64 architecture while building the Faiss library. To enable SIMD, the underlying processor +must support the corresponding instruction set (AVX2 or NEON), so the parameter defaults to `false`. + +``` +# While building OpenSearch k-NN +./gradlew build -Dsimd.enabled=true + +# While running OpenSearch k-NN +./gradlew run -Dsimd.enabled=true + +# While building the JNI libraries +cd jni +cmake . -DSIMD_ENABLED=true +``` + ## Run OpenSearch k-NN ### Run Single-node Cluster Locally diff --git a/build.gradle b/build.gradle index f8831398f..90d30ee3d 100644 --- a/build.gradle +++ b/build.gradle @@ -17,6 +17,7 @@ buildscript { version_qualifier = System.getProperty("build.version_qualifier", "") opensearch_group = "org.opensearch" isSnapshot = "true" == System.getProperty("build.snapshot", "true") + simd_enabled = System.getProperty("simd.enabled", "false") version_tokens = opensearch_version.tokenize('-') opensearch_build = version_tokens[0] + '.0' @@ -297,10 +298,10 @@ task cmakeJniLib(type:Exec) { workingDir 'jni' if (Os.isFamily(Os.FAMILY_WINDOWS)) { dependsOn windowsPatches - commandLine 'cmake', '.', "-G", "Unix Makefiles", "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DBLAS_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll", "-DLAPACK_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll" + commandLine 'cmake', '.', "-G", "Unix Makefiles", "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DBLAS_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll", "-DLAPACK_LIBRARIES=$rootDir\\src\\main\\resources\\windowsDependencies\\libopenblas.dll", "-DSIMD_ENABLED=${simd_enabled}" } 
else { - commandLine 'cmake', '.', "-DKNN_PLUGIN_VERSION=${opensearch_version}" + commandLine 'cmake', '.', "-DKNN_PLUGIN_VERSION=${opensearch_version}", "-DSIMD_ENABLED=${simd_enabled}" } } diff --git a/jni/CMakeLists.txt b/jni/CMakeLists.txt index 04dca217c..901929fc3 100644 --- a/jni/CMakeLists.txt +++ b/jni/CMakeLists.txt @@ -111,7 +111,13 @@ endif () if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} STREQUAL ON) set(BUILD_TESTING OFF) # Avoid building faiss tests set(BLA_STATIC ON) # Statically link BLAS - set(FAISS_OPT_LEVEL generic) # Keep optimization level generic + if(${CMAKE_SYSTEM_NAME} STREQUAL Windows OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" OR NOT ${SIMD_ENABLED}) + set(FAISS_OPT_LEVEL generic) # Keep optimization level as generic on Windows OS as it is not supported due to MINGW64 compiler issue. Also, on aarch64 avx2 is not supported. + set(TARGET_LINK_FAISS_LIB faiss) + else() + set(FAISS_OPT_LEVEL avx2) # Keep optimization level as avx2 to improve performance on Linux and Mac. + set(TARGET_LINK_FAISS_LIB faiss_avx2) + endif() if (${CMAKE_SYSTEM_NAME} STREQUAL Darwin) if(CMAKE_C_COMPILER_ID MATCHES "Clang\$") @@ -143,12 +149,20 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S endif () # Check if patch exist, this is to skip git apply during CI build. See CI.yml with ubuntu. 
- find_path(PATCH_FILE NAMES 0001-Custom-patch-to-support-multi-vector.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss NO_DEFAULT_PATH) + find_path(PATCH_FILE NAMES 0001-Custom-patch-to-support-multi-vector.patch 0002-Custom-patch-to-support-sqfp16-neon.patch PATHS ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss NO_DEFAULT_PATH) # If it exists, apply patches if (EXISTS ${PATCH_FILE}) message(STATUS "Applying custom patches.") execute_process(COMMAND git apply --ignore-space-change --ignore-whitespace --3way ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0001-Custom-patch-to-support-multi-vector.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE) + + # 0002-Custom-patch-to-support-sqfp16-neon.patch is a temporary patch to add NEON support to SQ. + # Once the commit conflict issues wrt to Multi vector are resolved, this patch can be removed by updating the faiss submodule with corresponding commit. + # Apply the patch if the OS is not Windows and Processor is aarch64. 
+ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL Windows AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" AND ${SIMD_ENABLED}) + execute_process(COMMAND git apply --ignore-space-change --ignore-whitespace --3way ${CMAKE_CURRENT_SOURCE_DIR}/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss ERROR_VARIABLE ERROR_MSG RESULT_VARIABLE RESULT_CODE) + endif() + if(RESULT_CODE) message(FATAL_ERROR "Failed to apply patch:\n${ERROR_MSG}") endif() @@ -165,7 +179,7 @@ if (${CONFIG_FAISS} STREQUAL ON OR ${CONFIG_ALL} STREQUAL ON OR ${CONFIG_TEST} S ${CMAKE_CURRENT_SOURCE_DIR}/src/knn_extension/faiss/utils/BitSet.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/knn_extension/faiss/MultiVectorResultCollector.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/knn_extension/faiss/MultiVectorResultCollectorFactory.cpp) - target_link_libraries(${TARGET_LIB_FAISS} faiss ${TARGET_LIB_COMMON} OpenMP::OpenMP_CXX) + target_link_libraries(${TARGET_LIB_FAISS} ${TARGET_LINK_FAISS_LIB} ${TARGET_LIB_COMMON} OpenMP::OpenMP_CXX) target_include_directories(${TARGET_LIB_FAISS} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/knn_extension/faiss diff --git a/jni/external/faiss b/jni/external/faiss index 3219e3d12..0013c702f 160000 --- a/jni/external/faiss +++ b/jni/external/faiss @@ -1 +1 @@ -Subproject commit 3219e3d12e6fc36dfdfe17d4cf238ef70bf89568 +Subproject commit 0013c702f47bedbf6159ac356e61f378ccd12ac8 diff --git a/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch b/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch new file mode 100644 index 000000000..d743d0a97 --- /dev/null +++ b/jni/patches/faiss/0002-Custom-patch-to-support-sqfp16-neon.patch @@ -0,0 +1,495 @@ +diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt +index db5133d6..22dac7cb 100644 +--- a/faiss/CMakeLists.txt ++++ b/faiss/CMakeLists.txt +@@ -189,6 +189,7 @@ set(FAISS_HEADERS + utils/extra_distances.h + utils/fp16-fp16c.h + 
utils/fp16-inl.h ++ utils/fp16-arm.h + utils/fp16.h + utils/hamming-inl.h + utils/hamming.h +diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp +index fc7b28ef..07d77d56 100644 +--- a/faiss/impl/ScalarQuantizer.cpp ++++ b/faiss/impl/ScalarQuantizer.cpp +@@ -91,6 +91,20 @@ struct Codec8bit { + return _mm256_fmadd_ps(f8, one_255, half_one_255); + } + #endif ++ ++#ifdef __aarch64__ ++ static FAISS_ALWAYS_INLINE float32x4x2_t ++ decode_8_components(const uint8_t* code, int i) { ++ float32_t result[8] = {}; ++ for (size_t j = 0; j < 8; j++) { ++ result[j] = decode_component(code, i + j); ++ } ++ float32x4_t res1 = vld1q_f32(result); ++ float32x4_t res2 = vld1q_f32(result + 4); ++ float32x4x2_t res = vzipq_f32(res1, res2); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++#endif + }; + + struct Codec4bit { +@@ -129,6 +143,20 @@ struct Codec4bit { + return _mm256_mul_ps(f8, one_255); + } + #endif ++ ++#ifdef __aarch64__ ++ static FAISS_ALWAYS_INLINE float32x4x2_t ++ decode_8_components(const uint8_t* code, int i) { ++ float32_t result[8] = {}; ++ for (size_t j = 0; j < 8; j++) { ++ result[j] = decode_component(code, i + j); ++ } ++ float32x4_t res1 = vld1q_f32(result); ++ float32x4_t res2 = vld1q_f32(result + 4); ++ float32x4x2_t res = vzipq_f32(res1, res2); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++#endif + }; + + struct Codec6bit { +@@ -228,6 +256,20 @@ struct Codec6bit { + } + + #endif ++ ++#ifdef __aarch64__ ++ static FAISS_ALWAYS_INLINE float32x4x2_t ++ decode_8_components(const uint8_t* code, int i) { ++ float32_t result[8] = {}; ++ for (size_t j = 0; j < 8; j++) { ++ result[j] = decode_component(code, i + j); ++ } ++ float32x4_t res1 = vld1q_f32(result); ++ float32x4_t res2 = vld1q_f32(result + 4); ++ float32x4x2_t res = vzipq_f32(res1, res2); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++#endif + }; + + /******************************************************************* +@@ -293,6 +335,31 @@ struct QuantizerTemplate : 
QuantizerTemplate { + + #endif + ++#ifdef __aarch64__ ++ ++template ++struct QuantizerTemplate : QuantizerTemplate { ++ QuantizerTemplate(size_t d, const std::vector& trained) ++ : QuantizerTemplate(d, trained) {} ++ ++ FAISS_ALWAYS_INLINE float32x4x2_t ++ reconstruct_8_components(const uint8_t* code, int i) const { ++ float32x4x2_t xi = Codec::decode_8_components(code, i); ++ float32x4x2_t res = vzipq_f32( ++ vfmaq_f32( ++ vdupq_n_f32(this->vmin), ++ xi.val[0], ++ vdupq_n_f32(this->vdiff)), ++ vfmaq_f32( ++ vdupq_n_f32(this->vmin), ++ xi.val[1], ++ vdupq_n_f32(this->vdiff))); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++}; ++ ++#endif ++ + template + struct QuantizerTemplate : ScalarQuantizer::SQuantizer { + const size_t d; +@@ -350,6 +417,29 @@ struct QuantizerTemplate : QuantizerTemplate { + + #endif + ++#ifdef __aarch64__ ++ ++template ++struct QuantizerTemplate : QuantizerTemplate { ++ QuantizerTemplate(size_t d, const std::vector& trained) ++ : QuantizerTemplate(d, trained) {} ++ ++ FAISS_ALWAYS_INLINE float32x4x2_t ++ reconstruct_8_components(const uint8_t* code, int i) const { ++ float32x4x2_t xi = Codec::decode_8_components(code, i); ++ ++ float32x4x2_t vmin_8 = vld1q_f32_x2(this->vmin + i); ++ float32x4x2_t vdiff_8 = vld1q_f32_x2(this->vdiff + i); ++ ++ float32x4x2_t res = vzipq_f32( ++ vfmaq_f32(vmin_8.val[0], xi.val[0], vdiff_8.val[0]), ++ vfmaq_f32(vmin_8.val[1], xi.val[1], vdiff_8.val[1])); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++}; ++ ++#endif ++ + /******************************************************************* + * FP16 quantizer + *******************************************************************/ +@@ -397,6 +487,23 @@ struct QuantizerFP16<8> : QuantizerFP16<1> { + + #endif + ++#ifdef __aarch64__ ++ ++template <> ++struct QuantizerFP16<8> : QuantizerFP16<1> { ++ QuantizerFP16(size_t d, const std::vector& trained) ++ : QuantizerFP16<1>(d, trained) {} ++ ++ FAISS_ALWAYS_INLINE float32x4x2_t ++ reconstruct_8_components(const 
uint8_t* code, int i) const { ++ uint16x4x2_t codei = vld2_u16((const uint16_t*)(code + 2 * i)); ++ return vzipq_f32( ++ vcvt_f32_f16(vreinterpret_f16_u16(codei.val[0])), ++ vcvt_f32_f16(vreinterpret_f16_u16(codei.val[1]))); ++ } ++}; ++#endif ++ + /******************************************************************* + * 8bit_direct quantizer + *******************************************************************/ +@@ -446,6 +553,28 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> { + + #endif + ++#ifdef __aarch64__ ++ ++template <> ++struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> { ++ Quantizer8bitDirect(size_t d, const std::vector& trained) ++ : Quantizer8bitDirect<1>(d, trained) {} ++ ++ FAISS_ALWAYS_INLINE float32x4x2_t ++ reconstruct_8_components(const uint8_t* code, int i) const { ++ float32_t result[8] = {}; ++ for (size_t j = 0; j < 8; j++) { ++ result[j] = code[i + j]; ++ } ++ float32x4_t res1 = vld1q_f32(result); ++ float32x4_t res2 = vld1q_f32(result + 4); ++ float32x4x2_t res = vzipq_f32(res1, res2); ++ return vuzpq_f32(res.val[0], res.val[1]); ++ } ++}; ++ ++#endif ++ + template + ScalarQuantizer::SQuantizer* select_quantizer_1( + QuantizerType qtype, +@@ -728,6 +857,59 @@ struct SimilarityL2<8> { + + #endif + ++#ifdef __aarch64__ ++template <> ++struct SimilarityL2<8> { ++ static constexpr int simdwidth = 8; ++ static constexpr MetricType metric_type = METRIC_L2; ++ ++ const float *y, *yi; ++ explicit SimilarityL2(const float* y) : y(y) {} ++ float32x4x2_t accu8; ++ ++ FAISS_ALWAYS_INLINE void begin_8() { ++ accu8 = vzipq_f32(vdupq_n_f32(0.0f), vdupq_n_f32(0.0f)); ++ yi = y; ++ } ++ ++ FAISS_ALWAYS_INLINE void add_8_components(float32x4x2_t x) { ++ float32x4x2_t yiv = vld1q_f32_x2(yi); ++ yi += 8; ++ ++ float32x4_t sub0 = vsubq_f32(yiv.val[0], x.val[0]); ++ float32x4_t sub1 = vsubq_f32(yiv.val[1], x.val[1]); ++ ++ float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0); ++ float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1); ++ 
++ float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1); ++ accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]); ++ } ++ ++ FAISS_ALWAYS_INLINE void add_8_components_2( ++ float32x4x2_t x, ++ float32x4x2_t y) { ++ float32x4_t sub0 = vsubq_f32(y.val[0], x.val[0]); ++ float32x4_t sub1 = vsubq_f32(y.val[1], x.val[1]); ++ ++ float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], sub0, sub0); ++ float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], sub1, sub1); ++ ++ float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1); ++ accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]); ++ } ++ ++ FAISS_ALWAYS_INLINE float result_8() { ++ float32x4_t sum_0 = vpaddq_f32(accu8.val[0], accu8.val[0]); ++ float32x4_t sum_1 = vpaddq_f32(accu8.val[1], accu8.val[1]); ++ ++ float32x4_t sum2_0 = vpaddq_f32(sum_0, sum_0); ++ float32x4_t sum2_1 = vpaddq_f32(sum_1, sum_1); ++ return vgetq_lane_f32(sum2_0, 0) + vgetq_lane_f32(sum2_1, 0); ++ } ++}; ++#endif ++ + template + struct SimilarityIP {}; + +@@ -801,6 +983,56 @@ struct SimilarityIP<8> { + }; + #endif + ++#ifdef __aarch64__ ++ ++template <> ++struct SimilarityIP<8> { ++ static constexpr int simdwidth = 8; ++ static constexpr MetricType metric_type = METRIC_INNER_PRODUCT; ++ ++ const float *y, *yi; ++ ++ explicit SimilarityIP(const float* y) : y(y) {} ++ float32x4x2_t accu8; ++ ++ FAISS_ALWAYS_INLINE void begin_8() { ++ accu8 = vzipq_f32(vdupq_n_f32(0.0f), vdupq_n_f32(0.0f)); ++ yi = y; ++ } ++ ++ FAISS_ALWAYS_INLINE void add_8_components(float32x4x2_t x) { ++ float32x4x2_t yiv = vld1q_f32_x2(yi); ++ yi += 8; ++ ++ float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], yiv.val[0], x.val[0]); ++ float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], yiv.val[1], x.val[1]); ++ float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1); ++ accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]); ++ } ++ ++ FAISS_ALWAYS_INLINE void add_8_components_2( ++ float32x4x2_t x1, ++ float32x4x2_t x2) { ++ float32x4_t accu8_0 = vfmaq_f32(accu8.val[0], x1.val[0], x2.val[0]); ++ 
float32x4_t accu8_1 = vfmaq_f32(accu8.val[1], x1.val[1], x2.val[1]); ++ float32x4x2_t accu8_temp = vzipq_f32(accu8_0, accu8_1); ++ accu8 = vuzpq_f32(accu8_temp.val[0], accu8_temp.val[1]); ++ } ++ ++ FAISS_ALWAYS_INLINE float result_8() { ++ float32x4x2_t sum_tmp = vzipq_f32( ++ vpaddq_f32(accu8.val[0], accu8.val[0]), ++ vpaddq_f32(accu8.val[1], accu8.val[1])); ++ float32x4x2_t sum = vuzpq_f32(sum_tmp.val[0], sum_tmp.val[1]); ++ float32x4x2_t sum2_tmp = vzipq_f32( ++ vpaddq_f32(sum.val[0], sum.val[0]), ++ vpaddq_f32(sum.val[1], sum.val[1])); ++ float32x4x2_t sum2 = vuzpq_f32(sum2_tmp.val[0], sum2_tmp.val[1]); ++ return vgetq_lane_f32(sum2.val[0], 0) + vgetq_lane_f32(sum2.val[1], 0); ++ } ++}; ++#endif ++ + /******************************************************************* + * DistanceComputer: combines a similarity and a quantizer to do + * code-to-vector or code-to-code comparisons +@@ -903,6 +1135,53 @@ struct DCTemplate : SQDistanceComputer { + + #endif + ++#ifdef __aarch64__ ++ ++template ++struct DCTemplate : SQDistanceComputer { ++ using Sim = Similarity; ++ ++ Quantizer quant; ++ ++ DCTemplate(size_t d, const std::vector& trained) ++ : quant(d, trained) {} ++ float compute_distance(const float* x, const uint8_t* code) const { ++ Similarity sim(x); ++ sim.begin_8(); ++ for (size_t i = 0; i < quant.d; i += 8) { ++ float32x4x2_t xi = quant.reconstruct_8_components(code, i); ++ sim.add_8_components(xi); ++ } ++ return sim.result_8(); ++ } ++ ++ float compute_code_distance(const uint8_t* code1, const uint8_t* code2) ++ const { ++ Similarity sim(nullptr); ++ sim.begin_8(); ++ for (size_t i = 0; i < quant.d; i += 8) { ++ float32x4x2_t x1 = quant.reconstruct_8_components(code1, i); ++ float32x4x2_t x2 = quant.reconstruct_8_components(code2, i); ++ sim.add_8_components_2(x1, x2); ++ } ++ return sim.result_8(); ++ } ++ ++ void set_query(const float* x) final { ++ q = x; ++ } ++ ++ float symmetric_dis(idx_t i, idx_t j) override { ++ return compute_code_distance( ++ 
codes + i * code_size, codes + j * code_size); ++ } ++ ++ float query_to_code(const uint8_t* code) const final { ++ return compute_distance(q, code); ++ } ++}; ++#endif ++ + /******************************************************************* + * DistanceComputerByte: computes distances in the integer domain + *******************************************************************/ +@@ -1019,6 +1298,54 @@ struct DistanceComputerByte : SQDistanceComputer { + + #endif + ++#ifdef __aarch64__ ++ ++template ++struct DistanceComputerByte : SQDistanceComputer { ++ using Sim = Similarity; ++ ++ int d; ++ std::vector tmp; ++ ++ DistanceComputerByte(int d, const std::vector&) : d(d), tmp(d) {} ++ ++ int compute_code_distance(const uint8_t* code1, const uint8_t* code2) ++ const { ++ int accu = 0; ++ for (int i = 0; i < d; i++) { ++ if (Sim::metric_type == METRIC_INNER_PRODUCT) { ++ accu += int(code1[i]) * code2[i]; ++ } else { ++ int diff = int(code1[i]) - code2[i]; ++ accu += diff * diff; ++ } ++ } ++ return accu; ++ } ++ ++ void set_query(const float* x) final { ++ for (int i = 0; i < d; i++) { ++ tmp[i] = int(x[i]); ++ } ++ } ++ ++ int compute_distance(const float* x, const uint8_t* code) { ++ set_query(x); ++ return compute_code_distance(tmp.data(), code); ++ } ++ ++ float symmetric_dis(idx_t i, idx_t j) override { ++ return compute_code_distance( ++ codes + i * code_size, codes + j * code_size); ++ } ++ ++ float query_to_code(const uint8_t* code) const final { ++ return compute_code_distance(tmp.data(), code); ++ } ++}; ++ ++#endif ++ + /******************************************************************* + * select_distance_computer: runtime selection of template + * specialization +@@ -1155,7 +1482,7 @@ void ScalarQuantizer::train(size_t n, const float* x) { + } + + ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const { +-#ifdef USE_F16C ++#if defined(USE_F16C) || defined(__aarch64__) + if (d % 8 == 0) { + return select_quantizer_1<8>(qtype, d, trained); + 
} else +@@ -1186,7 +1513,7 @@ void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { + SQDistanceComputer* ScalarQuantizer::get_distance_computer( + MetricType metric) const { + FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT); +-#ifdef USE_F16C ++#if defined(USE_F16C) || defined(__aarch64__) + if (d % 8 == 0) { + if (metric == METRIC_L2) { + return select_distance_computer>(qtype, d, trained); +@@ -1522,7 +1849,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner( + bool store_pairs, + const IDSelector* sel, + bool by_residual) const { +-#ifdef USE_F16C ++#if defined(USE_F16C) || defined(__aarch64__) + if (d % 8 == 0) { + return sel0_InvertedListScanner<8>( + mt, this, quantizer, store_pairs, sel, by_residual); +diff --git a/faiss/utils/fp16-arm.h b/faiss/utils/fp16-arm.h +new file mode 100644 +index 00000000..79c885b0 +--- /dev/null ++++ b/faiss/utils/fp16-arm.h +@@ -0,0 +1,29 @@ ++/** ++ * Copyright (c) Facebook, Inc. and its affiliates. ++ * ++ * This source code is licensed under the MIT license found in the ++ * LICENSE file in the root directory of this source tree. 
++ */ ++ ++#pragma once ++ ++#include ++#include ++ ++namespace faiss { ++ ++inline uint16_t encode_fp16(float x) { ++ float32x4_t fx4 = vdupq_n_f32(x); ++ float16x4_t f16x4 = vcvt_f16_f32(fx4); ++ uint16x4_t ui16x4 = vreinterpret_u16_f16(f16x4); ++ return vduph_lane_u16(ui16x4, 3); ++} ++ ++inline float decode_fp16(uint16_t x) { ++ uint16x4_t ui16x4 = vdup_n_u16(x); ++ float16x4_t f16x4 = vreinterpret_f16_u16(ui16x4); ++ float32x4_t fx4 = vcvt_f32_f16(f16x4); ++ return vdups_laneq_f32(fx4, 3); ++} ++ ++} // namespace faiss +diff --git a/faiss/utils/fp16.h b/faiss/utils/fp16.h +index 90691d8f..43e05dc3 100644 +--- a/faiss/utils/fp16.h ++++ b/faiss/utils/fp16.h +@@ -13,6 +13,8 @@ + + #if defined(__F16C__) + #include ++#elif defined(__aarch64__) ++#include + #else + #include + #endif diff --git a/jni/patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch b/jni/patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch new file mode 100644 index 000000000..22d50e66c --- /dev/null +++ b/jni/patches/faiss/0003-Custom-patch-to-support-AVX2-Linux-CI.patch @@ -0,0 +1,32 @@ +Temporarily replace the intrinsic '_mm_loadu_si64' with '_mm_loadl_epi64' until CentOS 7 is deprecated in our Linux CI. +CentOS 7 only supports gcc versions up to 8.x, but the intrinsic '_mm_loadu_si64' requires gcc 9.x or later, +so it is replaced with an equivalent intrinsic. 
+ +diff --git a/faiss/impl/code_distance/code_distance-avx2.h b/faiss/impl/code_distance/code_distance-avx2.h +index 0aa1535b..6e4e5b55 100644 +--- a/faiss/impl/code_distance/code_distance-avx2.h ++++ b/faiss/impl/code_distance/code_distance-avx2.h +@@ -91,7 +91,7 @@ float inline distance_single_code_avx2_pqdecoder8_m8( + __m256 partialSum; + + // load 8 uint8 values +- const __m128i mm1 = _mm_loadu_si64((const __m128i_u*)code); ++ const __m128i mm1 = _mm_loadl_epi64((const __m128i_u*)code); + { + // convert uint8 values (low part of __m128i) to int32 + // values +@@ -199,10 +199,10 @@ inline void distance_four_codes_avx2_pqdecoder8_m8( + + // load 8 uint8 values + __m128i mm1[N]; +- mm1[0] = _mm_loadu_si64((const __m128i_u*)code0); +- mm1[1] = _mm_loadu_si64((const __m128i_u*)code1); +- mm1[2] = _mm_loadu_si64((const __m128i_u*)code2); +- mm1[3] = _mm_loadu_si64((const __m128i_u*)code3); ++ mm1[0] = _mm_loadl_epi64((const __m128i_u*)code0); ++ mm1[1] = _mm_loadl_epi64((const __m128i_u*)code1); ++ mm1[2] = _mm_loadl_epi64((const __m128i_u*)code2); ++ mm1[3] = _mm_loadl_epi64((const __m128i_u*)code3); + + for (intptr_t j = 0; j < N; j++) { + // convert uint8 values (low part of __m128i) to int32 diff --git a/jni/tests/faiss_wrapper_test.cpp b/jni/tests/faiss_wrapper_test.cpp index ed3ec880d..5afe09c22 100644 --- a/jni/tests/faiss_wrapper_test.cpp +++ b/jni/tests/faiss_wrapper_test.cpp @@ -17,6 +17,7 @@ #include "gtest/gtest.h" #include "jni_util.h" #include "test_util.h" +#include "faiss/IndexHNSW.h" using ::testing::NiceMock; using ::testing::Return; @@ -425,3 +426,55 @@ TEST(FaissTrainIndexTest, BasicAssertions) { // Confirm that training succeeded ASSERT_TRUE(trainedIndex->is_trained); } + +TEST(FaissCreateHnswSQfp16IndexTest, BasicAssertions) { + // Define the data + faiss::idx_t numIds = 200; + std::vector ids; + std::vector> vectors; + int dim = 2; + for (int64_t i = 0; i < numIds; ++i) { + ids.push_back(i); + + std::vector vect; + vect.reserve(dim); + for 
(int j = 0; j < dim; ++j) { + vect.push_back(test_util::RandomFloat(-500.0, 500.0)); + } + vectors.push_back(vect); + } + + std::string indexPath = test_util::RandomString(10, "tmp/", ".faiss"); + std::string spaceType = knn_jni::L2; + std::string index_description = "HNSW32,SQfp16"; + + std::unordered_map parametersMap; + parametersMap[knn_jni::SPACE_TYPE] = (jobject)&spaceType; + parametersMap[knn_jni::INDEX_DESCRIPTION] = (jobject)&index_description; + + // Set up jni + JNIEnv *jniEnv = nullptr; + NiceMock mockJNIUtil; + + EXPECT_CALL(mockJNIUtil, + GetJavaObjectArrayLength( + jniEnv, reinterpret_cast(&vectors))) + .WillRepeatedly(Return(vectors.size())); + + // Create the index + knn_jni::faiss_wrapper::CreateIndex( + &mockJNIUtil, jniEnv, reinterpret_cast(&ids), + reinterpret_cast(&vectors), (jstring)&indexPath, + (jobject)&parametersMap); + + // Make sure index can be loaded + std::unique_ptr index(test_util::FaissLoadIndex(indexPath)); + auto indexIDMap = dynamic_cast(index.get()); + + // Assert that Index is of type IndexHNSWSQ + ASSERT_NE(indexIDMap, nullptr); + ASSERT_NE(dynamic_cast(indexIDMap->index), nullptr); + + // Clean up + std::remove(indexPath.c_str()); +} diff --git a/src/main/java/org/opensearch/knn/common/KNNConstants.java b/src/main/java/org/opensearch/knn/common/KNNConstants.java index 5b968ce31..7f493ea35 100644 --- a/src/main/java/org/opensearch/knn/common/KNNConstants.java +++ b/src/main/java/org/opensearch/knn/common/KNNConstants.java @@ -7,6 +7,8 @@ import org.opensearch.knn.index.VectorDataType; +import java.util.List; + public class KNNConstants { // shared across library constants public static final String DIMENSION = "dimension"; @@ -89,6 +91,11 @@ public class KNNConstants { public static final String FAISS_IVF_DESCRIPTION = "IVF"; public static final String FAISS_FLAT_DESCRIPTION = "Flat"; public static final String FAISS_PQ_DESCRIPTION = "PQ"; + public static final String ENCODER_SQ = "sq"; + public static final String 
FAISS_SQ_DESCRIPTION = "SQ"; + public static final String FAISS_SQ_TYPE = "type"; + public static final String FAISS_SQ_ENCODER_FP16 = "fp16"; + public static final List FAISS_SQ_ENCODER_TYPES = List.of(FAISS_SQ_ENCODER_FP16); // Parameter defaults/limits public static final Integer ENCODER_PARAMETER_PQ_CODE_COUNT_DEFAULT = 1; diff --git a/src/main/java/org/opensearch/knn/index/Parameter.java b/src/main/java/org/opensearch/knn/index/Parameter.java index 4d69e7838..bef5a33e9 100644 --- a/src/main/java/org/opensearch/knn/index/Parameter.java +++ b/src/main/java/org/opensearch/knn/index/Parameter.java @@ -95,6 +95,49 @@ public ValidationException validate(Object value) { } } + /** + * String method parameter + */ + public static class StringParameter extends Parameter { + + /** + * Constructor + * + * @param name of the parameter + * @param defaultValue value to assign if the parameter is not set + * @param validator used to validate the parameter value passed + */ + public StringParameter(String name, String defaultValue, Predicate validator) { + super(name, defaultValue, validator); + } + + /** + * Check if the value passed in is valid + * + * @param value to be checked + * @return ValidationException produced by validation errors; null if no validations errors. + */ + @Override + public ValidationException validate(Object value) { + ValidationException validationException = null; + if (!(value instanceof String)) { + validationException = new ValidationException(); + validationException.addValidationError( + String.format("Value not of type String for String " + "parameter \"%s\".", getName()) + ); + return validationException; + } + + if (!validator.test((String) value)) { + validationException = new ValidationException(); + validationException.addValidationError( + String.format("Parameter validation failed for String " + "parameter \"%s\".", getName()) + ); + } + return validationException; + } + } + /** * MethodContext parameter. 
Some methods require sub-methods in order to implement some kind of functionality. For * instance, faiss methods can contain an encoder along side the approximate nearest neighbor function to compress diff --git a/src/main/java/org/opensearch/knn/index/util/Faiss.java b/src/main/java/org/opensearch/knn/index/util/Faiss.java index 71eed404a..420288033 100644 --- a/src/main/java/org/opensearch/knn/index/util/Faiss.java +++ b/src/main/java/org/opensearch/knn/index/util/Faiss.java @@ -28,9 +28,14 @@ import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_CODE_SIZE_DEFAULT; import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_CODE_SIZE_LIMIT; import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_M; +import static org.opensearch.knn.common.KNNConstants.ENCODER_SQ; import static org.opensearch.knn.common.KNNConstants.FAISS_HNSW_DESCRIPTION; import static org.opensearch.knn.common.KNNConstants.FAISS_IVF_DESCRIPTION; import static org.opensearch.knn.common.KNNConstants.FAISS_PQ_DESCRIPTION; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_DESCRIPTION; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_ENCODER_FP16; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_ENCODER_TYPES; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_TYPE; import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; import static org.opensearch.knn.common.KNNConstants.METHOD_IVF; @@ -75,6 +80,21 @@ class Faiss extends NativeLibrary { methodComponentContext ).build()) ) + .build(), + /* SQ encoder: the "type" parameter defaults to fp16 and is validated against FAISS_SQ_ENCODER_TYPES */ + ENCODER_SQ, + MethodComponent.Builder.builder(ENCODER_SQ) + .addParameter( + FAISS_SQ_TYPE, + new Parameter.StringParameter(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16, FAISS_SQ_ENCODER_TYPES::contains) + ) + .setMapGenerator( + ((methodComponent, methodComponentContext) -> MethodAsMapBuilder.builder( + FAISS_SQ_DESCRIPTION, + 
methodComponent, + methodComponentContext + ).addParameter(FAISS_SQ_TYPE, "", "").build()) + ) .build() ); diff --git a/src/test/java/org/opensearch/knn/index/FaissIT.java b/src/test/java/org/opensearch/knn/index/FaissIT.java index c2dd70d3f..9da067fde 100644 --- a/src/test/java/org/opensearch/knn/index/FaissIT.java +++ b/src/test/java/org/opensearch/knn/index/FaissIT.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.primitives.Floats; import lombok.SneakyThrows; +import org.apache.hc.core5.http.ParseException; import org.apache.hc.core5.http.io.entity.EntityUtils; import org.junit.BeforeClass; import org.opensearch.client.Response; @@ -34,15 +35,20 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.TreeMap; import java.util.stream.Collectors; import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_M; import static org.opensearch.knn.common.KNNConstants.ENCODER_PQ; +import static org.opensearch.knn.common.KNNConstants.ENCODER_SQ; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_ENCODER_FP16; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_TYPE; import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; +import static org.opensearch.knn.common.KNNConstants.METHOD_IVF; import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST; import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE; import static org.opensearch.knn.common.KNNConstants.MODEL_ID; @@ -267,6 +273,119 @@ public void testEndToEnd_whenMethodIsHNSWPQ_thenSucceed() { fail("Graphs are not getting evicted"); } + @SneakyThrows + public void testHNSWSQFP16_whenIndexedAndQueried_thenSucceed() { + String 
indexName = "test-index-hnsw-sqfp16"; + String fieldName = "test-field-hnsw-sqfp16"; + + KNNMethod hnswMethod = KNNEngine.FAISS.getMethod(KNNConstants.METHOD_HNSW); + SpaceType[] spaceTypes = { SpaceType.L2, SpaceType.INNER_PRODUCT }; + Random random = random(); + SpaceType spaceType = spaceTypes[random.nextInt(spaceTypes.length)]; + + List<Integer> mValues = ImmutableList.of(16, 32, 64, 128); + List<Integer> efConstructionValues = ImmutableList.of(16, 32, 64, 128); + List<Integer> efSearchValues = ImmutableList.of(16, 32, 64, 128); + + int dimension = 128; + int numDocs = 100; + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("dimension", dimension) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, hnswMethod.getMethodComponent().getName()) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.FAISS.getName()) + .startObject(KNNConstants.PARAMETERS) + .field(KNNConstants.METHOD_PARAMETER_M, mValues.get(random().nextInt(mValues.size()))) + .field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, efConstructionValues.get(random().nextInt(efConstructionValues.size()))) + .field(KNNConstants.METHOD_PARAMETER_EF_SEARCH, efSearchValues.get(random().nextInt(efSearchValues.size()))) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_SQ) + .startObject(PARAMETERS) + .field(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject(); + + Map<String, Object> mappingMap = xContentBuilderToMap(builder); + String mapping = builder.toString(); + + createKnnIndex(indexName, mapping); + assertEquals(new TreeMap<>(mappingMap), new TreeMap<>(getIndexMappingAsMap(indexName))); + indexTestData(indexName, fieldName, dimension, numDocs); + queryTestData(indexName, fieldName, dimension, numDocs); + 
deleteKNNIndex(indexName); + validateGraphEviction(); + } + + @SneakyThrows + public void testIVFSQFP16_whenIndexedAndQueried_thenSucceed() { + + String modelId = "test-model-ivf-sqfp16"; + int dimension = 128; + int numDocs = 100; + + String trainingIndexName = "train-index-ivf-sqfp16"; + String trainingFieldName = "train-field-ivf-sqfp16"; + + // Add training data + createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); + int trainingDataCount = 200; + bulkIngestRandomVectors(trainingIndexName, trainingFieldName, trainingDataCount, dimension); + + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_IVF) + .field(KNN_ENGINE, FAISS_NAME) + .field(METHOD_PARAMETER_SPACE_TYPE, "l2") + .startObject(PARAMETERS) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_SQ) + .startObject(PARAMETERS) + .field(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16) + .endObject() + .endObject() + .endObject() + .endObject(); + Map<String, Object> method = xContentBuilderToMap(builder); + + trainModel(modelId, trainingIndexName, trainingFieldName, dimension, method, "faiss ivf sqfp16 test description"); + + // Make sure training succeeds after 30 seconds + assertTrainingSucceeds(modelId, 30, 1000); + + // Create knn index from model + String fieldName = "test-field-name-ivf-sqfp16"; + String indexName = "test-index-name-ivf-sqfp16"; + String indexMapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field(MODEL_ID, modelId) + .endObject() + .endObject() + .endObject() + .toString(); + + createKnnIndex(indexName, getKNNDefaultIndexSettings(), indexMapping); + + indexTestData(indexName, fieldName, dimension, numDocs); + queryTestData(indexName, fieldName, dimension, numDocs); + deleteKNNIndex(indexName); + validateGraphEviction(); + } + @SneakyThrows public void testEndToEnd_whenMethodIsHNSWPQAndHyperParametersNotSet_thenSucceed() { String indexName = 
"test-index"; @@ -625,4 +744,53 @@ protected void setupKNNIndexForFilterQuery() throws Exception { refreshIndex(INDEX_NAME); } + + /** + * Queries fieldName with a vector whose components all equal numDocs (k = 10) and asserts that exactly k hits come back with doc ids numDocs - 1, numDocs - 2, ... in that order. + */ + private void queryTestData(final String indexName, final String fieldName, final int dimension, final int numDocs) throws IOException, + ParseException { + float[] queryVector = new float[dimension]; + Arrays.fill(queryVector, (float) numDocs); + int k = 10; + + Response searchResponse = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName, queryVector, k), k); + List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), fieldName); + assertEquals(k, results.size()); + for (int i = 0; i < k; i++) { + assertEquals(numDocs - i - 1, Integer.parseInt(results.get(i).getDocId())); + } + } + + /** + * Indexes numDocs documents with ids 0 .. numDocs - 1, where document i holds a vector whose components all equal i and a "rating" attribute of i, then refreshes and asserts the document count. + */ + private void indexTestData(final String indexName, final String fieldName, final int dimension, final int numDocs) throws Exception { + for (int i = 0; i < numDocs; i++) { + float[] indexVector = new float[dimension]; + Arrays.fill(indexVector, (float) i); + addKnnDocWithAttributes(indexName, Integer.toString(i), fieldName, indexVector, ImmutableMap.of("rating", String.valueOf(i))); + } + + // Assert that all docs are ingested + refreshAllNonSystemIndices(); + assertEquals(numDocs, getDocCount(indexName)); + } + + /** + * Polls getTotalGraphsInCache() up to 14 times, sleeping 5 seconds after each non-zero reading, and fails the test if the cache never reports zero graphs. + */ + private void validateGraphEviction() throws Exception { + // Poll the graph cache every 5 seconds, up to 14 times, to confirm graphs get evicted + int intervals = 14; + for (int i = 0; i < intervals; i++) { + if (getTotalGraphsInCache() == 0) { + return; + } + + Thread.sleep(5 * 1000); + } + + fail("Graphs are not getting evicted"); + } } diff --git a/src/test/java/org/opensearch/knn/index/ParameterTests.java b/src/test/java/org/opensearch/knn/index/ParameterTests.java index 4e7adfc8c..08decd592 100644 --- a/src/test/java/org/opensearch/knn/index/ParameterTests.java +++ b/src/test/java/org/opensearch/knn/index/ParameterTests.java @@ -15,6 +15,7 @@ import org.opensearch.knn.KNNTestCase; import org.opensearch.common.ValidationException; 
import org.opensearch.knn.index.Parameter.IntegerParameter; +import org.opensearch.knn.index.Parameter.StringParameter; import org.opensearch.knn.index.Parameter.MethodComponentContextParameter; import java.util.Map; @@ -51,6 +52,22 @@ public void testIntegerParameter_validate() { assertNull(parameter.validate(12)); } + public void testStringParameter_validate() { + final StringParameter parameter = new StringParameter("test_parameter", "default_value", v -> "test".equals(v)); + + // Invalid type + assertNotNull(parameter.validate(5)); + + // null + assertNotNull(parameter.validate(null)); + + // Invalid value: a String that the validator rejects + assertNotNull(parameter.validate("not-test")); + + // valid value + assertNull(parameter.validate("test")); + } + public void testMethodComponentContextParameter_validate() { String methodComponentName1 = "method-1"; String parameterKey1 = "parameter_key_1"; diff --git a/src/test/java/org/opensearch/knn/index/util/FaissTests.java b/src/test/java/org/opensearch/knn/index/util/FaissTests.java index 0e7bc6482..5dc348a29 100644 --- a/src/test/java/org/opensearch/knn/index/util/FaissTests.java +++ b/src/test/java/org/opensearch/knn/index/util/FaissTests.java @@ -5,6 +5,7 @@ package org.opensearch.knn.index.util; +import lombok.SneakyThrows; import org.opensearch.Version; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.common.xcontent.XContentFactory; @@ -22,7 +23,10 @@ import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_CODE_SIZE; import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_M; import static org.opensearch.knn.common.KNNConstants.ENCODER_PQ; +import static org.opensearch.knn.common.KNNConstants.ENCODER_SQ; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_ENCODER_FP16; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_TYPE; import static org.opensearch.knn.common.KNNConstants.INDEX_DESCRIPTION_PARAMETER; import static 
org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; @@ -86,6 +90,35 @@ public void testGetMethodAsMap_whenMethodIsHNSWPQ_thenCreateCorrectIndexDescript assertEquals(expectedIndexDescription, map.get(INDEX_DESCRIPTION_PARAMETER)); } + @SneakyThrows + public void testGetMethodAsMap_whenMethodIsHNSWSQFP16_thenCreateCorrectIndexDescription() { + int hnswMParam = 65; + String expectedIndexDescription = String.format(Locale.ROOT, "HNSW%d,SQfp16", hnswMParam); + + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_HNSW) + .field(KNN_ENGINE, FAISS_NAME) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_M, hnswMParam) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_SQ) + .startObject(PARAMETERS) + .field(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16) + .endObject() + .endObject() + .endObject() + .endObject(); + Map<String, Object> in = xContentBuilderToMap(xContentBuilder); + KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + knnMethodContext.getMethodComponentContext().setIndexVersion(Version.CURRENT); + + Map<String, Object> map = Faiss.INSTANCE.getMethodAsMap(knnMethodContext); + + assertTrue(map.containsKey(INDEX_DESCRIPTION_PARAMETER)); + assertEquals(expectedIndexDescription, map.get(INDEX_DESCRIPTION_PARAMETER)); + } + public void testGetMethodAsMap_whenMethodIsIVFFlat_thenCreateCorrectIndexDescription() throws IOException { int nlists = 88; String expectedIndexDescription = String.format(Locale.ROOT, "IVF%d,Flat", nlists); @@ -137,6 +170,34 @@ public void testGetMethodAsMap_whenMethodIsIVFPQ_thenCreateCorrectIndexDescripti assertEquals(expectedIndexDescription, map.get(INDEX_DESCRIPTION_PARAMETER)); } + @SneakyThrows + public void testGetMethodAsMap_whenMethodIsIVFSQFP16_thenCreateCorrectIndexDescription() { + int nlists = 88; + String expectedIndexDescription = String.format(Locale.ROOT, "IVF%d,SQfp16", nlists); + + XContentBuilder 
xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_IVF) + .field(KNN_ENGINE, FAISS_NAME) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_NLIST, nlists) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_SQ) + .startObject(PARAMETERS) + .field(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16) + .endObject() + .endObject() + .endObject() + .endObject(); + Map<String, Object> in = xContentBuilderToMap(xContentBuilder); + KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + + Map<String, Object> map = Faiss.INSTANCE.getMethodAsMap(knnMethodContext); + + assertTrue(map.containsKey(INDEX_DESCRIPTION_PARAMETER)); + assertEquals(expectedIndexDescription, map.get(INDEX_DESCRIPTION_PARAMETER)); + } + public void testMethodAsMapBuilder() throws IOException { String methodName = "test-method"; String methodDescription = "test-description"; diff --git a/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java b/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java index 0a05c95a0..c2470ea47 100644 --- a/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java +++ b/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java @@ -13,6 +13,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import lombok.SneakyThrows; import org.junit.BeforeClass; import org.opensearch.Version; import org.opensearch.common.xcontent.XContentFactory; @@ -41,7 +42,10 @@ import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_M; import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_CODE_SIZE; import static org.opensearch.knn.common.KNNConstants.ENCODER_PQ; +import static org.opensearch.knn.common.KNNConstants.ENCODER_SQ; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_ENCODER_FP16; +import static org.opensearch.knn.common.KNNConstants.FAISS_SQ_TYPE; import static 
org.opensearch.knn.common.KNNConstants.INDEX_DESCRIPTION_PARAMETER; import static org.opensearch.knn.common.KNNConstants.INDEX_THREAD_QTY; import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; @@ -466,6 +470,111 @@ public void testCreateIndex_faiss_invalid_invalidIndexDescription() throws IOExc ); } + @SneakyThrows + public void testCreateIndex_faiss_sqfp16_invalidIndexDescription() { + + int[] docIds = new int[] { 1, 2 }; + float[][] vectors = new float[][] { { 2, 3 }, { 3, 4 } }; + String sqfp16InvalidIndexDescription = "HNSW16,SQfp1655"; + + Path tmpFile = createTempFile(); + expectThrows( + Exception.class, + () -> JNIService.createIndex( + docIds, + vectors, + tmpFile.toAbsolutePath().toString(), + ImmutableMap.of( + INDEX_DESCRIPTION_PARAMETER, + sqfp16InvalidIndexDescription, + KNNConstants.SPACE_TYPE, + SpaceType.L2.getValue() + ), + FAISS_NAME + ) + ); + } + + @SneakyThrows + public void testLoadIndex_faiss_sqfp16_valid() { + + int[] docIds = new int[] { 1, 2 }; + float[][] vectors = new float[][] { { 2, 3 }, { 3, 4 } }; + String sqfp16IndexDescription = "HNSW16,SQfp16"; + + Path tmpFile = createTempFile(); + JNIService.createIndex( + docIds, + vectors, + tmpFile.toAbsolutePath().toString(), + ImmutableMap.of(INDEX_DESCRIPTION_PARAMETER, sqfp16IndexDescription, KNNConstants.SPACE_TYPE, SpaceType.L2.getValue()), + FAISS_NAME + ); + assertTrue(tmpFile.toFile().length() > 0); + + long pointer = JNIService.loadIndex(tmpFile.toAbsolutePath().toString(), Collections.emptyMap(), FAISS_NAME); + assertNotEquals(0, pointer); + } + + @SneakyThrows + public void testQueryIndex_faiss_sqfp16_valid() { + + String sqfp16IndexDescription = "HNSW16,SQfp16"; + int k = 10; + + Path tmpFile = createTempFile(); + JNIService.createIndex( + testData.indexData.docs, + testData.indexData.vectors, + tmpFile.toAbsolutePath().toString(), + ImmutableMap.of(INDEX_DESCRIPTION_PARAMETER, sqfp16IndexDescription, KNNConstants.SPACE_TYPE, SpaceType.L2.getValue()), + FAISS_NAME + 
); + assertTrue(tmpFile.toFile().length() > 0); + + long pointer = JNIService.loadIndex(tmpFile.toAbsolutePath().toString(), Collections.emptyMap(), FAISS_NAME); + assertNotEquals(0, pointer); + + for (float[] query : testData.queries) { + KNNQueryResult[] results = JNIService.queryIndex(pointer, query, k, FAISS_NAME, null, null); + assertEquals(k, results.length); + } + + // Filter will result in no ids + for (float[] query : testData.queries) { + KNNQueryResult[] results = JNIService.queryIndex(pointer, query, k, FAISS_NAME, new int[] { 0 }, null); + assertEquals(0, results.length); + } + } + + @SneakyThrows + public void testTrain_whenConfigurationIsIVFSQFP16_thenSucceed() { + long trainPointer = transferVectors(10); + int ivfNlistParam = 16; + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_IVF) + .field(KNN_ENGINE, FAISS_NAME) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_NLIST, ivfNlistParam) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_SQ) + .startObject(PARAMETERS) + .field(FAISS_SQ_TYPE, FAISS_SQ_ENCODER_FP16) + .endObject() + .endObject() + .endObject() + .endObject(); + Map<String, Object> in = xContentBuilderToMap(xContentBuilder); + KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + Map<String, Object> parameters = KNNEngine.FAISS.getMethodAsMap(knnMethodContext); + + byte[] faissIndex = JNIService.trainIndex(parameters, 128, trainPointer, FAISS_NAME); + + assertNotEquals(0, faissIndex.length); + JNIService.freeVectors(trainPointer); + } + public void testCreateIndex_faiss_invalid_invalidParameterType() throws IOException { int[] docIds = new int[] {};