From dc8c28530e16496d2e6ae3201021810c734a3207 Mon Sep 17 00:00:00 2001 From: Heemin Kim Date: Mon, 5 Aug 2024 14:12:25 -0700 Subject: [PATCH] Make build succeed with OS 2.11 Signed-off-by: Heemin Kim --- ...backwards_compatibility_tests_workflow.yml | 8 +- build.gradle | 6 +- .../org/opensearch/knn/index/IndexUtil.java | 20 +- .../index/KNNVectorSimilarityFunction.java | 30 +- .../codec/BasePerFieldKnnVectorsFormat.java | 5 - .../KNN950PerFieldKnnVectorsFormat.java | 14 +- .../index/codec/KNN990Codec/KNN990Codec.java | 61 ---- .../KNN990PerFieldKnnVectorsFormat.java | 54 ---- .../knn/index/codec/KNNCodecVersion.java | 22 +- .../knn/index/query/KNNQueryFactory.java | 42 +-- .../knn/index/query/RNNQueryFactory.java | 71 +--- .../index/query/request/MethodParameter.java | 4 +- .../opensearch/knn/index/util/BitUtil.java | 302 ++++++++++++++++++ .../index/util/IndexHyperParametersUtil.java | 20 -- .../knn/plugin/script/KNNScoringUtil.java | 76 ++++- .../services/org.apache.lucene.codecs.Codec | 4 +- .../KNNVectorSimilarityFunctionTests.java | 3 +- .../opensearch/knn/index/LuceneEngineIT.java | 4 +- .../knn/index/SegmentReplicationIT.java | 94 ------ .../codec/KNN990Codec/KNN990CodecTests.java | 51 --- .../knn/index/codec/KNNCodecFactoryTests.java | 8 - .../knn/index/codec/KNNCodecServiceTests.java | 1 - .../knn/index/codec/KNNCodecTestCase.java | 2 - .../knn/index/codec/KNNCodecTestUtil.java | 4 +- .../knn/index/query/KNNQueryBuilderTests.java | 120 +------ .../knn/index/query/KNNQueryFactoryTests.java | 7 - .../knn/index/query/KNNWeightTests.java | 7 - .../knn/index/query/RNNQueryFactoryTests.java | 146 --------- .../util/IndexHyperParametersUtilTests.java | 4 - 29 files changed, 440 insertions(+), 750 deletions(-) delete mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java delete mode 100644 src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java create mode 100644 src/main/java/org/opensearch/knn/index/util/BitUtil.java delete mode 100644 src/test/java/org/opensearch/knn/index/SegmentReplicationIT.java delete mode 100644 src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java delete mode 100644 src/test/java/org/opensearch/knn/index/query/RNNQueryFactoryTests.java diff --git a/.github/workflows/backwards_compatibility_tests_workflow.yml b/.github/workflows/backwards_compatibility_tests_workflow.yml index 8946ed2c1b..bcc8798876 100644 --- a/.github/workflows/backwards_compatibility_tests_workflow.yml +++ b/.github/workflows/backwards_compatibility_tests_workflow.yml @@ -14,8 +14,8 @@ jobs: strategy: matrix: java: [ 11, 17 ] - bwc_version : [ "1.1.0", "1.2.4", "1.3.8", "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0", "2.12.0", "2.13.0", "2.14.0", "2.15.0" ] - opensearch_version : [ "2.16.0-SNAPSHOT" ] + bwc_version : [ "1.1.0", "1.2.4", "1.3.8", "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0" ] + opensearch_version : [ "2.11.1" ] name: k-NN Restart-Upgrade BWC Tests runs-on: ubuntu-latest @@ -52,8 +52,8 @@ jobs: strategy: matrix: java: [ 11, 17 ] - bwc_version: [ "1.3.8", "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0", "2.12.0", "2.13.0", "2.14.0", "2.15.0"] - opensearch_version: [ "2.16.0-SNAPSHOT" ] + bwc_version: [ "1.3.8", "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0" ] + opensearch_version: [ "2.11.1" ] name: k-NN Rolling-Upgrade BWC Tests runs-on: ubuntu-latest diff --git a/build.gradle b/build.gradle index 73f11933b9..86cf4c2c62 100644 --- a/build.gradle +++ b/build.gradle @@ -13,7 +13,7 @@ buildscript { ext { // build.version_qualifier parameter applies to knn plugin artifacts only. OpenSearch version must be set // explicitly as 'opensearch.version' property, for instance opensearch.version=2.0.0-rc1-SNAPSHOT - opensearch_version = System.getProperty("opensearch.version", "2.16.0-SNAPSHOT") + opensearch_version = System.getProperty("opensearch.version", "2.11.1") version_qualifier = System.getProperty("build.version_qualifier", "") opensearch_group = "org.opensearch" isSnapshot = "true" == System.getProperty("build.snapshot", "true") @@ -293,9 +293,9 @@ dependencies { api group: 'com.google.guava', name: 'guava', version:'32.1.3-jre' api group: 'commons-lang', name: 'commons-lang', version: '2.6' testFixturesImplementation "org.opensearch.test:framework:${opensearch_version}" - testImplementation group: 'net.bytebuddy', name: 'byte-buddy', version: '1.14.9' + testImplementation group: 'net.bytebuddy', name: 'byte-buddy', version: '1.14.7' testImplementation group: 'org.objenesis', name: 'objenesis', version: '3.2' - testImplementation group: 'net.bytebuddy', name: 'byte-buddy-agent', version: '1.14.9' + testImplementation group: 'net.bytebuddy', name: 'byte-buddy-agent', version: '1.14.7' testFixturesImplementation "org.opensearch:common-utils:${version}" implementation 'com.github.oshi:oshi-core:6.4.13' api "net.java.dev.jna:jna:5.13.0" diff --git a/src/main/java/org/opensearch/knn/index/IndexUtil.java b/src/main/java/org/opensearch/knn/index/IndexUtil.java index 524c9267e6..8524999ce4 100644 --- a/src/main/java/org/opensearch/knn/index/IndexUtil.java +++ b/src/main/java/org/opensearch/knn/index/IndexUtil.java @@ -33,6 +33,7 @@ import java.util.Locale; import java.util.Map; +import static org.opensearch.Version.CURRENT; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; import static org.opensearch.knn.common.KNNConstants.HNSW_ALGO_EF_SEARCH; import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE; @@ -45,11 +46,11 @@ public class IndexUtil { private static final Version MINIMAL_SUPPORTED_VERSION_FOR_LUCENE_HNSW_FILTER = Version.V_2_4_0; private static final Version MINIMAL_SUPPORTED_VERSION_FOR_IGNORE_UNMAPPED = Version.V_2_11_0; - private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_NODE_ASSIGNMENT = Version.V_2_12_0; - private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_METHOD_COMPONENT_CONTEXT = Version.V_2_13_0; - private static final Version MINIMAL_SUPPORTED_VERSION_FOR_RADIAL_SEARCH = Version.V_2_14_0; - private static final Version MINIMAL_SUPPORTED_VERSION_FOR_METHOD_PARAMETERS = Version.V_2_16_0; - private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE = Version.V_2_16_0; + private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_NODE_ASSIGNMENT = null; + private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_METHOD_COMPONENT_CONTEXT = null; + private static final Version MINIMAL_SUPPORTED_VERSION_FOR_RADIAL_SEARCH = null; + private static final Version MINIMAL_SUPPORTED_VERSION_FOR_METHOD_PARAMETERS = null; + private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_VECTOR_DATA_TYPE = null; // public so neural search can access it public static final Map minimalRequiredVersionMap = initializeMinimalRequiredVersionMap(); @@ -281,10 +282,15 @@ public static Map getParametersAtLoading( return Collections.unmodifiableMap(loadParameters); } + // Placeholder for feature flag + private static boolean enable216() { + return true; + } + public static boolean isClusterOnOrAfterMinRequiredVersion(String key) { Version minimalRequiredVersion = minimalRequiredVersionMap.get(key); if (minimalRequiredVersion == null) { - return false; + return enable216(); } return KNNClusterUtil.instance().getClusterMinVersion().onOrAfter(minimalRequiredVersion); } @@ -292,7 +298,7 @@ public static boolean isClusterOnOrAfterMinRequiredVersion(String key) { public static boolean isVersionOnOrAfterMinRequiredVersion(Version version, String key) { Version minimalRequiredVersion = minimalRequiredVersionMap.get(key); if (minimalRequiredVersion == null) { - return false; + ; } return version.onOrAfter(minimalRequiredVersion); } diff --git a/src/main/java/org/opensearch/knn/index/KNNVectorSimilarityFunction.java b/src/main/java/org/opensearch/knn/index/KNNVectorSimilarityFunction.java index 7eca6287c5..463e1e6834 100644 --- a/src/main/java/org/opensearch/knn/index/KNNVectorSimilarityFunction.java +++ b/src/main/java/org/opensearch/knn/index/KNNVectorSimilarityFunction.java @@ -8,6 +8,8 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.opensearch.knn.plugin.script.KNNScoringUtil; +import static org.apache.lucene.util.VectorUtil.dotProduct; + /** * Wrapper class of VectorSimilarityFunction to support more function than what Lucene provides */ @@ -15,7 +17,22 @@ public enum KNNVectorSimilarityFunction { EUCLIDEAN(VectorSimilarityFunction.EUCLIDEAN), DOT_PRODUCT(VectorSimilarityFunction.DOT_PRODUCT), COSINE(VectorSimilarityFunction.COSINE), - MAXIMUM_INNER_PRODUCT(VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT), + MAXIMUM_INNER_PRODUCT(null) { + @Override + public float compare(float[] v1, float[] v2) { + return scaleMaxInnerProductScore(dotProduct(v1, v2)); + } + + @Override + public float compare(byte[] v1, byte[] v2) { + return scaleMaxInnerProductScore(dotProduct(v1, v2)); + } + + @Override + public VectorSimilarityFunction getVectorSimilarityFunction() { + throw new IllegalStateException("VectorSimilarityFunction is not available for Hamming space"); + } + }, HAMMING(null) { @Override public float compare(float[] v1, float[] v2) { @@ -50,4 +67,15 @@ public float compare(float[] var1, float[] var2) { public float compare(byte[] var1, byte[] var2) { return vectorSimilarityFunction.compare(var1, var2); } + + /** + * @param vectorDotProductSimilarity the raw similarity between two vectors + * @return A scaled score preventing negative scores for maximum-inner-product + */ + public static float scaleMaxInnerProductScore(float vectorDotProductSimilarity) { + if (vectorDotProductSimilarity < 0) { + return 1 / (1 + -1 * vectorDotProductSimilarity); + } + return vectorDotProductSimilarity + 1; + } } diff --git a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java index f3738452a3..f06b9c255e 100644 --- a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java @@ -111,11 +111,6 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { return vectorsFormatSupplier.apply(knnVectorsFormatParams); } - @Override - public int getMaxDimensions(String fieldName) { - return getKnnVectorsFormatForField(fieldName).getMaxDimensions(fieldName); - } - private boolean isKnnVectorFieldType(final String field) { return mapperService.isPresent() && mapperService.get().fieldType(field) instanceof KNNVectorFieldMapper.KNNVectorFieldType; } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java index 978b220035..a319f55376 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java @@ -5,10 +5,9 @@ package org.opensearch.knn.index.codec.KNN950Codec; -import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; -import org.opensearch.knn.index.util.KNNEngine; import java.util.Optional; @@ -29,15 +28,4 @@ public KNN950PerFieldKnnVectorsFormat(final Optional mapperServic ) ); } - - @Override - /** - * This method returns the maximum dimension allowed from KNNEngine for Lucene codec - * - * @param fieldName Name of the field, ignored - * @return Maximum constant dimension set by KNNEngine - */ - public int getMaxDimensions(String fieldName) { - return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); - } } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java deleted file mode 100644 index 4b8a1d3cd9..0000000000 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990Codec.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.codec.KNN990Codec; - -import lombok.Builder; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.CompoundFormat; -import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; -import org.opensearch.knn.index.codec.KNNCodecVersion; -import org.opensearch.knn.index.codec.KNNFormatFacade; - -/** - * KNN Codec that wraps the Lucene Codec which is part of Lucene 9.9 - */ -public class KNN990Codec extends FilterCodec { - private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_9_0; - private final KNNFormatFacade knnFormatFacade; - private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat; - - /** - * No arg constructor that uses Lucene99 as the delegate - */ - public KNN990Codec() { - this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat()); - } - - /** - * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec - * and a unique name to this ctor. - * - * @param delegate codec that will perform all operations this codec does not override - * @param knnVectorsFormat per field format for KnnVector - */ - @Builder - protected KNN990Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) { - super(VERSION.getCodecName(), delegate); - knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate); - perFieldKnnVectorsFormat = knnVectorsFormat; - } - - @Override - public DocValuesFormat docValuesFormat() { - return knnFormatFacade.docValuesFormat(); - } - - @Override - public CompoundFormat compoundFormat() { - return knnFormatFacade.compoundFormat(); - } - - @Override - public KnnVectorsFormat knnVectorsFormat() { - return perFieldKnnVectorsFormat; - } -} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java deleted file mode 100644 index e8ecfad181..0000000000 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.codec.KNN990Codec; - -import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; -import org.opensearch.knn.index.util.KNNEngine; - -import java.util.Optional; - -/** - * Class provides per field format implementation for Lucene Knn vector type - */ -public class KNN990PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat { - private static final int NUM_MERGE_WORKERS = 1; - - public KNN990PerFieldKnnVectorsFormat(final Optional mapperService) { - super( - mapperService, - Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, - Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, - () -> new Lucene99HnswVectorsFormat(), - knnVectorsFormatParams -> new Lucene99HnswVectorsFormat( - knnVectorsFormatParams.getMaxConnections(), - knnVectorsFormatParams.getBeamWidth() - ), - knnScalarQuantizedVectorsFormatParams -> new Lucene99HnswScalarQuantizedVectorsFormat( - knnScalarQuantizedVectorsFormatParams.getMaxConnections(), - knnScalarQuantizedVectorsFormatParams.getBeamWidth(), - NUM_MERGE_WORKERS, - knnScalarQuantizedVectorsFormatParams.getBits(), - knnScalarQuantizedVectorsFormatParams.isCompressFlag(), - knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(), - null - ) - ); - } - - @Override - /** - * This method returns the maximum dimension allowed from KNNEngine for Lucene codec - * - * @param fieldName Name of the field, ignored - * @return Maximum constant dimension set by KNNEngine - */ - public int getMaxDimensions(String fieldName) { - return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); - } -} diff --git a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java index 505dd50a5f..cbf6680f7f 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java @@ -11,8 +11,7 @@ import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec; -import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene95.Lucene95Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat; @@ -24,8 +23,6 @@ import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat; import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec; import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat; -import org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec; -import org.opensearch.knn.index.codec.KNN990Codec.KNN990PerFieldKnnVectorsFormat; import java.util.Optional; import java.util.function.BiFunction; @@ -95,24 +92,9 @@ public enum KNNCodecVersion { .knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) .build(), KNN950Codec::new - ), - - V_9_9_0( - "KNN990Codec", - new Lucene99Codec(), - new KNN990PerFieldKnnVectorsFormat(Optional.empty()), - (delegate) -> new KNNFormatFacade( - new KNN80DocValuesFormat(delegate.docValuesFormat()), - new KNN80CompoundFormat(delegate.compoundFormat()) - ), - (userCodec, mapperService) -> KNN990Codec.builder() - .delegate(userCodec) - .knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService))) - .build(), - KNN990Codec::new ); - private static final KNNCodecVersion CURRENT = V_9_9_0; + private static final KNNCodecVersion CURRENT = V_9_5_0; private final String codecName; private final Codec defaultCodecDelegate; diff --git a/src/main/java/org/opensearch/knn/index/query/KNNQueryFactory.java b/src/main/java/org/opensearch/knn/index/query/KNNQueryFactory.java index af7dad0264..cc24fea859 100644 --- a/src/main/java/org/opensearch/knn/index/query/KNNQueryFactory.java +++ b/src/main/java/org/opensearch/knn/index/query/KNNQueryFactory.java @@ -11,8 +11,6 @@ import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.BitSetProducer; -import org.apache.lucene.search.join.DiversifyingChildrenByteKnnVectorQuery; -import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery; import org.opensearch.index.query.QueryShardContext; import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.util.KNNEngine; @@ -132,9 +130,9 @@ public static Query create(CreateQueryRequest createQueryRequest) { log.debug(String.format("Creating Lucene k-NN query for index: %s \"\", field: %s \"\", k: %d", indexName, fieldName, k)); switch (vectorDataType) { case BYTE: - return getKnnByteVectorQuery(fieldName, byteVector, luceneK, filterQuery, parentFilter); + return new KnnByteVectorQuery(fieldName, byteVector, luceneK, filterQuery); case FLOAT: - return getKnnFloatVectorQuery(fieldName, vector, luceneK, filterQuery, parentFilter); + return new KnnFloatVectorQuery(fieldName, vector, luceneK, filterQuery); default: throw new IllegalArgumentException( String.format( @@ -155,40 +153,4 @@ private static Query validateFilterQuerySupport(final Query filterQuery, final K } return null; } - - /** - * If parentFilter is not null, it is a nested query. Therefore, we return {@link DiversifyingChildrenByteKnnVectorQuery} - * which will dedupe search result per parent so that we can get k parent results at the end. - */ - private static Query getKnnByteVectorQuery( - final String fieldName, - final byte[] byteVector, - final int k, - final Query filterQuery, - final BitSetProducer parentFilter - ) { - if (parentFilter == null) { - return new KnnByteVectorQuery(fieldName, byteVector, k, filterQuery); - } else { - return new DiversifyingChildrenByteKnnVectorQuery(fieldName, byteVector, filterQuery, k, parentFilter); - } - } - - /** - * If parentFilter is not null, it is a nested query. Therefore, we return {@link DiversifyingChildrenFloatKnnVectorQuery} - * which will dedupe search result per parent so that we can get k parent results at the end. - */ - private static Query getKnnFloatVectorQuery( - final String fieldName, - final float[] floatVector, - final int k, - final Query filterQuery, - final BitSetProducer parentFilter - ) { - if (parentFilter == null) { - return new KnnFloatVectorQuery(fieldName, floatVector, k, filterQuery); - } else { - return new DiversifyingChildrenFloatKnnVectorQuery(fieldName, floatVector, filterQuery, k, parentFilter); - } - } } diff --git a/src/main/java/org/opensearch/knn/index/query/RNNQueryFactory.java b/src/main/java/org/opensearch/knn/index/query/RNNQueryFactory.java index dd5efc93f6..f33d6d4d99 100644 --- a/src/main/java/org/opensearch/knn/index/query/RNNQueryFactory.java +++ b/src/main/java/org/opensearch/knn/index/query/RNNQueryFactory.java @@ -5,23 +5,17 @@ package org.opensearch.knn.index.query; -import static org.opensearch.knn.common.KNNConstants.DEFAULT_LUCENE_RADIAL_SEARCH_TRAVERSAL_SIMILARITY_RATIO; -import static org.opensearch.knn.common.KNNConstants.VECTOR_DATA_TYPE_FIELD; -import static org.opensearch.knn.index.VectorDataType.SUPPORTED_VECTOR_DATA_TYPES; - -import java.util.Locale; -import java.util.Map; - import lombok.extern.log4j.Log4j2; -import org.apache.lucene.search.ByteVectorSimilarityQuery; -import org.apache.lucene.search.FloatVectorSimilarityQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.BitSetProducer; +import org.opensearch.OpenSearchException; import org.opensearch.index.IndexSettings; import org.opensearch.index.query.QueryShardContext; import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.util.KNNEngine; +import java.util.Map; + /** * Class to create radius nearest neighbor queries */ @@ -67,8 +61,6 @@ public static Query create(RNNQueryFactory.CreateQueryRequest createQueryRequest final String fieldName = createQueryRequest.getFieldName(); final Float radius = createQueryRequest.getRadius(); final float[] vector = createQueryRequest.getVector(); - final byte[] byteVector = createQueryRequest.getByteVector(); - final VectorDataType vectorDataType = createQueryRequest.getVectorDataType(); final Query filterQuery = getFilterQuery(createQueryRequest); final Map methodParameters = createQueryRequest.getMethodParameters(); @@ -93,61 +85,6 @@ public static Query create(RNNQueryFactory.CreateQueryRequest createQueryRequest .filterQuery(filterQuery) .build(); } - - log.debug(String.format("Creating Lucene r-NN query for index: %s \"\", field: %s \"\", k: %f", indexName, fieldName, radius)); - switch (vectorDataType) { - case BYTE: - return getByteVectorSimilarityQuery(fieldName, byteVector, radius, filterQuery); - case FLOAT: - return getFloatVectorSimilarityQuery(fieldName, vector, radius, filterQuery); - default: - throw new IllegalArgumentException( - String.format( - Locale.ROOT, - "Invalid value provided for [%s] field. Supported values are [%s], but got: %s", - VECTOR_DATA_TYPE_FIELD, - SUPPORTED_VECTOR_DATA_TYPES, - vectorDataType - ) - ); - } - } - - /** - * If radius is greater than 0, we return {@link FloatVectorSimilarityQuery} which will return all documents with similarity - * greater than or equal to the resultSimilarity. If filterQuery is not null, it will be used to filter the documents. - */ - private static Query getFloatVectorSimilarityQuery( - final String fieldName, - final float[] floatVector, - final float resultSimilarity, - final Query filterQuery - ) { - return new FloatVectorSimilarityQuery( - fieldName, - floatVector, - DEFAULT_LUCENE_RADIAL_SEARCH_TRAVERSAL_SIMILARITY_RATIO * resultSimilarity, - resultSimilarity, - filterQuery - ); - } - - /** - * If radius is greater than 0, we return {@link ByteVectorSimilarityQuery} which will return all documents with similarity - * greater than or equal to the resultSimilarity. If filterQuery is not null, it will be used to filter the documents. - */ - private static Query getByteVectorSimilarityQuery( - final String fieldName, - final byte[] byteVector, - final float resultSimilarity, - final Query filterQuery - ) { - return new ByteVectorSimilarityQuery( - fieldName, - byteVector, - DEFAULT_LUCENE_RADIAL_SEARCH_TRAVERSAL_SIMILARITY_RATIO * resultSimilarity, - resultSimilarity, - filterQuery - ); + throw new IllegalStateException("Radial search is supported only with faiss Engine"); } } diff --git a/src/main/java/org/opensearch/knn/index/query/request/MethodParameter.java b/src/main/java/org/opensearch/knn/index/query/request/MethodParameter.java index 17f04d7e21..e9f4b22a21 100644 --- a/src/main/java/org/opensearch/knn/index/query/request/MethodParameter.java +++ b/src/main/java/org/opensearch/knn/index/query/request/MethodParameter.java @@ -33,7 +33,7 @@ @RequiredArgsConstructor public enum MethodParameter { - EF_SEARCH(METHOD_PARAMETER_EF_SEARCH, Version.V_2_16_0, EF_SEARCH_FIELD) { + EF_SEARCH(METHOD_PARAMETER_EF_SEARCH, null, EF_SEARCH_FIELD) { @Override public Integer parse(Object value) { return parseInteger(value, METHOD_PARAMETER_EF_SEARCH); @@ -52,7 +52,7 @@ public ValidationException validate(Object value) { } }, - NPROBE(METHOD_PARAMETER_NPROBES, Version.V_2_16_0, NPROBE_FIELD) { + NPROBE(METHOD_PARAMETER_NPROBES, null, NPROBE_FIELD) { @Override public Integer parse(Object value) { return parseInteger(value, METHOD_PARAMETER_EF_SEARCH); diff --git a/src/main/java/org/opensearch/knn/index/util/BitUtil.java b/src/main/java/org/opensearch/knn/index/util/BitUtil.java new file mode 100644 index 0000000000..2515a99846 --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/util/BitUtil.java @@ -0,0 +1,302 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.util; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; + +/** + * A variety of high efficiency bit twiddling routines and encoders for primitives. + * Copied from https://github.com/apache/lucene/blob/main/lucene/core/src/java/org/apache/lucene/util/BitUtil.java + */ +public final class BitUtil { + + private BitUtil() {} // no instance + + /** + * Native byte order. + * + *

Warning: This constant is {@link ByteOrder#nativeOrder()} only in production environments, + * during testing we randomize it. If you need to communicate with native APIs (e.g., Java's + * Panama API), use {@link ByteOrder#nativeOrder()}. + */ + public static final ByteOrder NATIVE_BYTE_ORDER = getNativeByteOrder(); + + private static ByteOrder getNativeByteOrder() { + try { + var prop = System.getProperty("tests.seed"); + if (prop != null) { + return (prop.hashCode() % 2 == 0) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; + } + } catch ( + @SuppressWarnings("unused") + SecurityException se) { + // fall-through + } + return ByteOrder.nativeOrder(); + } + + /** + * A {@link VarHandle} to read/write little endian {@code short} from/to a byte array. Shape: + * {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short + * val)} + */ + public static final VarHandle VH_LE_SHORT = + MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.LITTLE_ENDIAN); + + /** + * A {@link VarHandle} to read/write little endian {@code int} from a byte array. Shape: {@code + * int vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, int val)} + */ + public static final VarHandle VH_LE_INT = + MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN); + + /** + * A {@link VarHandle} to read/write little endian {@code long} from a byte array. Shape: {@code + * long vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, long val)} + */ + public static final VarHandle VH_LE_LONG = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + + /** + * A {@link VarHandle} to read/write little endian {@code float} from a byte array. Shape: {@code + * float vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, float val)} + */ + public static final VarHandle VH_LE_FLOAT = + MethodHandles.byteArrayViewVarHandle(float[].class, ByteOrder.LITTLE_ENDIAN); + + /** + * A {@link VarHandle} to read/write little endian {@code double} from a byte array. Shape: {@code + * double vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, double val)} + */ + public static final VarHandle VH_LE_DOUBLE = + MethodHandles.byteArrayViewVarHandle(double[].class, ByteOrder.LITTLE_ENDIAN); + + /** + * A {@link VarHandle} to read/write native endian {@code short} from/to a byte array. Shape: + * {@code short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short + * val)} + * + *

Warning: This handle uses default order only in production environments, during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_SHORT = + MethodHandles.byteArrayViewVarHandle(short[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code int} from a byte array. Shape: {@code + * int vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, int val)} + * + *

Warning: This handle uses default order only in production environments, during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_INT = + MethodHandles.byteArrayViewVarHandle(int[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code long} from a byte array. Shape: {@code + * long vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, long val)} + * + *

Warning: This handle uses default order only in production environments, during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_LONG = + MethodHandles.byteArrayViewVarHandle(long[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code float} from a byte array. Shape: {@code + * float vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, float val)} + * + *

Warning: This handle uses default order only in production environments, during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_FLOAT = + MethodHandles.byteArrayViewVarHandle(float[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write native endian {@code double} from a byte array. Shape: {@code + * double vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, double val)} + * + *

Warning: This handle uses default order only in production environments, during testing we + * randomize it. If you need to communicate with native APIs (e.g., Java's Panama API), use {@link + * ByteOrder#nativeOrder()}. + */ + public static final VarHandle VH_NATIVE_DOUBLE = + MethodHandles.byteArrayViewVarHandle(double[].class, NATIVE_BYTE_ORDER); + + /** + * A {@link VarHandle} to read/write big endian {@code short} from a byte array. Shape: {@code + * short vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, short val)} + * + * @deprecated Better use little endian unless it is needed for backwards compatibility. + */ + @Deprecated + public static final VarHandle VH_BE_SHORT = + MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN); + + /** + * A {@link VarHandle} to read/write big endian {@code int} from a byte array. Shape: {@code int + * vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, int val)} + * + * @deprecated Better use little endian unless it is needed for backwards compatibility. + */ + @Deprecated + public static final VarHandle VH_BE_INT = + MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.BIG_ENDIAN); + + /** + * A {@link VarHandle} to read/write big endian {@code long} from a byte array. Shape: {@code long + * vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, long val)} + * + * @deprecated Better use little endian unless it is needed for backwards compatibility. + */ + @Deprecated + public static final VarHandle VH_BE_LONG = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.BIG_ENDIAN); + + /** + * A {@link VarHandle} to read/write big endian {@code float} from a byte array. Shape: {@code + * float vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, float val)} + * + * @deprecated Better use little endian unless it is needed for backwards compatibility. + */ + @Deprecated + public static final VarHandle VH_BE_FLOAT = + MethodHandles.byteArrayViewVarHandle(float[].class, ByteOrder.BIG_ENDIAN); + + /** + * A {@link VarHandle} to read/write big endian {@code double} from a byte array. Shape: {@code + * double vh.get(byte[] arr, int ofs)} and {@code void vh.set(byte[] arr, int ofs, double val)} + * + * @deprecated Better use little endian unless it is needed for backwards compatibility. + */ + @Deprecated + public static final VarHandle VH_BE_DOUBLE = + MethodHandles.byteArrayViewVarHandle(double[].class, ByteOrder.BIG_ENDIAN); + + /** + * returns the next highest power of two, or the current value if it's already a power of two or + * zero + */ + public static int nextHighestPowerOfTwo(int v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; + } + + /** + * returns the next highest power of two, or the current value if it's already a power of two or + * zero + */ + public static long nextHighestPowerOfTwo(long v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + v++; + return v; + } + + // magic numbers for bit interleaving + private static final long MAGIC0 = 0x5555555555555555L; + private static final long MAGIC1 = 0x3333333333333333L; + private static final long MAGIC2 = 0x0F0F0F0F0F0F0F0FL; + private static final long MAGIC3 = 0x00FF00FF00FF00FFL; + private static final long MAGIC4 = 0x0000FFFF0000FFFFL; + private static final long MAGIC5 = 0x00000000FFFFFFFFL; + private static final long MAGIC6 = 0xAAAAAAAAAAAAAAAAL; + + // shift values for bit interleaving + private static final long SHIFT0 = 1; + private static final long SHIFT1 = 2; + private static final long SHIFT2 = 4; + private static final long SHIFT3 = 8; + private static final long SHIFT4 = 16; + + /** + * Interleaves the first 32 bits of each long value + * + *

Adapted from: http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN + */ + public static long interleave(int even, int odd) { + long v1 = 0x00000000FFFFFFFFL & even; + long v2 = 0x00000000FFFFFFFFL & odd; + v1 = (v1 | (v1 << SHIFT4)) & MAGIC4; + v1 = (v1 | (v1 << SHIFT3)) & MAGIC3; + v1 = (v1 | (v1 << SHIFT2)) & MAGIC2; + v1 = (v1 | (v1 << SHIFT1)) & MAGIC1; + v1 = (v1 | (v1 << SHIFT0)) & MAGIC0; + v2 = (v2 | (v2 << SHIFT4)) & MAGIC4; + v2 = (v2 | (v2 << SHIFT3)) & MAGIC3; + v2 = (v2 | (v2 << SHIFT2)) & MAGIC2; + v2 = (v2 | (v2 << SHIFT1)) & MAGIC1; + v2 = (v2 | (v2 << SHIFT0)) & MAGIC0; + + return (v2 << 1) | v1; + } + + /** Extract just the even-bits value as a long from the bit-interleaved value */ + public static long deinterleave(long b) { + b &= MAGIC0; + b = (b ^ (b >>> SHIFT0)) & MAGIC1; + b = (b ^ (b >>> SHIFT1)) & MAGIC2; + b = (b ^ (b >>> SHIFT2)) & MAGIC3; + b = (b ^ (b >>> SHIFT3)) & MAGIC4; + b = (b ^ (b >>> SHIFT4)) & MAGIC5; + return b; + } + + /** flip flops odd with even bits */ + public static long flipFlop(final long b) { + return ((b & MAGIC6) >>> 1) | ((b & MAGIC0) << 1); + } + + /** Same as {@link #zigZagEncode(long)} but on integers. */ + public static int zigZagEncode(int i) { + return (i >> 31) ^ (i << 1); + } + + /** + * Zig-zag encode + * the provided long. Assuming the input is a signed long whose absolute value can be stored on + * n bits, the returned value will be an unsigned long that can be stored on + * n+1 bits. + */ + public static long zigZagEncode(long l) { + return (l >> 63) ^ (l << 1); + } + + /** Decode an int previously encoded with {@link #zigZagEncode(int)}. */ + public static int zigZagDecode(int i) { + return ((i >>> 1) ^ -(i & 1)); + } + + /** Decode a long previously encoded with {@link #zigZagEncode(long)}. */ + public static long zigZagDecode(long l) { + return ((l >>> 1) ^ -(l & 1)); + } + + /** + * Return true if, and only if, the provided integer - treated as an unsigned integer - is either + * 0 or a power of two. + */ + public static boolean isZeroOrPowerOfTwo(int x) { + return (x & (x - 1)) == 0; + } +} diff --git a/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java b/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java index af842788a4..041be8eeef 100644 --- a/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java +++ b/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java @@ -25,10 +25,6 @@ @Log4j2 @NoArgsConstructor(access = AccessLevel.PRIVATE) public class IndexHyperParametersUtil { - - private static final int INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE = 512; - private static final int INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE = 512; - /** * Returns the default value of EF Construction that should be used for the input index version. After version 2.12.0 * of Opensearch we are have reduced the value of ef_construction in favor of better build times. @@ -37,14 +33,6 @@ public class IndexHyperParametersUtil { * @return default value of EF Construction that should be used for the input index version. */ public static int getHNSWEFConstructionValue(@NonNull final Version indexVersion) { - if (indexVersion.before(Version.V_2_12_0)) { - log.debug( - "Picking up old values of ef_construction : index version : {}, value: {}", - indexVersion, - INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE - ); - return INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE; - } log.debug( "Picking up new values of ef_construction : index version : {}, value: {}", indexVersion, @@ -61,14 +49,6 @@ public static int getHNSWEFConstructionValue(@NonNull final Version indexVersion * @return default value of EF Search that should be used for the input index version. */ public static int getHNSWEFSearchValue(@NonNull final Version indexVersion) { - if (indexVersion.before(Version.V_2_12_0)) { - log.debug( - "Picking up old values of ef_search : index version : {}, value: {}", - indexVersion, - INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE - ); - return INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE; - } log.debug( "Picking up new values of ef_search : index version : {}, value: {}", indexVersion, diff --git a/src/main/java/org/opensearch/knn/plugin/script/KNNScoringUtil.java b/src/main/java/org/opensearch/knn/plugin/script/KNNScoringUtil.java index f61ae4349e..f5b12ff786 100644 --- a/src/main/java/org/opensearch/knn/plugin/script/KNNScoringUtil.java +++ b/src/main/java/org/opensearch/knn/plugin/script/KNNScoringUtil.java @@ -5,22 +5,40 @@ package org.opensearch.knn.plugin.script; -import java.math.BigInteger; -import java.util.List; -import java.util.Locale; -import java.util.Objects; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.Constants; import org.apache.lucene.util.VectorUtil; import org.opensearch.knn.index.KNNVectorScriptDocValues; import org.opensearch.knn.index.SpaceType; import org.opensearch.knn.index.VectorDataType; +import org.opensearch.knn.index.util.BitUtil; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.math.BigInteger; +import java.util.List; +import java.util.Locale; +import java.util.Objects; import static org.opensearch.knn.common.KNNValidationUtil.validateByteVectorValue; +import static org.opensearch.knn.index.util.BitUtil.NATIVE_BYTE_ORDER; public class KNNScoringUtil { private static Logger logger = LogManager.getLogger(KNNScoringUtil.class); + /** + * For xorBitCount we stride over the values as either 64-bits (long) or 32-bits (int) at a time. + * On ARM Long::bitCount is not vectorized, and therefore produces less than optimal code, when + * compared to Integer::bitCount. While Long::bitCount is optimal on x64. See + * https://bugs.openjdk.org/browse/JDK-8336000 + */ + static final boolean XOR_BIT_COUNT_STRIDE_AS_INT = Constants.OS_ARCH.equals("aarch64"); + + + public static final VarHandle VH_NATIVE_LONG = + MethodHandles.byteArrayViewVarHandle(long[].class, NATIVE_BYTE_ORDER); + /** * checks both query vector and input vector has equal dimension * @@ -201,7 +219,55 @@ public static float calculateHammingBit(Long queryLong, Long inputLong) { */ public static float calculateHammingBit(byte[] queryVector, byte[] inputVector) { requireEqualDimension(queryVector, inputVector); - return VectorUtil.xorBitCount(queryVector, inputVector); + return xorBitCount(queryVector, inputVector); + } + + /** + * XOR bit count computed over signed bytes. + * + * @param a bytes containing a vector + * @param b bytes containing another vector, of the same dimension + * @return the value of the XOR bit count of the two vectors + */ + public static int xorBitCount(byte[] a, byte[] b) { + if (a.length != b.length) { + throw new IllegalArgumentException("vector dimensions differ: " + a.length + "!=" + b.length); + } + if (XOR_BIT_COUNT_STRIDE_AS_INT) { + return xorBitCountInt(a, b); + } else { + return xorBitCountLong(a, b); + } + } + + /** XOR bit count striding over 4 bytes at a time. */ + static int xorBitCountInt(byte[] a, byte[] b) { + int distance = 0, i = 0; + for (final int upperBound = a.length & -Integer.BYTES; i < upperBound; i += Integer.BYTES) { + distance += + Integer.bitCount( + (int) BitUtil.VH_NATIVE_INT.get(a, i) ^ (int) BitUtil.VH_NATIVE_INT.get(b, i)); + } + // tail: + for (; i < a.length; i++) { + distance += Integer.bitCount((a[i] ^ b[i]) & 0xFF); + } + return distance; + } + + /** XOR bit count striding over 8 bytes at a time. */ + static int xorBitCountLong(byte[] a, byte[] b) { + int distance = 0, i = 0; + for (final int upperBound = a.length & -Long.BYTES; i < upperBound; i += Long.BYTES) { + distance += + Long.bitCount( + (long) VH_NATIVE_LONG.get(a, i) ^ (long) VH_NATIVE_LONG.get(b, i)); + } + // tail: + for (; i < a.length; i++) { + distance += Integer.bitCount((a[i] ^ b[i]) & 0xFF); + } + return distance; } /** diff --git a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 308b379675..8185e7858a 100644 --- a/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -4,6 +4,4 @@ org.opensearch.knn.index.codec.KNN86Codec.KNN86Codec org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec -org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec -org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec -org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec \ No newline at end of file +org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec \ No newline at end of file diff --git a/src/test/java/org/opensearch/knn/index/KNNVectorSimilarityFunctionTests.java b/src/test/java/org/opensearch/knn/index/KNNVectorSimilarityFunctionTests.java index 691941dc30..1974ea6189 100644 --- a/src/test/java/org/opensearch/knn/index/KNNVectorSimilarityFunctionTests.java +++ b/src/test/java/org/opensearch/knn/index/KNNVectorSimilarityFunctionTests.java @@ -22,8 +22,7 @@ public class KNNVectorSimilarityFunctionTests extends TestCase { private static final Set FUNCTION_SET_BACKED_BY_LUCENE = Set.of( EUCLIDEAN, DOT_PRODUCT, - COSINE, - MAXIMUM_INNER_PRODUCT + COSINE ); public void testFunctions_whenBackedByLucene_thenSameAsLucene() { diff --git a/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java b/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java index 1a047ac95f..29ad046702 100644 --- a/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java +++ b/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java @@ -812,8 +812,8 @@ public void test_whenUsingIP_thenSuccess() { // Check that the expected scores are returned final List expectedScores = Arrays.asList( - VectorUtil.scaleMaxInnerProductScore(8.0f), - VectorUtil.scaleMaxInnerProductScore(-8.0f) + KNNVectorSimilarityFunction.scaleMaxInnerProductScore(8.0f), + KNNVectorSimilarityFunction.scaleMaxInnerProductScore(-8.0f) ); assertEquals(expectedScores.size(), knnResults.size()); for (int i = 0; i < expectedScores.size(); i++) { diff --git a/src/test/java/org/opensearch/knn/index/SegmentReplicationIT.java b/src/test/java/org/opensearch/knn/index/SegmentReplicationIT.java deleted file mode 100644 index 02b0fcf71d..0000000000 --- a/src/test/java/org/opensearch/knn/index/SegmentReplicationIT.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - * - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.knn.index; - -import lombok.SneakyThrows; -import lombok.extern.log4j.Log4j2; -import org.apache.http.util.EntityUtils; -import org.junit.Assert; -import org.opensearch.client.Response; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.knn.KNNRestTestCase; -import org.opensearch.knn.KNNResult; - -import java.util.List; - -/** - * This IT class contains will contain special cases of IT for segment replication behavior. - * All the index created in this test will have replication type SEGMENT, number of replicas: 1 and should be run on - * at-least 2 node configuration. - */ -@Log4j2 -public class SegmentReplicationIT extends KNNRestTestCase { - private static final String INDEX_NAME = "segment-replicated-knn-index"; - - @SneakyThrows - public void testSearchOnReplicas_whenIndexHasDeletedDocs_thenSuccess() { - createKnnIndex(INDEX_NAME, getKNNSegmentReplicatedIndexSettings(), createKNNIndexMethodFieldMapping(FIELD_NAME, 2)); - - Float[] vector = { 1.3f, 2.2f }; - int docsInIndex = 10; - - for (int i = 0; i < docsInIndex; i++) { - addKnnDoc(INDEX_NAME, Integer.toString(i), FIELD_NAME, vector); - } - refreshIndex(INDEX_NAME); - int deleteDocs = 5; - for (int i = 0; i < deleteDocs; i++) { - deleteKnnDoc(INDEX_NAME, Integer.toString(i)); - } - refreshIndex(INDEX_NAME); - // sleep for 5sec to ensure data is replicated. I don't have a better way here to know if segments has been - // replicated. - Thread.sleep(5000); - // validate warmup is successful or not. - doKnnWarmup(List.of(INDEX_NAME)); - - XContentBuilder queryBuilder = XContentFactory.jsonBuilder().startObject().startObject("query"); - queryBuilder.startObject("knn"); - queryBuilder.startObject(FIELD_NAME); - queryBuilder.field("vector", vector); - queryBuilder.field("k", docsInIndex); - queryBuilder.endObject().endObject().endObject().endObject(); - - // validate primaries are working - Response searchResponse = performSearch(INDEX_NAME, queryBuilder.toString(), "preference=_primary"); - String responseBody = EntityUtils.toString(searchResponse.getEntity()); - List knnResults = parseSearchResponse(responseBody, FIELD_NAME); - assertEquals(docsInIndex - deleteDocs, knnResults.size()); - - if (ensureMinDataNodesCountForTestingQueriesOnReplica()) { - // validate replicas are working - searchResponse = performSearch(INDEX_NAME, queryBuilder.toString(), "preference=_replica"); - responseBody = EntityUtils.toString(searchResponse.getEntity()); - knnResults = parseSearchResponse(responseBody, FIELD_NAME); - assertEquals(docsInIndex - deleteDocs, knnResults.size()); - } - } - - private boolean ensureMinDataNodesCountForTestingQueriesOnReplica() { - int dataNodeCount = getDataNodeCount(); - if (dataNodeCount <= 1) { - log.warn( - "Not running segment replication tests named: " - + "testSearchOnReplicas_whenIndexHasDeletedDocs_thenSuccess, as data nodes count is not atleast 2. " - + "Actual datanode count : {}", - dataNodeCount - ); - Assert.assertTrue(true); - // making the test successful because we don't want to break already running tests. - return false; - } - return true; - } -} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java deleted file mode 100644 index 307ebbb248..0000000000 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.codec.KNN990Codec; - -import lombok.SneakyThrows; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.knn.index.codec.KNNCodecTestCase; - -import java.util.Optional; -import java.util.function.Function; - -import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_9_0; - -public class KNN990CodecTests extends KNNCodecTestCase { - - @SneakyThrows - public void testMultiFieldsKnnIndex() { - testMultiFieldsKnnIndex(KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build()); - } - - @SneakyThrows - public void testBuildFromModelTemplate() { - testBuildFromModelTemplate((KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build())); - } - - // Ensure that the codec is able to return the correct per field knn vectors format for codec - public void testCodecSetsCustomPerFieldKnnVectorsFormat() { - final Codec codec = new KNN990Codec(); - assertTrue(codec.knnVectorsFormat() instanceof KNN990PerFieldKnnVectorsFormat); - } - - // IMPORTANT: When this Codec is moved to a backwards Codec, this test needs to be removed, because it attempts to - // write with a read only codec, which will fail - @SneakyThrows - public void testKnnVectorIndex() { - Function perFieldKnnVectorsFormatProvider = ( - mapperService) -> new KNN990PerFieldKnnVectorsFormat(Optional.of(mapperService)); - - Function knnCodecProvider = (knnVectorFormat) -> KNN990Codec.builder() - .delegate(V_9_9_0.getDefaultCodecDelegate()) - .knnVectorsFormat(knnVectorFormat) - .build(); - - testKnnVectorIndex(knnCodecProvider, perFieldKnnVectorsFormatProvider); - } -} diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java index 29dae60857..4a17a1d6f5 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java @@ -9,13 +9,11 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec; import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec; -import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec; import org.opensearch.knn.KNNTestCase; import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0; import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_2_0; import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_4_0; -import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0; public class KNNCodecFactoryTests extends KNNTestCase { @@ -37,12 +35,6 @@ public void testKNN940Codec() { assertNotNull(V_9_4_0.getKnnFormatFacadeSupplier().apply(V_9_4_0.getDefaultCodecDelegate())); } - public void testKNN950Codec() { - assertDelegateForVersion(V_9_5_0, Lucene95Codec.class); - assertNotNull(V_9_5_0.getPerFieldKnnVectorsFormat()); - assertNotNull(V_9_5_0.getKnnFormatFacadeSupplier().apply(V_9_5_0.getDefaultCodecDelegate())); - } - private void assertDelegateForVersion(final KNNCodecVersion codecVersion, final Class expectedCodecClass) { final Codec defaultDelegate = codecVersion.getDefaultCodecDelegate(); assertNotNull(defaultDelegate); diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecServiceTests.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecServiceTests.java index dfe4e7f22a..233b9adf77 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecServiceTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecServiceTests.java @@ -36,7 +36,6 @@ public void setUp() throws Exception { super.setUp(); IndexMetadata indexMetadata = mock(IndexMetadata.class); when(indexMetadata.getIndex()).thenReturn(new Index(TEST_INDEX, INDEX_UUID.toString())); - when(indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)).thenReturn(null); when(indexMetadata.getSettings()).thenReturn(Settings.EMPTY); Settings settings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, Integer.toString(NUM_OF_SHARDS)).build(); indexSettings = new IndexSettings(indexMetadata, settings); diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java index 66fe9770df..3d890f3082 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestCase.java @@ -348,7 +348,6 @@ public void testKnnVectorIndex( writer.close(); verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_ONE)); - verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_ONE)); IndexSearcher searcher = new IndexSearcher(reader); Query query = KNNQueryFactory.create( @@ -383,7 +382,6 @@ public void testKnnVectorIndex( NativeMemoryLoadStrategy.IndexLoadStrategy.initialize(resourceWatcherService); verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getKnnVectorsFormatForField(eq(FIELD_NAME_TWO)); - verify(perFieldKnnVectorsFormatSpy, atLeastOnce()).getMaxDimensions(eq(FIELD_NAME_TWO)); IndexSearcher searcher1 = new IndexSearcher(reader1); Query query1 = KNNQueryFactory.create( diff --git a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java index 6acdfec5d1..a6d6f598eb 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java @@ -192,8 +192,7 @@ public FieldInfo build() { vectorDimension, VectorEncoding.FLOAT32, vectorSimilarityFunction, - softDeletes, - isParentField + softDeletes ); } } @@ -403,7 +402,6 @@ public static SegmentInfo newSegmentInfo(final Directory directory, final String segmentName, docsInSegment, false, - false, codec, Collections.emptyMap(), randomByteArrayOfLength(StringHelper.ID_LENGTH), diff --git a/src/test/java/org/opensearch/knn/index/query/KNNQueryBuilderTests.java b/src/test/java/org/opensearch/knn/index/query/KNNQueryBuilderTests.java index 18e2b914a3..644b7e4be9 100644 --- a/src/test/java/org/opensearch/knn/index/query/KNNQueryBuilderTests.java +++ b/src/test/java/org/opensearch/knn/index/query/KNNQueryBuilderTests.java @@ -6,7 +6,6 @@ package org.opensearch.knn.index.query; import com.google.common.collect.ImmutableMap; -import org.apache.lucene.search.FloatVectorSimilarityQuery; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -512,61 +511,6 @@ public void testDoToQuery_Normal() throws Exception { assertEquals(knnQueryBuilder.vector(), query.getQueryVector()); } - public void testDoToQuery_whenNormal_whenDoRadiusSearch_whenDistanceThreshold_thenSucceed() { - float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; - KNNQueryBuilder knnQueryBuilder = KNNQueryBuilder.builder() - .fieldName(FIELD_NAME) - .vector(queryVector) - .maxDistance(MAX_DISTANCE) - .build(); - Index dummyIndex = new Index("dummy", "dummy"); - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - KNNVectorFieldMapper.KNNVectorFieldType mockKNNVectorField = mock(KNNVectorFieldMapper.KNNVectorFieldType.class); - when(mockQueryShardContext.index()).thenReturn(dummyIndex); - when(mockKNNVectorField.getDimension()).thenReturn(4); - when(mockKNNVectorField.getVectorDataType()).thenReturn(VectorDataType.FLOAT); - when(mockQueryShardContext.fieldMapper(anyString())).thenReturn(mockKNNVectorField); - MethodComponentContext methodComponentContext = new MethodComponentContext( - org.opensearch.knn.common.KNNConstants.METHOD_HNSW, - ImmutableMap.of() - ); - KNNMethodContext knnMethodContext = new KNNMethodContext(KNNEngine.LUCENE, SpaceType.L2, methodComponentContext); - when(mockKNNVectorField.getKnnMethodContext()).thenReturn(knnMethodContext); - FloatVectorSimilarityQuery query = (FloatVectorSimilarityQuery) knnQueryBuilder.doToQuery(mockQueryShardContext); - float resultSimilarity = KNNEngine.LUCENE.distanceToRadialThreshold(MAX_DISTANCE, SpaceType.L2); - - assertTrue(query.toString().contains("resultSimilarity=" + resultSimilarity)); - assertTrue( - query.toString() - .contains( - "traversalSimilarity=" - + org.opensearch.knn.common.KNNConstants.DEFAULT_LUCENE_RADIAL_SEARCH_TRAVERSAL_SIMILARITY_RATIO * resultSimilarity - ) - ); - } - - public void testDoToQuery_whenNormal_whenDoRadiusSearch_whenScoreThreshold_thenSucceed() { - float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; - - KNNQueryBuilder knnQueryBuilder = KNNQueryBuilder.builder().fieldName(FIELD_NAME).vector(queryVector).minScore(MIN_SCORE).build(); - - Index dummyIndex = new Index("dummy", "dummy"); - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - KNNVectorFieldMapper.KNNVectorFieldType mockKNNVectorField = mock(KNNVectorFieldMapper.KNNVectorFieldType.class); - when(mockQueryShardContext.index()).thenReturn(dummyIndex); - when(mockKNNVectorField.getDimension()).thenReturn(4); - when(mockKNNVectorField.getVectorDataType()).thenReturn(VectorDataType.FLOAT); - when(mockQueryShardContext.fieldMapper(anyString())).thenReturn(mockKNNVectorField); - MethodComponentContext methodComponentContext = new MethodComponentContext( - org.opensearch.knn.common.KNNConstants.METHOD_HNSW, - ImmutableMap.of() - ); - KNNMethodContext knnMethodContext = new KNNMethodContext(KNNEngine.LUCENE, SpaceType.L2, methodComponentContext); - when(mockKNNVectorField.getKnnMethodContext()).thenReturn(knnMethodContext); - FloatVectorSimilarityQuery query = (FloatVectorSimilarityQuery) knnQueryBuilder.doToQuery(mockQueryShardContext); - assertTrue(query.toString().contains("resultSimilarity=" + 0.5f)); - } - public void testDoToQuery_whenDoRadiusSearch_whenPassNegativeDistance_whenSupportedSpaceType_thenSucceed() { float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; float negativeDistance = -1.0f; @@ -801,60 +745,6 @@ public void testDoToQuery_KnnQueryWithFilter_Lucene() throws Exception { assertTrue(query.getClass().isAssignableFrom(KnnFloatVectorQuery.class)); } - public void testDoToQuery_whenDoRadiusSearch_whenDistanceThreshold_whenFilter_thenSucceed() { - float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; - - KNNQueryBuilder knnQueryBuilder = KNNQueryBuilder.builder() - .fieldName(FIELD_NAME) - .vector(queryVector) - .maxDistance(MAX_DISTANCE) - .filter(TERM_QUERY) - .build(); - - Index dummyIndex = new Index("dummy", "dummy"); - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - KNNVectorFieldMapper.KNNVectorFieldType mockKNNVectorField = mock(KNNVectorFieldMapper.KNNVectorFieldType.class); - when(mockQueryShardContext.index()).thenReturn(dummyIndex); - when(mockKNNVectorField.getDimension()).thenReturn(4); - when(mockKNNVectorField.getVectorDataType()).thenReturn(VectorDataType.FLOAT); - MethodComponentContext methodComponentContext = new MethodComponentContext( - org.opensearch.knn.common.KNNConstants.METHOD_HNSW, - ImmutableMap.of() - ); - KNNMethodContext knnMethodContext = new KNNMethodContext(KNNEngine.LUCENE, SpaceType.L2, methodComponentContext); - when(mockKNNVectorField.getKnnMethodContext()).thenReturn(knnMethodContext); - when(mockQueryShardContext.fieldMapper(anyString())).thenReturn(mockKNNVectorField); - Query query = knnQueryBuilder.doToQuery(mockQueryShardContext); - assertNotNull(query); - assertTrue(query.getClass().isAssignableFrom(FloatVectorSimilarityQuery.class)); - } - - public void testDoToQuery_whenDoRadiusSearch_whenScoreThreshold_whenFilter_thenSucceed() { - float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; - KNNQueryBuilder knnQueryBuilder = KNNQueryBuilder.builder() - .fieldName(FIELD_NAME) - .vector(queryVector) - .maxDistance(MAX_DISTANCE) - .filter(TERM_QUERY) - .build(); - Index dummyIndex = new Index("dummy", "dummy"); - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - KNNVectorFieldMapper.KNNVectorFieldType mockKNNVectorField = mock(KNNVectorFieldMapper.KNNVectorFieldType.class); - when(mockQueryShardContext.index()).thenReturn(dummyIndex); - when(mockKNNVectorField.getDimension()).thenReturn(4); - when(mockKNNVectorField.getVectorDataType()).thenReturn(VectorDataType.FLOAT); - MethodComponentContext methodComponentContext = new MethodComponentContext( - org.opensearch.knn.common.KNNConstants.METHOD_HNSW, - ImmutableMap.of() - ); - KNNMethodContext knnMethodContext = new KNNMethodContext(KNNEngine.LUCENE, SpaceType.L2, methodComponentContext); - when(mockKNNVectorField.getKnnMethodContext()).thenReturn(knnMethodContext); - when(mockQueryShardContext.fieldMapper(anyString())).thenReturn(mockKNNVectorField); - Query query = knnQueryBuilder.doToQuery(mockQueryShardContext); - assertNotNull(query); - assertTrue(query.getClass().isAssignableFrom(FloatVectorSimilarityQuery.class)); - } - public void testDoToQuery_WhenknnQueryWithFilterAndFaissEngine_thenSuccess() { // Given float[] queryVector = { 1.0f, 2.0f, 3.0f, 4.0f }; @@ -1115,8 +1005,6 @@ public void testSerialization() throws Exception { assertSerialization(Version.CURRENT, Optional.empty(), K, null, null, null); assertSerialization(Version.CURRENT, Optional.empty(), K, Map.of("ef_search", EF_SEARCH), null, null); assertSerialization(Version.CURRENT, Optional.of(TERM_QUERY), K, Map.of("ef_search", EF_SEARCH), null, null); - assertSerialization(Version.V_2_3_0, Optional.empty(), K, Map.of("ef_search", EF_SEARCH), null, null); - assertSerialization(Version.V_2_3_0, Optional.empty(), K, null, null, null); // For distance threshold search assertSerialization(Version.CURRENT, Optional.empty(), null, null, null, MAX_DISTANCE); @@ -1181,12 +1069,8 @@ private void assertSerialization( } private void assertMethodParameters(Version version, Map expectedMethodParameters, Map actualMethodParameters) { - if (!version.onOrAfter(Version.V_2_16_0)) { - assertNull(actualMethodParameters); - } else if (expectedMethodParameters != null) { - if (version.onOrAfter(Version.V_2_16_0)) { - assertEquals(expectedMethodParameters.get("ef_search"), actualMethodParameters.get("ef_search")); - } + if (expectedMethodParameters != null) { + assertEquals(expectedMethodParameters.get("ef_search"), actualMethodParameters.get("ef_search")); } } diff --git a/src/test/java/org/opensearch/knn/index/query/KNNQueryFactoryTests.java b/src/test/java/org/opensearch/knn/index/query/KNNQueryFactoryTests.java index 02b64cba50..d111bc7f8e 100644 --- a/src/test/java/org/opensearch/knn/index/query/KNNQueryFactoryTests.java +++ b/src/test/java/org/opensearch/knn/index/query/KNNQueryFactoryTests.java @@ -11,8 +11,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.join.BitSetProducer; -import org.apache.lucene.search.join.DiversifyingChildrenByteKnnVectorQuery; -import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery; import org.apache.lucene.search.join.ToChildBlockJoinQuery; import org.mockito.MockedConstruction; import org.mockito.Mockito; @@ -272,11 +270,6 @@ public void testCreateFaissQueryWithFilter_withValidValues_nullEfSearch_thenSucc assertEquals(expectedQuery, actual); } - public void testCreate_whenLuceneWithParentFilter_thenReturnDiversifyingQuery() { - validateDiversifyingQueryWithParentFilter(VectorDataType.BYTE, DiversifyingChildrenByteKnnVectorQuery.class); - validateDiversifyingQueryWithParentFilter(VectorDataType.FLOAT, DiversifyingChildrenFloatKnnVectorQuery.class); - } - public void testCreate_whenNestedVectorFiledAndNonNestedFilterField_thenReturnToChildBlockJoinQueryForFilters() { MapperService mockMapperService = mock(MapperService.class); QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); diff --git a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java index d08f7e0ce7..2873b95812 100644 --- a/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java +++ b/src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java @@ -241,7 +241,6 @@ public void testQueryScoreForFaissWithModel() { SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -331,7 +330,6 @@ public void testShardWithoutFiles() { SEGMENT_NAME, 100, false, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -375,7 +373,6 @@ public void testEmptyQueryResults() { SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -645,7 +642,6 @@ private SegmentReader mockSegmentReader() { SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -1115,7 +1111,6 @@ public void testDoANNSearch_whenRadialIsDefined_thenCallJniRadiusQueryIndex() { SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -1192,7 +1187,6 @@ private SegmentReader getMockedSegmentReader() { SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], @@ -1257,7 +1251,6 @@ private void testQueryScore( SEGMENT_NAME, 100, true, - false, KNNCodecVersion.current().getDefaultCodecDelegate(), Map.of(), new byte[StringHelper.ID_LENGTH], diff --git a/src/test/java/org/opensearch/knn/index/query/RNNQueryFactoryTests.java b/src/test/java/org/opensearch/knn/index/query/RNNQueryFactoryTests.java deleted file mode 100644 index af415f9c52..0000000000 --- a/src/test/java/org/opensearch/knn/index/query/RNNQueryFactoryTests.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.knn.index.query; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.opensearch.knn.common.KNNConstants.DEFAULT_VECTOR_DATA_TYPE_FIELD; -import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import org.apache.lucene.search.ByteVectorSimilarityQuery; -import org.apache.lucene.search.FloatVectorSimilarityQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.join.BitSetProducer; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.index.query.TermQueryBuilder; -import org.opensearch.knn.KNNTestCase; -import org.opensearch.knn.index.VectorDataType; -import org.opensearch.knn.index.util.KNNEngine; - -public class RNNQueryFactoryTests extends KNNTestCase { - private static final String FILTER_FILED_NAME = "foo"; - private static final String FILTER_FILED_VALUE = "fooval"; - private static final QueryBuilder FILTER_QUERY_BUILDER = new TermQueryBuilder(FILTER_FILED_NAME, FILTER_FILED_VALUE); - private final int testQueryDimension = 17; - private final float[] testQueryVector = new float[testQueryDimension]; - private final byte[] testByteQueryVector = new byte[testQueryDimension]; - private final String testIndexName = "test-index"; - private final String testFieldName = "test-field"; - private final Float testRadius = 0.5f; - private final int maxResultWindow = 20000; - private final Map methodParameters = Map.of(METHOD_PARAMETER_EF_SEARCH, 100); - - public void testCreate_whenLucene_withRadiusQuery_withFloatVector() { - List luceneDefaultQueryEngineList = Arrays.stream(KNNEngine.values()) - .filter(knnEngine -> !KNNEngine.getEnginesThatCreateCustomSegmentFiles().contains(knnEngine)) - .collect(Collectors.toList()); - for (KNNEngine knnEngine : luceneDefaultQueryEngineList) { - Query query = RNNQueryFactory.create( - knnEngine, - testIndexName, - testFieldName, - testQueryVector, - testRadius, - DEFAULT_VECTOR_DATA_TYPE_FIELD - ); - assertEquals(FloatVectorSimilarityQuery.class, query.getClass()); - } - } - - public void testCreate_whenLucene_withRadiusQuery_withByteVector() { - List luceneDefaultQueryEngineList = Arrays.stream(KNNEngine.values()) - .filter(knnEngine -> !KNNEngine.getEnginesThatCreateCustomSegmentFiles().contains(knnEngine)) - .collect(Collectors.toList()); - for (KNNEngine knnEngine : luceneDefaultQueryEngineList) { - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - MappedFieldType testMapper = mock(MappedFieldType.class); - when(mockQueryShardContext.fieldMapper(any())).thenReturn(testMapper); - BitSetProducer parentFilter = mock(BitSetProducer.class); - when(mockQueryShardContext.getParentFilter()).thenReturn(parentFilter); - final RNNQueryFactory.CreateQueryRequest createQueryRequest = RNNQueryFactory.CreateQueryRequest.builder() - .knnEngine(knnEngine) - .indexName(testIndexName) - .fieldName(testFieldName) - .vector(testQueryVector) - .radius(testRadius) - .byteVector(testByteQueryVector) - .vectorDataType(VectorDataType.BYTE) - .context(mockQueryShardContext) - .filter(FILTER_QUERY_BUILDER) - .build(); - Query query = RNNQueryFactory.create(createQueryRequest); - assertEquals(ByteVectorSimilarityQuery.class, query.getClass()); - } - } - - public void testCreate_whenLucene_withFilter_thenSucceed() { - List luceneDefaultQueryEngineList = Arrays.stream(KNNEngine.values()) - .filter(knnEngine -> !KNNEngine.getEnginesThatCreateCustomSegmentFiles().contains(knnEngine)) - .collect(Collectors.toList()); - for (KNNEngine knnEngine : luceneDefaultQueryEngineList) { - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - MappedFieldType testMapper = mock(MappedFieldType.class); - when(mockQueryShardContext.fieldMapper(any())).thenReturn(testMapper); - final RNNQueryFactory.CreateQueryRequest createQueryRequest = RNNQueryFactory.CreateQueryRequest.builder() - .knnEngine(knnEngine) - .indexName(testIndexName) - .fieldName(testFieldName) - .vector(testQueryVector) - .vectorDataType(DEFAULT_VECTOR_DATA_TYPE_FIELD) - .context(mockQueryShardContext) - .filter(FILTER_QUERY_BUILDER) - .radius(testRadius) - .build(); - Query query = RNNQueryFactory.create(createQueryRequest); - assertEquals(FloatVectorSimilarityQuery.class, query.getClass()); - } - } - - public void testCreate_whenFaiss_thenSucceed() { - // Given - QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); - MappedFieldType testMapper = mock(MappedFieldType.class); - IndexSettings indexSettings = mock(IndexSettings.class); - when(mockQueryShardContext.getIndexSettings()).thenReturn(indexSettings); - when(mockQueryShardContext.fieldMapper(any())).thenReturn(testMapper); - when(mockQueryShardContext.getIndexSettings().getMaxResultWindow()).thenReturn(maxResultWindow); - - final KNNQuery expectedQuery = KNNQuery.builder() - .field(testFieldName) - .queryVector(testQueryVector) - .indexName(testIndexName) - .radius(testRadius) - .methodParameters(methodParameters) - .context(new KNNQuery.Context(maxResultWindow)) - .build(); - - // When - final RNNQueryFactory.CreateQueryRequest createQueryRequest = RNNQueryFactory.CreateQueryRequest.builder() - .knnEngine(KNNEngine.FAISS) - .indexName(testIndexName) - .fieldName(testFieldName) - .vector(testQueryVector) - .radius(testRadius) - .vectorDataType(DEFAULT_VECTOR_DATA_TYPE_FIELD) - .context(mockQueryShardContext) - .methodParameters(methodParameters) - .build(); - - Query query = RNNQueryFactory.create(createQueryRequest); - - // Then - assertEquals(expectedQuery, query); - } -} diff --git a/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java b/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java index 508b8765c5..656f4bd128 100644 --- a/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java @@ -24,8 +24,6 @@ public void testLombokNonNull() { } public void testGetHNSWEFConstructionValue_withDifferentValues_thenSuccess() { - Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_11_0)); - Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_3_0)); Assert.assertEquals( KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION.intValue(), IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.CURRENT) @@ -34,8 +32,6 @@ public void testGetHNSWEFConstructionValue_withDifferentValues_thenSuccess() { } public void testGetHNSWEFSearchValue_withDifferentValues_thenSuccess() { - Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_11_0)); - Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_3_0)); Assert.assertEquals( KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH.intValue(), IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.CURRENT)