Skip to content

Commit

Permalink
Adds graph build time metrics in NativeEngines990KnnVectorsWriter (op…
Browse files Browse the repository at this point in the history
…ensearch-project#2018)

Signed-off-by: Tejas Shah <[email protected]>
  • Loading branch information
shatejas authored Sep 3, 2024
1 parent 6b197be commit e6c5953
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
import org.apache.lucene.index.Sorter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.opensearch.common.StopWatch;
import org.opensearch.knn.index.quantizationservice.QuantizationService;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.nativeindex.NativeIndexWriter;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
import org.opensearch.knn.plugin.stats.KNNGraphValue;
import org.opensearch.knn.quantization.models.quantizationParams.QuantizationParams;
import org.opensearch.knn.quantization.models.quantizationState.QuantizationState;

Expand All @@ -45,6 +47,10 @@
@RequiredArgsConstructor
public class NativeEngines990KnnVectorsWriter extends KnnVectorsWriter {
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(NativeEngines990KnnVectorsWriter.class);

private static final String FLUSH_OPERATION = "flush";
private static final String MERGE_OPERATION = "merge";

private final SegmentWriteState segmentWriteState;
private final FlatVectorsWriter flatVectorsWriter;
private final List<NativeEngineFieldVectorsWriter<?>> fields = new ArrayList<>();
Expand Down Expand Up @@ -78,7 +84,9 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
field.getFieldInfo(),
(vectorDataType, fieldInfo, fieldVectorsWriter) -> getKNNVectorValues(vectorDataType, fieldVectorsWriter),
NativeIndexWriter::flushIndex,
field
field,
KNNGraphValue.REFRESH_TOTAL_TIME_IN_MILLIS,
FLUSH_OPERATION
);
}
}
Expand All @@ -88,7 +96,14 @@ public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState
// This will ensure that we are merging the FlatIndex during force merge.
flatVectorsWriter.mergeOneField(fieldInfo, mergeState);
// For merge, pick values from flat vector and reindex again. This will use the flush operation to create graphs
trainAndIndex(fieldInfo, this::getKNNVectorValuesForMerge, NativeIndexWriter::mergeIndex, mergeState);
trainAndIndex(
fieldInfo,
this::getKNNVectorValuesForMerge,
NativeIndexWriter::mergeIndex,
mergeState,
KNNGraphValue.MERGE_TOTAL_TIME_IN_MILLIS,
MERGE_OPERATION
);

}

Expand Down Expand Up @@ -214,7 +229,9 @@ private <T, C> void trainAndIndex(
final FieldInfo fieldInfo,
final VectorValuesRetriever<VectorDataType, FieldInfo, C, KNNVectorValues<T>> vectorValuesRetriever,
final IndexOperation<T> indexOperation,
final C VectorProcessingContext
final C VectorProcessingContext,
final KNNGraphValue graphBuildTime,
final String operationName
) throws IOException {
final VectorDataType vectorDataType = extractVectorDataType(fieldInfo);
KNNVectorValues<T> knnVectorValues = vectorValuesRetriever.apply(vectorDataType, fieldInfo, VectorProcessingContext);
Expand All @@ -228,6 +245,12 @@ private <T, C> void trainAndIndex(
: NativeIndexWriter.getWriter(fieldInfo, segmentWriteState);

knnVectorValues = vectorValuesRetriever.apply(vectorDataType, fieldInfo, VectorProcessingContext);

StopWatch stopWatch = new StopWatch();
stopWatch.start();
indexOperation.buildAndWrite(writer, knnVectorValues);
long time_in_millis = stopWatch.totalTime().millis();
graphBuildTime.incrementBy(time_in_millis);
log.warn("Graph build took " + time_in_millis + " ms for " + operationName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.opensearch.knn.index.engine.qframe.QuantizationConfigParser;
import org.opensearch.knn.index.mapper.KNNVectorFieldMapper;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.plugin.stats.KNNGraphValue;
import org.opensearch.knn.quantization.enums.ScalarQuantizationType;

import java.io.IOException;
Expand Down Expand Up @@ -135,6 +136,8 @@ public void testNativeEngineVectorFormat_whenMultipleVectorFieldIndexed_thenSucc
indexWriter.commit();
indexWriter.close();

assertNotEquals(0L, (long) KNNGraphValue.REFRESH_TOTAL_TIME_IN_MILLIS.getValue());

// Validate to see if correct values are returned, assumption here is only 1 segment is getting created
IndexSearcher searcher = new IndexSearcher(indexReader);
final LeafReader leafReader = searcher.getLeafContexts().get(0).reader();
Expand Down Expand Up @@ -204,6 +207,8 @@ public void testNativeEngineVectorFormat_whenBinaryQuantizationApplied_thenSucce
indexWriter.flush();
indexWriter.commit();
indexWriter.close();
assertNotEquals(0L, (long) KNNGraphValue.REFRESH_TOTAL_TIME_IN_MILLIS.getValue());

IndexSearcher searcher = new IndexSearcher(indexReader);
final LeafReader leafReader = searcher.getLeafContexts().get(0).reader();
SegmentReader segmentReader = Lucene.segmentReader(leafReader);
Expand Down

0 comments on commit e6c5953

Please sign in to comment.