Skip to content

Commit

Permalink
use the correct type to widen the sort fields when merging top docs (o…
Browse files Browse the repository at this point in the history
…pensearch-project#16881)

* use the correct type to widen the sort fields when merging top docs

Signed-off-by: panguixin <[email protected]>

* fix

Signed-off-by: panguixin <[email protected]>

* apply commments

Signed-off-by: panguixin <[email protected]>

* changelog

Signed-off-by: panguixin <[email protected]>

* add more tests

Signed-off-by: panguixin <[email protected]>

---------

Signed-off-by: panguixin <[email protected]>
  • Loading branch information
bugmakerrrrrr authored Jan 10, 2025
1 parent cc990c0 commit bbcbd21
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Indexed IP field supports `terms_query` with more than 1025 IP masks [#16391](https://github.com/opensearch-project/OpenSearch/pull/16391)
- Make entries for dependencies from server/build.gradle to gradle version catalog ([#16707](https://github.com/opensearch-project/OpenSearch/pull/16707))
- Allow extended plugins to be optional ([#16909](https://github.com/opensearch-project/OpenSearch/pull/16909))
- Use the correct type to widen the sort fields when merging top docs ([#16881](https://github.com/opensearch-project/OpenSearch/pull/16881))

### Deprecated
- Performing update operation with default pipeline or final pipeline is deprecated ([#16712](https://github.com/opensearch-project/OpenSearch/pull/16712))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.Numbers;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
Expand All @@ -63,6 +64,7 @@
import org.opensearch.search.SearchHit;
import org.opensearch.search.SearchHits;
import org.opensearch.test.InternalSettingsPlugin;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.ParameterizedDynamicSettingsOpenSearchIntegTestCase;
import org.hamcrest.Matchers;

Expand All @@ -82,7 +84,9 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.index.query.QueryBuilders.functionScoreQuery;
Expand Down Expand Up @@ -2609,4 +2613,99 @@ public void testSimpleSortsPoints() throws Exception {

assertThat(searchResponse.toString(), not(containsString("error")));
}

public void testSortMixedIntegerNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
AtomicInteger counter = new AtomicInteger();
index("long", () -> Long.MAX_VALUE - counter.getAndIncrement());
index("integer", () -> Integer.MAX_VALUE - counter.getAndIncrement());
SearchResponse searchResponse = client().prepareSearch("long", "integer")
.setQuery(matchAllQuery())
.setSize(10)
.addSort(SortBuilders.fieldSort("field").order(SortOrder.ASC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
long[] sortValues = new long[10];
for (int i = 0; i < 10; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).longValue();
}
for (int i = 1; i < 10; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThan(sortValues[i]));
}
}

public void testSortMixedFloatingNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
AtomicInteger counter = new AtomicInteger();
index("double", () -> 100.5 - counter.getAndIncrement());
counter.set(0);
index("float", () -> 200.5 - counter.getAndIncrement());
counter.set(0);
index("half_float", () -> 300.5 - counter.getAndIncrement());
SearchResponse searchResponse = client().prepareSearch("double", "float", "half_float")
.setQuery(matchAllQuery())
.setSize(15)
.addSort(SortBuilders.fieldSort("field").order(SortOrder.ASC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
double[] sortValues = new double[15];
for (int i = 0; i < 15; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).doubleValue();
}
for (int i = 1; i < 15; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThan(sortValues[i]));
}
}

public void testSortMixedFloatingAndIntegerNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
index("long", () -> randomLongBetween(0, (long) 2E53 - 1));
index("integer", OpenSearchTestCase::randomInt);
index("double", OpenSearchTestCase::randomDouble);
index("float", () -> randomFloat());
boolean asc = randomBoolean();
SearchResponse searchResponse = client().prepareSearch("long", "integer", "double", "float")
.setQuery(matchAllQuery())
.setSize(20)
.addSort(SortBuilders.fieldSort("field").order(asc ? SortOrder.ASC : SortOrder.DESC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
double[] sortValues = new double[20];
for (int i = 0; i < 20; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).doubleValue();
}
if (asc) {
for (int i = 1; i < 20; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThanOrEqualTo(sortValues[i]));
}
} else {
for (int i = 1; i < 20; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], greaterThanOrEqualTo(sortValues[i]));
}
}
}

private void index(String type, Supplier<Number> valueSupplier) throws Exception {
assertAcked(
prepareCreate(type).setMapping(
XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("field")
.field("type", type)
.endObject()
.endObject()
.endObject()
).setSettings(Settings.builder().put("index.number_of_shards", 3).put("index.number_of_replicas", 0))
);
ensureGreen(type);
for (int i = 0; i < 5; i++) {
client().prepareIndex(type)
.setId(Integer.toString(i))
.setSource("{\"field\" : " + valueSupplier.get() + " }", XContentType.JSON)
.get();
}
client().admin().indices().prepareRefresh(type).get();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
import org.opensearch.core.common.breaker.CircuitBreaker;
import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.SearchHit;
import org.opensearch.search.SearchHits;
Expand Down Expand Up @@ -604,36 +605,51 @@ private static void validateMergeSortValueFormats(Collection<? extends SearchPha
* support sort optimization, we removed type widening there and taking care here during merging.
* More details here https://github.com/opensearch-project/OpenSearch/issues/6326
*/
// TODO: should we check the compatibility between types
private static Sort createSort(TopFieldDocs[] topFieldDocs) {
final SortField[] firstTopDocFields = topFieldDocs[0].fields;
final SortField[] newFields = new SortField[firstTopDocFields.length];
for (int fieldIndex = 0; fieldIndex < firstTopDocFields.length; fieldIndex++) {
SortField.Type firstType = getSortType(firstTopDocFields[fieldIndex]);
newFields[fieldIndex] = firstTopDocFields[fieldIndex];
if (SortedWiderNumericSortField.isTypeSupported(firstType) == false) {
continue;
}

for (int i = 0; i < firstTopDocFields.length; i++) {
final SortField delegate = firstTopDocFields[i];
final SortField.Type type = delegate instanceof SortedNumericSortField
? ((SortedNumericSortField) delegate).getNumericType()
: delegate.getType();
boolean requireWiden = false;
boolean isFloat = firstType == SortField.Type.FLOAT || firstType == SortField.Type.DOUBLE;
for (int shardIndex = 1; shardIndex < topFieldDocs.length; shardIndex++) {
final SortField sortField = topFieldDocs[shardIndex].fields[fieldIndex];
SortField.Type sortType = getSortType(sortField);
if (SortedWiderNumericSortField.isTypeSupported(sortType) == false) {
// throw exception if sortType is not CUSTOM?
// skip this shard or do not widen?
requireWiden = false;
break;
}
requireWiden = requireWiden || sortType != firstType;
isFloat = isFloat || sortType == SortField.Type.FLOAT || sortType == SortField.Type.DOUBLE;
}

if (SortedWiderNumericSortField.isTypeSupported(type) && isSortWideningRequired(topFieldDocs, i)) {
newFields[i] = new SortedWiderNumericSortField(delegate.getField(), type, delegate.getReverse());
} else {
newFields[i] = firstTopDocFields[i];
if (requireWiden) {
newFields[fieldIndex] = new SortedWiderNumericSortField(
firstTopDocFields[fieldIndex].getField(),
isFloat ? SortField.Type.DOUBLE : SortField.Type.LONG,
firstTopDocFields[fieldIndex].getReverse()
);
}
}
return new Sort(newFields);
}

/**
* It will compare respective SortField between shards to see if any shard results have different
* field mapping type, accordingly it will decide to widen the sort fields.
*/
private static boolean isSortWideningRequired(TopFieldDocs[] topFieldDocs, int sortFieldindex) {
for (int i = 0; i < topFieldDocs.length - 1; i++) {
if (!topFieldDocs[i].fields[sortFieldindex].equals(topFieldDocs[i + 1].fields[sortFieldindex])) {
return true;
}
private static SortField.Type getSortType(SortField sortField) {
if (sortField.getComparatorSource() instanceof IndexFieldData.XFieldComparatorSource) {
return ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType();
} else {
return sortField instanceof SortedNumericSortField
? ((SortedNumericSortField) sortField).getNumericType()
: sortField.getType();
}
return false;
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@
import org.apache.lucene.search.comparators.NumericComparator;

import java.io.IOException;
import java.util.Comparator;

/**
* Sorted numeric field for wider sort types,
* to help sorting two different numeric types.
* Sorted numeric field for wider sort types, to help sorting two different numeric types.
* NOTE: the unsigned_long is not supported by widening sort since the unsigned_long could not be used with other types
*
* @opensearch.internal
*/
public class SortedWiderNumericSortField extends SortedNumericSortField {
private final int byteCounts;
private final Comparator<Number> comparator;

/**
* Creates a sort, possibly in reverse, specifying how the sort value from the document's set is
* selected.
Expand All @@ -39,6 +43,15 @@ public class SortedWiderNumericSortField extends SortedNumericSortField {
*/
public SortedWiderNumericSortField(String field, Type type, boolean reverse) {
super(field, type, reverse);
if (type == Type.LONG) {
byteCounts = Long.BYTES;
comparator = Comparator.comparingLong(Number::longValue);
} else if (type == Type.DOUBLE) {
byteCounts = Double.BYTES;
comparator = Comparator.comparingDouble(Number::doubleValue);
} else {
throw new IllegalArgumentException("Unsupported numeric type: " + type);
}
}

/**
Expand All @@ -51,7 +64,7 @@ public SortedWiderNumericSortField(String field, Type type, boolean reverse) {
*/
@Override
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new NumericComparator<Number>(getField(), (Number) getMissingValue(), getReverse(), pruning, Double.BYTES) {
return new NumericComparator<Number>(getField(), (Number) getMissingValue(), getReverse(), pruning, byteCounts) {
@Override
public int compare(int slot1, int slot2) {
throw new UnsupportedOperationException();
Expand All @@ -78,7 +91,7 @@ public int compareValues(Number first, Number second) {
} else if (second == null) {
return 1;
} else {
return Double.compare(first.doubleValue(), second.doubleValue());
return comparator.compare(first, second);
}
}
};
Expand Down

0 comments on commit bbcbd21

Please sign in to comment.