diff --git a/CHANGELOG.md b/CHANGELOG.md index 74f9b1b113248..6b2aecd22dc12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650)) - Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481)) - Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636)) +- Add Open Parameters to Flat_object Field Type ([#13853](https://github.com/opensearch-project/OpenSearch/pull/13853)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 998122d9e5c43..fc1c6bf822cef 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -18,6 +18,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentLocation; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MapperParsingException; import java.io.IOException; import java.math.BigInteger; @@ -44,6 +45,10 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private DeprecationHandler deprecationHandler; + private int depthLimit; + private String nullValue; + private int ignoreAbove; + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; private static final String VALUE_SUFFIX = "._value"; private static final String DOT_SYMBOL = "."; @@ -53,19 +58,25 @@ public JsonToStringXContentParser( NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, XContentParser parser, - String fieldTypeName + String fieldTypeName, + int depthLimit, + String nullValue, + int ignoreAbove ) throws IOException { super(xContentRegistry, deprecationHandler); this.deprecationHandler = deprecationHandler; this.xContentRegistry = xContentRegistry; this.parser = parser; this.fieldTypeName = fieldTypeName; + this.depthLimit = depthLimit; + this.nullValue = nullValue; + this.ignoreAbove = ignoreAbove; } public XContentParser parseObject() throws IOException { builder.startObject(); StringBuilder path = new StringBuilder(fieldTypeName); - parseToken(path, null); + parseToken(path, null, 1); builder.field(this.fieldTypeName, keyList); builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); @@ -74,7 +85,15 @@ public XContentParser parseObject() throws IOException { return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } - private void parseToken(StringBuilder path, String currentFieldName) throws IOException { + private void parseToken(StringBuilder path, String currentFieldName, int depth) throws IOException { + if (depth >= depthLimit) { + throw new MapperParsingException( + "the depth of flat_object field path [" + path + "] is bigger than maximum" + " depth [" + depthLimit + "]" + ); + } + if (depth == 1 && processNoNestedValue()) { + return; + } while (this.parser.nextToken() != Token.END_OBJECT) { if (this.parser.currentName() != null) { @@ -100,12 +119,12 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx this.keyList.add(fieldNameSuffix); } } else if (this.parser.currentToken() == Token.START_ARRAY) { - parseToken(path, currentFieldName); + parseToken(path, currentFieldName, depth); break; } else if (this.parser.currentToken() == Token.END_ARRAY) { // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { - parseToken(path, currentFieldName); + parseToken(path, currentFieldName, depth + 1); int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); if (dotIndex != -1 && path.length() > currentFieldName.length()) { @@ -115,9 +134,10 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx if (!path.toString().contains(currentFieldName)) { path.append(DOT_SYMBOL).append(currentFieldName); } - parseValue(parsedFields); - this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); + if (parseValue(parsedFields)) { + this.valueList.add(parsedFields.toString()); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); + } int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); if (dotIndex != -1 && path.length() > currentFieldName.length()) { path.setLength(path.length() - currentFieldName.length() - 1); @@ -127,13 +147,35 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx } } - private void parseValue(StringBuilder parsedFields) throws IOException { + private boolean processNoNestedValue() throws IOException { + if (parser.currentToken() == Token.VALUE_NULL) { + if (nullValue != null) { + this.valueList.add(nullValue); + } + return true; + } else if (this.parser.currentToken() == Token.VALUE_STRING + || this.parser.currentToken() == Token.VALUE_NUMBER + || this.parser.currentToken() == Token.VALUE_BOOLEAN) { + String value = this.parser.textOrNull(); + if (value != null && value.length() <= ignoreAbove) { + this.valueList.add(value); + } + return true; + } + return false; + } + + private boolean parseValue(StringBuilder parsedFields) throws IOException { switch (this.parser.currentToken()) { case VALUE_BOOLEAN: case VALUE_NUMBER: case VALUE_STRING: case VALUE_NULL: - parsedFields.append(this.parser.textOrNull()); + String value = this.parser.textOrNull(); + if (value != null && value.length() <= ignoreAbove) { + parsedFields.append(value); + return true; + } break; // Handle other token types as needed case FIELD_NAME: @@ -144,6 +186,7 @@ private void parseValue(StringBuilder parsedFields) throws IOException { default: throw new IOException("Unsupported token type [" + parser.currentToken() + "]"); } + return false; } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 9a3f2595a7c9e..1dd383da0a964 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -30,14 +30,18 @@ import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.QueryShardException; +import org.opensearch.index.similarity.SimilarityProvider; import org.opensearch.search.aggregations.support.CoreValuesSourceType; import org.opensearch.search.lookup.SearchLookup; @@ -48,9 +52,12 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.BiFunction; import java.util.function.Supplier; +import static org.opensearch.index.mapper.TypeParsers.DOC_VALUES; +import static org.opensearch.index.mapper.TypeParsers.checkNull; import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; /** @@ -66,6 +73,13 @@ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { private static final String DOT_SYMBOL = "."; private static final String EQUAL_SYMBOL = "="; + private final NamedAnalyzer normalizer; + private final boolean hasDocValues; + private int ignoreAbove; + private final String nullValue; + private final SimilarityProvider similarity; + private final int depthLimit; + /** * In flat_object field mapper, field type is similar to keyword field type * Cannot be tokenized, can OmitNorms, and can setIndexOption. @@ -85,7 +99,15 @@ public static class Defaults { @Override public MappedFieldType keyedFieldType(String key) { - return new FlatObjectFieldType(this.name() + DOT_SYMBOL + key, this.name()); + return new FlatObjectFieldType( + this.name() + DOT_SYMBOL + key, + this.name(), + normalizer, + hasDocValues, + ignoreAbove, + nullValue, + similarity + ); } /** @@ -104,14 +126,31 @@ public FlatObjectField(String field, BytesRef term, FieldType ft) { * @opensearch.internal */ public static class Builder extends FieldMapper.Builder { - - public Builder(String name) { + private NamedAnalyzer normalizer; + private int ignoreAbove = Integer.MAX_VALUE; + private String nullValue; + private SimilarityProvider similarity; + private int depthLimit = Integer.MAX_VALUE; + private final IndexAnalyzers indexAnalyzers; + + // Visible for testing + public Builder(String name, IndexAnalyzers indexAnalyzers) { super(name, Defaults.FIELD_TYPE); builder = this; + this.indexAnalyzers = indexAnalyzers; + } + + public void setNormalizer(String normalizer) { + this.normalizer = this.indexAnalyzers.getNormalizer(normalizer); + this.indexAnalyzer = this.normalizer; + this.searchAnalyzer = this.normalizer; + if (this.normalizer == null) { + throw new MapperParsingException("normalizer [" + normalizer + "] is not supported in flat_object"); + } } private FlatObjectFieldType buildFlatObjectFieldType(BuilderContext context, FieldType fieldType) { - return new FlatObjectFieldType(buildFullName(context), fieldType); + return new FlatObjectFieldType(buildFullName(context), fieldType, this); } /** @@ -158,12 +197,17 @@ public FlatObjectFieldMapper build(BuilderContext context) { } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); /** * Creates a new TypeParser for flatObjectFieldMapper that does not use ParameterizedFieldMapper */ public static class TypeParser implements Mapper.TypeParser { + public static final String NORMALIZER = "normalizer"; + public static final String IGNORE_ABOVE = "ignore_above"; + public static final String NULL_VALUE = "null_value"; + public static final String SIMILARITY = "similarity"; + public static final String DEPTH_LIMIT = "depth_limit"; private final BiFunction builderFunction; public TypeParser(BiFunction builderFunction) { @@ -173,6 +217,38 @@ public TypeParser(BiFunction builderFunction) { @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = builderFunction.apply(name, parserContext); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + checkNull(propName, propNode); + switch (propName) { + case NORMALIZER: + builder.setNormalizer(XContentMapValues.nodeStringValue(propNode)); + iterator.remove(); + break; + case DOC_VALUES: + builder.hasDocValues = XContentMapValues.nodeBooleanValue(propNode); + iterator.remove(); + break; + case IGNORE_ABOVE: + builder.ignoreAbove = XContentMapValues.nodeIntegerValue(propNode); + iterator.remove(); + break; + case NULL_VALUE: + builder.nullValue = XContentMapValues.nodeStringValue(propNode); + iterator.remove(); + break; + case SIMILARITY: + builder.similarity = TypeParsers.resolveSimilarity(parserContext, name, propNode); + iterator.remove(); + break; + case DEPTH_LIMIT: + builder.depthLimit = XContentMapValues.nodeIntegerValue(propNode); + iterator.remove(); + break; + } + } return builder; } } @@ -193,27 +269,32 @@ public static final class FlatObjectFieldType extends StringFieldType { private KeywordFieldMapper.KeywordFieldType valueAndPathFieldType; public FlatObjectFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { - super(name, isSearchable, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; this.mappedFieldTypeName = null; } - public FlatObjectFieldType(String name, FieldType fieldType) { + public FlatObjectFieldType(String name, FieldType fieldType, Builder builder) { super( name, fieldType.indexOptions() != IndexOptions.NONE, false, - true, - new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + builder.hasDocValues, + new TextSearchInfo(fieldType, builder.similarity, getNamedAnalyzer(builder), getNamedAnalyzer(builder)), Collections.emptyMap() ); - this.ignoreAbove = Integer.MAX_VALUE; - this.nullValue = null; + setIndexAnalyzer(builder.normalizer); + this.ignoreAbove = builder.ignoreAbove; + this.nullValue = builder.nullValue; this.mappedFieldTypeName = null; } + private static NamedAnalyzer getNamedAnalyzer(Builder builder) { + return Objects.requireNonNullElse(builder.searchAnalyzer, Lucene.KEYWORD_ANALYZER); + } + public FlatObjectFieldType(String name, NamedAnalyzer analyzer) { super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap()); this.ignoreAbove = Integer.MAX_VALUE; @@ -221,17 +302,31 @@ public FlatObjectFieldType(String name, NamedAnalyzer analyzer) { this.mappedFieldTypeName = null; } - public FlatObjectFieldType(String name, String mappedFieldTypeName) { + public FlatObjectFieldType( + String name, + String mappedFieldTypeName, + NamedAnalyzer normalizer, + boolean hasDocValues, + int ignoreAbove, + String nullValue, + SimilarityProvider similarity + ) { super( name, true, false, - true, - new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + hasDocValues, + new TextSearchInfo( + Defaults.FIELD_TYPE, + similarity, + normalizer == null ? Lucene.KEYWORD_ANALYZER : normalizer, + normalizer == null ? Lucene.KEYWORD_ANALYZER : normalizer + ), Collections.emptyMap() ); - this.ignoreAbove = Integer.MAX_VALUE; - this.nullValue = null; + setIndexAnalyzer(normalizer); + this.ignoreAbove = ignoreAbove; + this.nullValue = nullValue; this.mappedFieldTypeName = mappedFieldTypeName; } @@ -544,6 +639,12 @@ public Query wildcardQuery( this.valueFieldMapper = valueFieldMapper; this.valueAndPathFieldMapper = valueAndPathFieldMapper; this.mappedFieldType = mappedFieldType; + this.normalizer = builder.normalizer; + this.hasDocValues = builder.hasDocValues; + this.ignoreAbove = builder.ignoreAbove; + this.nullValue = builder.nullValue; + this.similarity = builder.similarity; + this.depthLimit = builder.depthLimit; } @Override @@ -553,7 +654,25 @@ protected FlatObjectFieldMapper clone() { @Override protected void mergeOptions(FieldMapper other, List conflicts) { - + FlatObjectFieldMapper mappers = (FlatObjectFieldMapper) other; + if (!Objects.equals(this.normalizer, mappers.normalizer)) { + conflicts.add("mapper [" + name() + "] has different [normalizer]"); + } + if (!Objects.equals(this.hasDocValues, mappers.hasDocValues)) { + conflicts.add("mapper [" + name() + "] has different [doc_values]"); + } + if (!Objects.equals(this.ignoreAbove, mappers.ignoreAbove)) { + this.ignoreAbove = mappers.ignoreAbove; + } + if (!Objects.equals(this.nullValue, mappers.nullValue)) { + conflicts.add("mapper [" + name() + "] has different [null_value]"); + } + if (!Objects.equals(this.similarity, mappers.similarity)) { + conflicts.add("mapper [" + name() + "] has different [similarity]"); + } + if (!Objects.equals(this.depthLimit, mappers.depthLimit)) { + conflicts.add("mapper [" + name() + "] has different [depth_limit]"); + } } @Override @@ -573,7 +692,10 @@ protected void parseCreateField(ParseContext context) throws IOException { NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, context.parser(), - fieldType().name() + fieldType().name(), + depthLimit, + nullValue, + ignoreAbove ); /* JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser @@ -627,6 +749,9 @@ public Iterator iterator() { */ private void parseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { + if (value == null) { + return; + } NamedAnalyzer normalizer = fieldType().normalizer(); if (normalizer != null) { value = normalizeValue(normalizer, name(), value); @@ -639,12 +764,12 @@ private void parseValueAddFields(ParseContext context, String value, String fiel // convert to utf8 only once before feeding postings/dv/stored fields final BytesRef binaryValue = new BytesRef(fieldType().name() + DOT_SYMBOL + value); - Field field = new FlatObjectField(fieldType().name(), binaryValue, fieldType); if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { createFieldNamesField(context); } if (fieldName.equals(fieldType().name())) { + Field field = new FlatObjectField(fieldType().name(), binaryValue, fieldType); context.doc().add(field); } if (valueType.equals(VALUE_SUFFIX)) { @@ -679,32 +804,60 @@ private void parseValueAddFields(ParseContext context, String value, String fiel } private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException { - String normalizerErrorMessage = "The normalization token stream is " - + "expected to produce exactly 1 token, but got 0 for analyzer " - + normalizer - + " and input \"" - + value - + "\""; try (TokenStream ts = normalizer.tokenStream(field, value)) { final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); if (ts.incrementToken() == false) { - throw new IllegalStateException(normalizerErrorMessage); + throw new IllegalStateException(errorMessage(normalizer, value)); } final String newValue = termAtt.toString(); if (ts.incrementToken()) { - throw new IllegalStateException(normalizerErrorMessage); + throw new IllegalStateException(errorMessage(normalizer, value)); } ts.end(); return newValue; } } + private static String errorMessage(NamedAnalyzer normalizer, String value) { + return "The normalization token stream is " + + "expected to produce exactly 1 token, but got 0 for analyzer " + + normalizer + + " and input \"" + + value + + "\""; + + } + @Override protected String contentType() { return CONTENT_TYPE; } + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + if (includeDefaults || normalizer != null) { + builder.field("normalizer", normalizer.name()); + } + if (includeDefaults || ignoreAbove != Integer.MAX_VALUE) { + builder.field("ignore_above", ignoreAbove); + } + if (includeDefaults || nullValue != null) { + builder.field("null_value", nullValue); + } + if (includeDefaults || similarity != null) { + builder.field("similarity", similarity.name()); + } + if (includeDefaults || depthLimit != Integer.MAX_VALUE) { + builder.field("depth_limit", depthLimit); + } + } + + public int ignoreAbove() { + return ignoreAbove; + } + private static final class ValueAndPathFieldMapper extends FieldMapper { protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java index 0feb7bcd1ceec..f48d8baf402c9 100644 --- a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -12,13 +12,19 @@ import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.test.OpenSearchTestCase; +import org.hamcrest.Matchers; import java.io.IOException; +import org.mockito.Mockito; + +import static org.mockito.Mockito.when; + public class JsonToStringXContentParserTests extends OpenSearchTestCase { - private String flattenJsonString(String fieldName, String in) throws IOException { + private String flattenJsonString(String fieldName, String in, int depthLimit, String nullValue, int ignoreAbove) throws IOException { String transformed; try ( XContentParser parser = JsonXContent.jsonXContent.createParser( @@ -31,7 +37,10 @@ private String flattenJsonString(String fieldName, String in) throws IOException xContentRegistry(), DeprecationHandler.THROW_UNSUPPORTED_OPERATION, parser, - fieldName + fieldName, + depthLimit, + nullValue, + ignoreAbove ); // Skip the START_OBJECT token: jsonToStringXContentParser.nextToken(); @@ -53,7 +62,7 @@ public void testNestedObjects() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -68,7 +77,7 @@ public void testChildHasDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -87,7 +96,7 @@ public void testNestChildObjectWithDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) ); } @@ -106,8 +115,96 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + "}", - flattenJsonString("flat", jsonExample) + flattenJsonString("flat", jsonExample, 5, null, 100) + ); + } + + public void testDepthLimit() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + MapperParsingException e = expectThrows(MapperParsingException.class, () -> flattenJsonString("flat", jsonExample, 2, null, 100)); + assertThat( + e.getRootCause().getMessage(), + Matchers.containsString("the depth of flat_object field path [flat.second.inner] is bigger than maximum depth [2]") + ); + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample, 3, null, 100) + ); + } + + public void testIgnoreAbove() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample, 5, null, 5) + ); + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\"]" + + "}", + flattenJsonString("flat", jsonExample, 5, null, 4) ); } + public void testNullValue() throws IOException { + + XContentParser mapper = Mockito.mock(XContentParser.class); + when(mapper.currentToken()).thenReturn(XContentParser.Token.VALUE_NULL); + + JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + mapper, + "flat", + 5, + "ddd", + 100 + ); + + XContentParser transformedParser = jsonToStringXContentParser.parseObject(); + try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { + jsonBuilder.copyCurrentStructure(transformedParser); + assertEquals("{\"flat\":[],\"flat._value\":[\"ddd\"],\"flat._valueAndPath\":[]}", jsonBuilder.toString()); + } + + jsonToStringXContentParser = new JsonToStringXContentParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + mapper, + "flat", + 5, + null, + 100 + ); + + transformedParser = jsonToStringXContentParser.parseObject(); + try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { + jsonBuilder.copyCurrentStructure(transformedParser); + assertEquals("{\"flat\":[],\"flat._value\":[],\"flat._valueAndPath\":[]}", jsonBuilder.toString()); + } + } } diff --git a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java index 63c74b3cfa64f..7b249bb7342c8 100644 --- a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java +++ b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java @@ -144,7 +144,7 @@ public > IFD getForField(String type, String field } else if (type.equals("geo_point")) { fieldType = new GeoPointFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); } else if (type.equals("flat_object")) { - fieldType = new FlatObjectFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); + fieldType = new FlatObjectFieldMapper.Builder(fieldName, null).docValues(docValues).build(context).fieldType(); } else if (type.equals("binary")) { fieldType = new BinaryFieldMapper.Builder(fieldName, docValues).build(context).fieldType(); } else { diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index 637072c8886c1..7e911d5496bed 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -8,6 +8,10 @@ package org.opensearch.index.mapper; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -19,13 +23,29 @@ import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.analysis.AnalyzerScope; +import org.opensearch.index.analysis.CharFilterFactory; +import org.opensearch.index.analysis.CustomAnalyzer; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.LowercaseNormalizer; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.analysis.TokenFilterFactory; +import org.opensearch.index.analysis.TokenizerFactory; import org.opensearch.index.query.QueryShardContext; +import org.hamcrest.Matchers; import java.io.IOException; +import java.util.Map; +import static java.util.Collections.singletonMap; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.DEPTH_LIMIT; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.IGNORE_ABOVE; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.NORMALIZER; +import static org.opensearch.index.mapper.FlatObjectFieldMapper.TypeParser.NULL_VALUE; +import static org.opensearch.index.mapper.TypeParsers.DOC_VALUES; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.core.IsEqual.equalTo; -import static org.hamcrest.core.StringContains.containsString; public class FlatObjectFieldMapperTests extends MapperTestCase { private static final String FIELD_TYPE = "flat_object"; @@ -70,6 +90,7 @@ protected void assertExistsQuery(MappedFieldType fieldType, Query query, ParseCo } } + @Override public void minimalMapping(XContentBuilder b) throws IOException { b.field("type", FIELD_TYPE); } @@ -84,6 +105,7 @@ protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.endObject(); } + @Override public void testMinimalToMaximal() throws IOException { XContentBuilder orig = JsonXContent.contentBuilder().startObject(); createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS); @@ -118,6 +140,9 @@ public void testDefaults() throws Exception { assertFalse(fieldType.stored()); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + fieldType = fields[1].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.NONE)); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); // Test internal substring fields as well IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); @@ -131,16 +156,237 @@ public void testDefaults() throws Exception { assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue()); } + public void testIgnoreAbove() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("ignore_above", 5))); + + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + + json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "opensearch") + .endObject() + .endObject() + .toString(); + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(0, fieldValues.length); + + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(0, fieldValueAndPaths.length); + } + public void testNullValue() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.nullField("field")))); - assertThat(e.getMessage(), containsString("object mapping for [_doc] tried to parse field [field] as object")); + ParsedDocument doc = mapper.parse(source(b -> b.nullField("field"))); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("null_value", "bar"))); + doc = mapper.parse(source(b -> {})); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(0, fields.length); + + doc = mapper.parse(source(b -> b.nullField("field"))); + fields = doc.rootDoc().getFields("field"); + assertEquals(0, fields.length); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(0, fieldValueAndPaths.length); + } + + public void testDisableDocValues() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("doc_values", false))); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("foo", "bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(1, fieldValues.length); + assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(1, fieldValueAndPaths.length); + assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + } + + public void testNormalizer() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "lowercase"))); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("Foo", "Bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.foo"), fields[0].binaryValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.DOCS)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue()); + } + + public void testDepthLimit() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "flat_object").field("depth_limit", "2"))); + String json = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .field("Foo", "Bar") + .endObject() + .endObject() + .toString(); + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.Foo"), fields[0].binaryValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.indexOptions(), Matchers.equalTo(IndexOptions.DOCS)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("Bar"), fieldValues[0].binaryValue()); + + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField); + assertEquals(new BytesRef("field.Foo=Bar"), fieldValueAndPaths[0].binaryValue()); + + // beyond depth_limit + String json1 = XContentFactory.jsonBuilder() + .startObject() + .startObject("field") + .startObject("field1") + .field("Foo", "Bar") + .endObject() + .endObject() + .endObject() + .toString(); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(json1))); + assertThat( + e.getRootCause().getMessage(), + Matchers.containsString("the depth of flat_object field path [field.field1] is bigger than maximum depth [2]") + ); + } + + public void testUpdateNormalizer() throws IOException { + MapperService mapperService = createMapperService( + fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "lowercase")) + ); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> merge(mapperService, fieldMapping(b -> b.field("type", "flat_object").field("normalizer", "other_lowercase"))) + ); + assertEquals( + "Mapper for [field] conflicts with existing mapping:\n" + + "[mapper [field] has different [analyzer], mapper [field] has different [normalizer]]", + e.getMessage() + ); + } + + public void testConfigureSimilarity() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> b.field("type", "flat_object").field("similarity", "boolean"))); + MappedFieldType ft = mapperService.documentMapper().fieldTypes().get("field"); + assertEquals("boolean", ft.getTextSearchInfo().getSimilarity().name()); + + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> merge(mapperService, fieldMapping(b -> b.field("type", "flat_object").field("similarity", "BM25"))) + ); + assertThat(e.getMessage(), Matchers.containsString("mapper [field] has different [similarity]")); } @Override protected void registerParameters(ParameterChecker checker) throws IOException { - // In the future we will want to make sure parameter updates are covered. + checker.registerConflictCheck(DOC_VALUES, b -> b.field(DOC_VALUES, false)); + checker.registerConflictCheck(NULL_VALUE, b -> b.field(NULL_VALUE, "foo")); + checker.registerConflictCheck(NORMALIZER, b -> b.field(NORMALIZER, "lowercase")); + checker.registerConflictCheck(DEPTH_LIMIT, b -> b.field(DEPTH_LIMIT, "34")); + checker.registerUpdateCheck(b -> b.field(IGNORE_ABOVE, 256), m -> assertEquals(256, ((FlatObjectFieldMapper) m).ignoreAbove())); + checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); + } + + @Override + protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { + return new IndexAnalyzers( + singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())), + Map.of( + "lowercase", + new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()), + "other_lowercase", + new NamedAnalyzer("other_lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()) + ), + singletonMap( + "lowercase", + new NamedAnalyzer( + "lowercase", + AnalyzerScope.INDEX, + new CustomAnalyzer( + TokenizerFactory.newFactory("lowercase", WhitespaceTokenizer::new), + new CharFilterFactory[0], + new TokenFilterFactory[] { new TokenFilterFactory() { + + @Override + public String name() { + return "lowercase"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new LowerCaseFilter(tokenStream); + } + } } + ) + ) + ) + ); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java index 9ec053dc59d10..4b5022bc4b874 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java @@ -27,7 +27,7 @@ public class FlatObjectFieldTypeTests extends FieldTypeTestCase { private static MappedFieldType getFlatParentFieldType(String fieldName) { Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath()); - MappedFieldType flatParentFieldType = new FlatObjectFieldMapper.Builder(fieldName).build(context).fieldType(); + MappedFieldType flatParentFieldType = new FlatObjectFieldMapper.Builder(fieldName, null).build(context).fieldType(); return flatParentFieldType; } @@ -60,7 +60,15 @@ public void testDirectSubfield() { String searchFieldName = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).directSubfield(); assertEquals("field._value", searchFieldName); - MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("bar", flatParentFieldType.name()); + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType( + "bar", + flatParentFieldType.name(), + null, + true, + Integer.MAX_VALUE, + null, + null + ); // when searching for "foo" in "field.bar", the directSubfield is field._valueAndPath field String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); assertEquals("field._valueAndPath", searchFieldNameDocPath); @@ -79,7 +87,15 @@ public void testRewriteValue() { String searchValues = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).rewriteValue("foo"); assertEquals("foo", searchValues); - MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", flatParentFieldType.name()); + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType( + "field.bar", + flatParentFieldType.name(), + null, + true, + Integer.MAX_VALUE, + null, + null + ); // when searching for "foo" in "field.bar", the rewrite value is "field.bar=foo" String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); @@ -97,7 +113,15 @@ public void testTermQuery() { assertEquals("foo", searchValues); assertEquals(new TermQuery(new Term(searchFieldName, searchValues)), flatParentFieldType.termQuery(searchValues, null)); - MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", flatParentFieldType.name()); + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType( + "field.bar", + flatParentFieldType.name(), + null, + true, + Integer.MAX_VALUE, + null, + null + ); // when searching for "foo" in "field.bar", the term query is directed to search in field._valueAndPath field String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); @@ -117,7 +141,15 @@ public void testExistsQuery() { assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.NAME, "field")), ft.existsQuery(null)); // when checking if a subfield within the flat_object, for example, "field.bar", use term query in the flat_object field - MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", ft.name()); + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType( + "field.bar", + ft.name(), + null, + true, + Integer.MAX_VALUE, + null, + null + ); assertEquals(new TermQuery(new Term("field", "field.bar")), dynamicMappedFieldType.existsQuery(null)); }