From 9194b7affdf0fc560d884918e490de010f5673ce Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 30 Aug 2024 12:35:29 +0800 Subject: [PATCH] Fix null values when indexing in flat_object field (#14069) Signed-off-by: kkewwei --- CHANGELOG.md | 1 + .../test/index/91_flat_object_null_value.yml | 401 ++++++++++++++++++ .../xcontent/JsonToStringXContentParser.java | 66 ++- .../JsonToStringXContentParserTests.java | 26 +- .../mapper/FlatObjectFieldMapperTests.java | 248 +++++++++++ 5 files changed, 716 insertions(+), 26 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index fe1cee57279d2..d0263752a2a43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,6 +83,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) - Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) - Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) +- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml new file mode 100644 index 0000000000000..98abd58a54e4b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml @@ -0,0 +1,401 @@ +--- +# The test setup includes: +# - Create flat_object mapping for flat_object_null_value index +# - Index 19 example documents related to null value +# - Refresh the index so it is ready for search tests + +setup: + - skip: + 
version: " - 2.99.99" + reason: "null value in flat_object is processed in 3.0.0 " + - do: + indices.create: + index: flat_object_null_value + body: + mappings: + properties: + record: + type: "flat_object" + - do: + index: + index: flat_object_null_value + id: 1 + body: { + "record": null + } + + - do: + index: + index: flat_object_null_value + id: 2 + body: { + "record": { + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 3 + body: { + "record": { + "name": null, + "age":"5", + "name1": null + } + } + + - do: + index: + index: flat_object_null_value + id: 4 + body: { + "record": { + "name": [ + null, + { + "d": { + "name": "dsds" + } + } + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 5 + body: { + "record": { + "name": [ + { + "d": { + "name": "dsds" + } + }, + null + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 6 + body: { + "record": { + "name": [ + { + "name": "age1" + }, + null, + { + "d": { + "name": "dsds" + } + } + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 7 + body: { + "record": { + "name": null, + "age":"3" + } + } + + - do: + index: + index: flat_object_null_value + id: 8 + body: { + "record": { + "age":"3", + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 9 + body: { + "record": { + "name": [ + null, + 3 + ], + "age": 4 + } + } + + - do: + index: + index: flat_object_null_value + id: 10 + body: { + "record": { + "age": 4, + "name": [ + null, + 3 + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 11 + body: { + "record": { + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 12 + body: { + "record": { + "r1": { + "labels": [ + null + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 13 + body: { + "record": { + "labels": [ + null + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 14 + body: { + "record": { + "r1": { + "name": null, + "labels": 
[ + null + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 15 + body: { + "record": { + "age": "4", + "labels": [ + null + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 16 + body: { + "record": { + "labels": [ + null + ], + "age": "4" + } + } + + - do: + index: + index: flat_object_null_value + id: 17 + body: { + "record": { + "name": { + "name1": [ + null, + "dsdsdsd" + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 18 + body: { + "record": { + "name": { + "name1": { + "name2": null + } + } + } + } + + - do: + index: + index: flat_object_null_value + id: 19 + body: { + "record": { + "name": { + "name1": [ + [], + [ + "dsdsdsd", + null + ] + ] + } + } + } + + - do: + indices.refresh: + index: flat_object_null_value +--- +# Delete the index during teardown +teardown: + - do: + indices.delete: + index: flat_object_null_value + + +--- +# Verify that mappings under the record field did not expand +# and no dynamic fields were created. +"Mappings": + - skip: + version: " - 2.99.99" + reason: "null value in flat_object is processed in 3.0.0" + + - do: + indices.get_mapping: + index: flat_object_null_value + - is_true: flat_object_null_value.mappings + - match: { flat_object_null_value.mappings.properties.record.type: flat_object } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { flat_object_null_value.mappings.properties: 1 } + + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "null value in flat_object is processed in 3.0.0" + + # Verify Document Count + - do: + search: + body: { + size: 30, + query: { + match_all: { } + } + } + + - length: { hits.hits: 19 } + + # Exists Query with no dot path. 
+ - do: + search: + body: { + _source: true, + size: 30, + query: { + exists: { "field": "record" } + } + } + + - length: { hits.hits: 12 } + - match: { hits.hits.0._source.record: { "name": null, "age": "5", "name1": null } } + - match: { hits.hits.1._source.record.name: [ null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.2._source.record.name: [ { "d": { "name": "dsds" } }, null ] } + - match: { hits.hits.3._source.record.name: [ { "name": "age1" }, null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.4._source.record: { "name": null, "age": "3" } } + - match: { hits.hits.5._source.record: { "age": "3", "name": null } } + - match: { hits.hits.6._source.record: { "name": [ null, 3 ], "age": 4 } } + - match: { hits.hits.7._source.record: { "age": 4, "name": [ null, 3 ] } } + - match: { hits.hits.8._source.record: { "age": "4", "labels": [ null ] } } + - match: { hits.hits.9._source.record: { "labels": [ null ], "age": "4" } } + - match: { hits.hits.10._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.11._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Exists Query with dot path. + - do: + search: + body: { + _source: true, + query: { + exists: { "field": "record.d" } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.0._source.record.name: [ null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.1._source.record.name: [ { "d": { "name": "dsds" } }, null ] } + - match: { hits.hits.2._source.record.name: [ { "name": "age1" }, null, { "d": { "name": "dsds" } } ] } + + # Term Query without exact dot path. + - do: + search: + body: { + _source: true, + query: { + term: { record: "dsdsdsd" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.1._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Term Query with exact dot path. 
+ - do: + search: + body: { + _source: true, + query: { + term: { record.name.name1: "dsdsdsd" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.1._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Test "null" string search. + - do: + search: + body: { + _source: true, + query: { + term: { record: "null" } + } + } + + - length: { hits.hits: 0 } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index d24571fc5778d..2f60fc8f69f87 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Deque; +import java.util.HashSet; import java.util.LinkedList; /** @@ -72,21 +73,31 @@ public XContentParser parseObject() throws IOException { builder.startObject(); LinkedList path = new LinkedList<>(Collections.singleton(fieldTypeName)); while (currentToken() != Token.END_OBJECT) { - parseToken(path); + parseToken(path, null); } - builder.field(this.fieldTypeName, keyList); - builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); - builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); + // deduplicate keyList, valueList and valueAndPathList + builder.field(this.fieldTypeName, new HashSet<>(keyList)); + builder.field(this.fieldTypeName + VALUE_SUFFIX, new HashSet<>(valueList)); + builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, new HashSet<>(valueAndPathList)); builder.endObject(); String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, MediaTypeRegistry.JSON); return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, 
String.valueOf(jString)); } - private void parseToken(Deque path) throws IOException { + /** + * @return true if the child object contains a non-null value, false otherwise + */ + private boolean parseToken(Deque path, String currentFieldName) throws IOException { + if (path.size() == 1 && processNoNestedValue()) { + return true; + } + boolean isChildrenValueValid = false; + boolean visitFieldName = false; if (this.parser.currentToken() == Token.FIELD_NAME) { - String fieldName = this.parser.currentName(); - path.addLast(fieldName); // Pushing onto the stack *must* be matched by pop - String parts = fieldName; + currentFieldName = this.parser.currentName(); + path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop + visitFieldName = true; + String parts = currentFieldName; while (parts.contains(".")) { // Extract the intermediate keys maybe present in fieldName int dotPos = parts.indexOf('.'); String part = parts.substring(0, dotPos); @@ -95,30 +106,61 @@ private void parseToken(Deque path) throws IOException { } this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part this.parser.nextToken(); // advance to the value of fieldName - parseToken(path); // parse the value for fieldName (which will be an array, an object, or a primitive value) + isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object, + // or a primitive value) path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName) // Note that whichever other branch we just passed through has already ended with nextToken(), so we // don't need to call it. 
} else if (this.parser.currentToken() == Token.START_ARRAY) { parser.nextToken(); while (this.parser.currentToken() != Token.END_ARRAY) { - parseToken(path); + isChildrenValueValid |= parseToken(path, currentFieldName); } this.parser.nextToken(); + } else if (this.parser.currentToken() == Token.END_ARRAY) { + // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { parser.nextToken(); while (this.parser.currentToken() != Token.END_OBJECT) { - parseToken(path); + isChildrenValueValid |= parseToken(path, currentFieldName); } this.parser.nextToken(); - } else if (this.parser.currentToken().isValue()) { + } else { String parsedValue = parseValue(); if (parsedValue != null) { this.valueList.add(parsedValue); this.valueAndPathList.add(Strings.collectionToDelimitedString(path, ".") + EQUAL_SYMBOL + parsedValue); + isChildrenValueValid = true; } this.parser.nextToken(); } + + if (visitFieldName && isChildrenValueValid == false) { + removeKeyOfNullValue(); + } + return isChildrenValueValid; + } + + public void removeKeyOfNullValue() { + // it means that the value of the sub child (or the last brother) is invalid, + // we should delete the key from keyList. 
+ assert keyList.size() > 0; + this.keyList.remove(keyList.size() - 1); + } + + private boolean processNoNestedValue() throws IOException { + if (parser.currentToken() == Token.VALUE_NULL) { + return true; + } else if (this.parser.currentToken() == Token.VALUE_STRING + || this.parser.currentToken() == Token.VALUE_NUMBER + || this.parser.currentToken() == Token.VALUE_BOOLEAN) { + String value = this.parser.textOrNull(); + if (value != null) { + this.valueList.add(value); + } + return true; + } + return false; } private String parseValue() throws IOException { diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java index a0f5150981a08..3c292181b4d8f 100644 --- a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -49,9 +49,9 @@ public void testNestedObjects() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," - + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", flattenJsonString("flat", jsonExample) ); @@ -64,9 +64,9 @@ public void testChildHasDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," - + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", flattenJsonString("flat", jsonExample) ); @@ -83,7 +83,7 @@ public 
void testNestChildObjectWithDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"]," + + "\"flat\":[\"really_inner\",\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + "}", @@ -102,7 +102,7 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"absolutely\",\"totally\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + "}", @@ -123,10 +123,9 @@ public void testArrayOfObjects() throws IOException { assertEquals( "{" - + "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\"]," - + "\"flat._value\":[\"baz\",\"baz\"]," + + "\"flat\":[\"field\",\"name\",\"detail\",\"foooooooooooo\"]," + + "\"flat._value\":[\"baz\"]," + "\"flat._valueAndPath\":[" - + "\"flat.field.detail.foooooooooooo.name=baz\"," + "\"flat.field.detail.foooooooooooo.name=baz\"" + "]}", flattenJsonString("flat", jsonExample) @@ -151,14 +150,13 @@ public void testArraysOfObjectsAndValues() throws IOException { assertEquals( "{" - + "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\",\"numbers\"]," - + "\"flat._value\":[\"baz\",\"baz\",\"1\",\"2\",\"3\"]," + + "\"flat\":[\"field\",\"name\",\"numbers\",\"detail\",\"foooooooooooo\"]," + + "\"flat._value\":[\"1\",\"2\",\"3\",\"baz\"]," + "\"flat._valueAndPath\":[" + "\"flat.field.detail.foooooooooooo.name=baz\"," - + "\"flat.field.detail.foooooooooooo.name=baz\"," + "\"flat.field.numbers=1\"," - + "\"flat.field.numbers=2\"," - + "\"flat.field.numbers=3\"" + + "\"flat.field.numbers=3\"," + + 
"\"flat.field.numbers=2\"" + "]}", flattenJsonString("flat", jsonExample) ); diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index 5b5ca378ee7ff..94d1f501bee51 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -136,6 +136,254 @@ public void testNullValue() throws IOException { assertEquals(1, parsedDocument.docs().size()); IndexableField[] fields = parsedDocument.rootDoc().getFields("field"); assertEquals(0, fields.length); + ParsedDocument doc; + String json; + IndexableField[] fieldValues; + IndexableField[] fieldValueAndPaths; + + { + // test1: {"field":null} + doc = mapper.parse(source(b -> b.nullField("field"))); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test2: {"field":{"age":3, "name": null}} + json = "{\"field\":{\"age\":3, \"name\": null}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[0].binaryValue()); + + // test3: {"field":{"name":null, "age":"5", "name1":null}} + json = "{\"field\":{\"name\":null, \"age\":\"5\", \"name1\":null}}"; + doc = mapper.parse(source(json)); + 
fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("5"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=5"), fieldValueAndPaths[0].binaryValue()); + + // test4: {"field":{"name": {"name1": {"name2":null}}}} + json = "{\"field\":{\"name\": {\"name1\": {\"name2\":null}}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + } + + { + // test5: {"field":[null]} + doc = mapper.parse(source(b -> b.array("field", (String[]) null))); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test6: {"field":{"labels": [null]}} + json = "{\"field\":{\"labels\": [null]}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test7: {"field":{"r1": {"labels": [null]}}} + json = "{\"field\":{\"r1\": {\"labels\": [null]}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new 
IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test8: {"field":{"r1": {"name": null,"labels": [null]}}} + json = "{\"field\":{\"r1\": {\"name\": null,\"labels\": [null]}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test9: {"field":{"name": [null,3],"age":4}} + json = "{\"field\":{\"name\": [null,3],\"age\":4}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("4"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name=3"), fieldValueAndPaths[2].binaryValue()); + + // test10: {"field":{"age": 4,"name": [null,"3"]}} + json = "{\"field\":{\"age\": 4,\"name\": [null,\"3\"]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new 
BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("4"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name=3"), fieldValueAndPaths[2].binaryValue()); + + // test11: {"field":{"age":"4","labels": [null]}} + json = "{\"field\":{\"age\":\"4\",\"labels\": [null]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("4"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + + // test12: {"field":{"labels": [null], "age":"4"}} + json = "{\"field\":{\"labels\": [null], \"age\":\"4\"}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("4"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + + // test13: {"field":{"name": [null, {"d":{"name":"dsds"}}]}} + json = "{\"field\":{\"name\": [null, {\"d\":{\"name\":\"dsds\"}}]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, 
fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[0].binaryValue()); + + // test14: {"field":{"name": [{"d":{"name":"dsds"}}, null]}} + json = "{\"field\":{\"name\": [{\"d\":{\"name\":\"dsds\"}}, null]}}"; + doc = mapper.parse(source(json)); + IndexableField[] fields1 = doc.rootDoc().getFields("field"); + assertEquals(fields1.length, fields.length); + for (int i = 0; i < fields1.length; i++) { + assertEquals(fields[i].toString(), fields1[i].toString()); + } + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[0].binaryValue()); + + // test15: {"field":{"name": [{"name":"age1"}, null, {"d":{"name":"dsds"}}]}} + json = "{\"field\":{\"name\": [{\"name\":\"age1\"}, null, {\"d\":{\"name\":\"dsds\"}}]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + 
assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("age1"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.name=age1"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[2].binaryValue()); + + // test16: {"field":{"name": {"name1": [null,"dsdsdsd"]}}} + json = "{\"field\":{\"name\": {\"name1\": [null,\"dsdsdsd\"]}}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name1"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsdsdsd"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.name1=dsdsdsd"), fieldValueAndPaths[0].binaryValue()); + + // test17: {"field":{"name": {"name1": [[],["dsdsdsd", null]]}}} + json = "{\"field\":{\"name\": {\"name1\": [[],[\"dsdsdsd\", null]]}}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name1"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsdsdsd"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new 
BytesRef("field.name.name1=dsdsdsd"), fieldValueAndPaths[0].binaryValue()); + } + } + + public void testInfiniteLoopWithNullValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + // test2: {"field":{"name": null,"age":3}} + String json = "{\"field\":{\"name\": null,\"age\":3}}"; + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[0].binaryValue()); + } + + // test deduplicationValue of keyList, valueList, valueAndPathList + public void testDeduplicationValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // test: {"field":{"age": 3,"labels": [null,"3"], "abc":{"abc":{"labels":"n"}}}} + String json = "{\"field\":{\"age\": 3,\"labels\": [null,\"3\"], \"abc\":{\"abc\":{\"labels\":\"n\"}}}}"; + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(6, fields.length); + assertEquals(new BytesRef("field.abc"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + assertEquals(new BytesRef("field.labels"), fields[4].binaryValue()); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("n"), fieldValues[2].binaryValue()); + 
IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(6, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.abc.abc.labels=n"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[2].binaryValue()); + assertEquals(new BytesRef("field.labels=3"), fieldValueAndPaths[4].binaryValue()); } @Override