diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 59d307c4df0ad..a4be7026dffcd 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index. Dimension fields have the following constraints: * The `doc_values` and `index` mapping parameters must be `true`. -* Field values cannot be an <>. // end::dimension[] * Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints: ** The field cannot use a <>. diff --git a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml index d20231a6d6cf2..6eb9536c9646b 100644 --- a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml +++ b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml @@ -1230,3 +1230,62 @@ non string dimension fields: - match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true } + +--- +multi value dimensions: + - requires: + cluster_features: ["mapper.pass_through_priority"] + reason: support for priority in passthrough objects + + - do: + allowed_warnings: + - "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation" + indices.put_index_template: + name: my-dynamic-template + body: + index_patterns: [k9s*] + data_stream: {} + template: + settings: + index: + number_of_shards: 1 + mode: time_series + time_series: + start_time: 2023-08-31T13:03:08.138Z + + mappings: + properties: + attributes: + type: passthrough + dynamic: true + time_series_dimension: true + priority: 1 + dynamic_templates: + - counter_metric: + mapping: + type: integer + time_series_metric: counter + + - do: + bulk: + index: k9s + refresh: true + body: + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - is_false: errors + + - do: + search: + index: k9s + body: + size: 0 + aggs: + tsids: + terms: + field: _tsid + + - length: { aggregations.tsids.buckets: 2 } # only the order of the dim1 attribute is different, yet we expect to have two distinct time series diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index e1c70fd5e6a8c..35a370958b586 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -34,7 +34,6 @@ import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -44,6 +43,7 @@ import java.util.function.Predicate; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken; /** * Generates the shard id for {@code (id, routing)} pairs. @@ -300,12 +300,20 @@ public String createId(Map flat, byte[] suffix) { Builder b = builder(); for (Map.Entry e : flat.entrySet()) { if (isRoutingPath.test(e.getKey())) { - b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString())))); + if (e.getValue() instanceof List listValue) { + listValue.forEach(v -> hashValue(b, e.getKey(), v)); + } else { + hashValue(b, e.getKey(), e.getValue()); + } } } return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty); } + private static void hashValue(Builder b, String key, Object value) { + b.hashes.add(new NameAndHash(new BytesRef(key), hash(new BytesRef(value.toString())))); + } + private static int defaultOnEmpty() { throw new IllegalArgumentException("Error extracting routing: source didn't contain any routing fields"); } @@ -356,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws } } + private void extractArray(@Nullable String path, XContentParser source) throws IOException { + while (source.currentToken() != Token.END_ARRAY) { + expectValueToken(source.currentToken(), source); + extractItem(path, source); + } + } + private void extractItem(String path, XContentParser source) throws IOException { switch (source.currentToken()) { case START_OBJECT: @@ -369,6 +384,11 @@ private void extractItem(String path, XContentParser source) throws IOException hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text())))); source.nextToken(); break; + case START_ARRAY: + source.nextToken(); + extractArray(path, source); + source.nextToken(); + break; case VALUE_NULL: source.nextToken(); break; @@ -382,21 +402,13 @@ private void extractItem(String path, XContentParser source) throws IOException } private int buildHash(IntSupplier onEmpty) { - Collections.sort(hashes); - Iterator itr = hashes.iterator(); - if (itr.hasNext() == false) { + if (hashes.isEmpty()) { return onEmpty.getAsInt(); } - NameAndHash prev = itr.next(); - int hash = hash(prev.name) ^ prev.hash; - while (itr.hasNext()) { - NameAndHash next = itr.next(); - if (prev.name.equals(next.name)) { - throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]"); - } - int thisHash = hash(next.name) ^ next.hash; - hash = 31 * hash + thisHash; - prev = next; + Collections.sort(hashes); + int hash = 0; + for (NameAndHash nah : hashes) { + hash = 31 * hash + (hash(nah.name) ^ nah.hash); } return hash; } diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java index a72ee1d28b7f0..f91e354ca91e2 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java @@ -71,6 +71,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar } } + /** + * Makes sure the provided token {@linkplain Token#isValue() is a value type} + * + * @throws ParsingException if the token is not a value type + */ + public static void expectValueToken(Token actual, XContentParser parser) { + if (actual.isValue() == false) { + throw new ParsingException( + parser.getTokenLocation(), + String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual) + ); + } + } + private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) { return new ParsingException( parser.getTokenLocation(), diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java index f1c6a072c2d9e..24e62f08f4aa0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java @@ -40,13 +40,14 @@ import java.io.IOException; import java.net.InetAddress; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Comparator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.SortedMap; +import java.util.TreeMap; /** * Mapper for {@code _tsid} field included generated when the index is @@ -180,8 +181,6 @@ public static class TimeSeriesIdBuilder implements DocumentDimensions { public static final int MAX_DIMENSIONS = 512; - private record Dimension(BytesRef name, BytesReference value) {} - private final Murmur3Hasher tsidHasher = new Murmur3Hasher(0); /** @@ -189,7 +188,7 @@ private record Dimension(BytesRef name, BytesReference value) {} * for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper} * to build the _tsid field for the document. */ - private final SortedSet dimensions = new TreeSet<>(Comparator.comparing(o -> o.name)); + private final SortedMap> dimensions = new TreeMap<>(); /** * Builds the routing. Used for building {@code _id}. If null then skipped. */ @@ -207,9 +206,16 @@ public BytesReference buildLegacyTsid() throws IOException { try (BytesStreamOutput out = new BytesStreamOutput()) { out.writeVInt(dimensions.size()); - for (Dimension entry : dimensions) { - out.writeBytesRef(entry.name); - entry.value.writeTo(out); + for (Map.Entry> entry : dimensions.entrySet()) { + out.writeBytesRef(entry.getKey()); + List value = entry.getValue(); + if (value.size() > 1) { + throw new IllegalArgumentException( + "Dimension field [" + entry.getKey().utf8ToString() + "] cannot be a multi-valued field." + ); + } + assert value.isEmpty() == false : "dimension value is empty"; + value.get(0).writeTo(out); } return out.bytes(); } @@ -241,18 +247,19 @@ public BytesReference buildTsidHash() { int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0); tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.name.bytes); + for (final BytesRef name : dimensions.keySet()) { + tsidHasher.update(name.bytes); } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); // NOTE: concatenate all dimension value hashes up to a certain number of dimensions int tsidHashStartIndex = tsidHashIndex; - for (final Dimension dimension : dimensions) { + for (final List values : dimensions.values()) { if ((tsidHashIndex - tsidHashStartIndex) >= 4 * numberOfDimensions) { break; } - final BytesRef dimensionValueBytesRef = dimension.value.toBytesRef(); + assert values.isEmpty() == false : "dimension values are empty"; + final BytesRef dimensionValueBytesRef = values.get(0).toBytesRef(); ByteUtils.writeIntLE( StringHelper.murmurhash3_x86_32( dimensionValueBytesRef.bytes, @@ -268,8 +275,8 @@ public BytesReference buildTsidHash() { // NOTE: hash all dimension field allValues tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.value.toBytesRef().bytes); + for (final List values : dimensions.values()) { + values.forEach(v -> tsidHasher.update(v.toBytesRef().bytes)); } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); @@ -372,8 +379,15 @@ public DocumentDimensions validate(final IndexSettings settings) { } private void add(String fieldName, BytesReference encoded) throws IOException { - if (dimensions.add(new Dimension(new BytesRef(fieldName), encoded)) == false) { - throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field."); + BytesRef name = new BytesRef(fieldName); + List values = dimensions.get(name); + if (values == null) { + // optimize for the common case where dimensions are not multi-valued + values = new ArrayList<>(1); + values.add(encoded); + dimensions.put(name, values); + } else { + values.add(encoded); } } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java index ad39a7017b465..64d78878c30d9 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java @@ -594,7 +594,31 @@ public void testRoutingPathBooleansInSource() throws IOException { IndexRouting routing = indexRoutingForPath(shards, "foo"); assertIndexShard(routing, Map.of("foo", true), Math.floorMod(hash(List.of("foo", "true")), shards)); assertIndexShard(routing, Map.of("foo", false), Math.floorMod(hash(List.of("foo", "false")), shards)); + } + public void testRoutingPathArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a,b,c,d"); + assertIndexShard( + routing, + Map.of("a", List.of("foo", "bar", "foo"), "b", List.of(21, 42), "c", List.of(true), "d", List.of()), + Math.floorMod(hash(List.of("a", "foo", "a", "bar", "a", "foo", "b", "21", "b", "42", "c", "true")), shards) + ); + } + + public void testRoutingPathObjectArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a"); + + BytesReference source = source(Map.of("a", List.of("foo", Map.of("foo", "bar")))); + Exception e = expectThrows( + IllegalArgumentException.class, + () -> routing.indexShard(randomAlphaOfLength(5), null, XContentType.JSON, source, s -> {}) + ); + assertThat( + e.getMessage(), + equalTo("Error extracting routing: Failed to parse object: expecting value token but found [START_OBJECT]") + ); } public void testRoutingPathBwc() throws IOException { @@ -667,7 +691,11 @@ private void assertIndexShard(IndexRouting routing, Map source, IndexRouting.ExtractFromSource.Builder b = r.builder(); for (Map.Entry e : flattened.entrySet()) { - b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + if (e.getValue() instanceof List listValue) { + listValue.forEach(v -> b.addMatching(e.getKey(), new BytesRef(v.toString()))); + } else { + b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + } } String idFromBuilder = b.createId(suffix, () -> { throw new AssertionError(); }); assertThat(idFromBuilder, equalTo(idFromSource)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java index f05ec95fe84cb..4591fbd2fe178 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java @@ -27,10 +27,13 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; @@ -62,13 +65,19 @@ protected IndexVersion getVersion() { } private DocumentMapper createDocumentMapper(String routingPath, XContentBuilder mappings) throws IOException { + return createDocumentMapper(getVersion(), routingPath, mappings); + } + + private DocumentMapper createDocumentMapper(IndexVersion version, String routingPath, XContentBuilder mappings) throws IOException { return createMapperService( + version, getIndexSettingsBuilder().put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.name()) .put(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING.getKey(), 200) // Allow tests that use many dimensions .put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), routingPath) .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), "2021-04-28T00:00:00Z") .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), "2021-10-29T00:00:00Z") .build(), + () -> true, mappings ).documentMapper(); } @@ -643,6 +652,44 @@ public void testDifferentDimensions() throws IOException { assertThat(doc1.rootDoc().getBinaryValue("_tsid").bytes, not(doc2.rootDoc().getBinaryValue("_tsid").bytes)); } + public void testMultiValueDimensionsNotSupportedBeforeTsidHashing() throws IOException { + IndexVersion priorToTsidHashing = IndexVersionUtils.getPreviousVersion(IndexVersions.TIME_SERIES_ID_HASHING); + DocumentMapper docMapper = createDocumentMapper( + priorToTsidHashing, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + CheckedConsumer fields = d -> d.field("a", new String[] { a1, a2 }); + DocumentParsingException exception = assertThrows(DocumentParsingException.class, () -> parseDocument(docMapper, fields)); + assertThat(exception.getMessage(), containsString("Dimension field [a] cannot be a multi-valued field")); + } + + public void testMultiValueDimensions() throws IOException { + DocumentMapper docMapper = createDocumentMapper( + IndexVersions.TIME_SERIES_ID_HASHING, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + List docs = List.of( + parseDocument(docMapper, d -> d.field("a", new String[] { a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1, a2 })) + ); + List tsids = docs.stream() + .map(doc -> doc.rootDoc().getBinaryValue("_tsid").toString()) + .distinct() + .collect(Collectors.toList()); + assertThat(tsids, hasSize(docs.size())); + } + /** * Documents with fewer dimensions have a different value. */ diff --git a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml index 89ff28249aabb..a4413d266181d 100644 --- a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml +++ b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml @@ -28,6 +28,11 @@ template: type: constant_keyword value: metrics dynamic_templates: + - ecs_ip: + mapping: + type: ip + path_match: [ "ip", "*.ip", "*_ip" ] + match_mapping_type: string - all_strings_to_keywords: mapping: ignore_above: 1024 diff --git a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml index a6591d6c32210..f6a12de78fe90 100644 --- a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml +++ b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml @@ -123,11 +123,10 @@ setup: start_time: 2024-07-01T13:03:08.138Z mappings: dynamic_templates: - - ip_fields: + - no_ip_fields: mapping: - type: ip + type: keyword match_mapping_type: string - path_match: "*.ip" - do: bulk: index: metrics-generic.otel-default @@ -145,5 +144,34 @@ setup: indices.get_mapping: index: $idx0name expand_wildcards: hidden - - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'keyword' } - match: { .$idx0name.mappings.properties.attributes.properties.foo.type: "keyword" } +--- +IP dimensions: + - do: + bulk: + index: metrics-generic.otel-default + refresh: true + body: + - create: {"dynamic_templates":{"metrics.foo.bar":"counter_long"}} + - "@timestamp": 2024-07-18T14:48:33.467654000Z + resource: + attributes: + host.ip: [ "127.0.0.1", "0.0.0.0" ] + attributes: + philip: [ a, b, c ] + metrics: + foo.bar: 42 + - is_false: errors + + - do: + indices.get_data_stream: + name: metrics-generic.otel-default + - set: { data_streams.0.indices.0.index_name: idx0name } + + - do: + indices.get_mapping: + index: $idx0name + expand_wildcards: hidden + - match: { .$idx0name.mappings.properties.resource.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.philip.type: "keyword" }