|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License |
| 4 | + * 2.0 and the Server Side Public License, v 1; you may not use this file except |
| 5 | + * in compliance with, at your election, the Elastic License 2.0 or the Server |
| 6 | + * Side Public License, v 1. |
| 7 | + */ |
| 8 | + |
| 9 | +package org.elasticsearch.ingest.common; |
| 10 | + |
| 11 | +import org.elasticsearch.ingest.AbstractProcessor; |
| 12 | +import org.elasticsearch.ingest.ConfigurationUtils; |
| 13 | +import org.elasticsearch.ingest.IngestDocument; |
| 14 | +import org.elasticsearch.ingest.Processor; |
| 15 | + |
| 16 | +import java.util.Locale; |
| 17 | +import java.util.Map; |
| 18 | +import java.util.Objects; |
| 19 | + |
| 20 | +import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; |
| 21 | + |
| 22 | +public final class DataStreamRouterProcessor extends AbstractProcessor { |
| 23 | + public static final String TYPE = "data_stream_router"; |
| 24 | + |
| 25 | + private static final String DATA_STREAM_PREFIX = "data_stream."; |
| 26 | + private static final String DATA_STREAM_TYPE = DATA_STREAM_PREFIX + "type"; |
| 27 | + private static final String DATA_STREAM_DATASET = DATA_STREAM_PREFIX + "dataset"; |
| 28 | + private static final String DATA_STREAM_NAMESPACE = DATA_STREAM_PREFIX + "namespace"; |
| 29 | + private static final String EVENT_DATASET = "event.dataset"; |
| 30 | + |
| 31 | + private static final char[] DISALLOWED_IN_DATASET = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':', '-' }; |
| 32 | + private static final char[] DISALLOWED_IN_NAMESPACE = new char[] { '\\', '/', '*', '?', '\"', '<', '>', '|', ' ', ',', '#', ':' }; |
| 33 | + private static final int MAX_LENGTH = 100; |
| 34 | + private static final char REPLACEMENT_CHAR = '_'; |
| 35 | + private final String dataset; |
| 36 | + private final String namespace; |
| 37 | + |
| 38 | + DataStreamRouterProcessor(String tag, String description, String dataset, String namespace) { |
| 39 | + super(tag, description); |
| 40 | + this.dataset = dataset; |
| 41 | + this.namespace = namespace; |
| 42 | + } |
| 43 | + |
| 44 | + private static String sanitizeDataStreamField(String s, char[] disallowedInDataset) { |
| 45 | + if (s == null) { |
| 46 | + return null; |
| 47 | + } |
| 48 | + s = s.toLowerCase(Locale.ROOT); |
| 49 | + s = s.substring(0, Math.min(s.length(), MAX_LENGTH)); |
| 50 | + for (char c : disallowedInDataset) { |
| 51 | + s = s.replace(c, REPLACEMENT_CHAR); |
| 52 | + } |
| 53 | + return s; |
| 54 | + } |
| 55 | + |
| 56 | + @Override |
| 57 | + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { |
| 58 | + final String indexName = ingestDocument.getFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), String.class); |
| 59 | + final String type; |
| 60 | + final String datasetFallback; |
| 61 | + final String namespaceFallback; |
| 62 | + int indexOfFirstDash = indexName.indexOf('-'); |
| 63 | + String illegalDataStreamNameMessage = "invalid data stream name: [" |
| 64 | + + indexName |
| 65 | + + "]; must follow naming scheme <type>-<dataset>-<namespace>"; |
| 66 | + if (indexOfFirstDash < 0) { |
| 67 | + throw new IllegalArgumentException(illegalDataStreamNameMessage); |
| 68 | + } |
| 69 | + type = indexName.substring(0, indexOfFirstDash); |
| 70 | + int indexOfSecondDash = indexName.indexOf('-', indexOfFirstDash + 1); |
| 71 | + if (indexOfSecondDash < 0) { |
| 72 | + throw new IllegalArgumentException(illegalDataStreamNameMessage); |
| 73 | + } |
| 74 | + datasetFallback = indexName.substring(indexOfFirstDash + 1, indexOfSecondDash); |
| 75 | + namespaceFallback = indexName.substring(indexOfSecondDash + 1); |
| 76 | + |
| 77 | + String dataset = getDataset(ingestDocument, datasetFallback); |
| 78 | + String namespace = getNamespace(ingestDocument, namespaceFallback); |
| 79 | + ingestDocument.setFieldValue(DATA_STREAM_TYPE, type); |
| 80 | + if (ingestDocument.hasField(EVENT_DATASET)) { |
| 81 | + ingestDocument.setFieldValue(EVENT_DATASET, dataset); |
| 82 | + } |
| 83 | + ingestDocument.setFieldValue(DATA_STREAM_DATASET, dataset); |
| 84 | + ingestDocument.setFieldValue(DATA_STREAM_NAMESPACE, namespace); |
| 85 | + ingestDocument.setFieldValue(IngestDocument.Metadata.INDEX.getFieldName(), type + "-" + dataset + "-" + namespace); |
| 86 | + ingestDocument.skipCurrentPipeline(); |
| 87 | + return ingestDocument; |
| 88 | + } |
| 89 | + |
| 90 | + private String getDataset(IngestDocument ingestDocument, String datasetFallback) { |
| 91 | + String dataset = this.dataset; |
| 92 | + if (dataset == null) { |
| 93 | + dataset = sanitizeDataStreamField(ingestDocument.getFieldValue(DATA_STREAM_DATASET, String.class, true), DISALLOWED_IN_DATASET); |
| 94 | + } |
| 95 | + if (dataset == null) { |
| 96 | + dataset = datasetFallback; |
| 97 | + } |
| 98 | + return dataset; |
| 99 | + } |
| 100 | + |
| 101 | + private String getNamespace(IngestDocument ingestDocument, String namespaceFallback) { |
| 102 | + String namespace = this.namespace; |
| 103 | + if (namespace == null) { |
| 104 | + namespace = sanitizeDataStreamField( |
| 105 | + ingestDocument.getFieldValue(DATA_STREAM_NAMESPACE, String.class, true), |
| 106 | + DISALLOWED_IN_NAMESPACE |
| 107 | + ); |
| 108 | + } |
| 109 | + if (namespace == null) { |
| 110 | + namespace = namespaceFallback; |
| 111 | + } |
| 112 | + return namespace; |
| 113 | + } |
| 114 | + |
| 115 | + @Override |
| 116 | + public String getType() { |
| 117 | + return TYPE; |
| 118 | + } |
| 119 | + |
| 120 | + public String getDataStreamDataset() { |
| 121 | + return dataset; |
| 122 | + } |
| 123 | + |
| 124 | + public String getDataStreamNamespace() { |
| 125 | + return namespace; |
| 126 | + } |
| 127 | + |
| 128 | + public static final class Factory implements Processor.Factory { |
| 129 | + |
| 130 | + @Override |
| 131 | + public DataStreamRouterProcessor create( |
| 132 | + Map<String, Processor.Factory> processorFactories, |
| 133 | + String tag, |
| 134 | + String description, |
| 135 | + Map<String, Object> config |
| 136 | + ) throws Exception { |
| 137 | + String dataset = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "dataset"); |
| 138 | + if (Objects.equals(sanitizeDataStreamField(dataset, DISALLOWED_IN_DATASET), dataset) == false) { |
| 139 | + throw newConfigurationException(TYPE, tag, "dataset", "contains illegal characters"); |
| 140 | + } |
| 141 | + String namespace = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, "namespace"); |
| 142 | + if (Objects.equals(sanitizeDataStreamField(namespace, DISALLOWED_IN_NAMESPACE), namespace) == false) { |
| 143 | + throw newConfigurationException(TYPE, tag, "namespace", "contains illegal characters"); |
| 144 | + } |
| 145 | + return new DataStreamRouterProcessor(tag, description, dataset, namespace); |
| 146 | + } |
| 147 | + } |
| 148 | +} |
0 commit comments