From f23c80ab392087c24d35a1083b2bec5b6f9d385b Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 09:14:01 +0100 Subject: [PATCH 01/30] Serialize logs directly to JSON in OTel mode (skipping objmodel) --- .../elasticsearchexporter/exporter_test.go | 40 +- exporter/elasticsearchexporter/model.go | 74 +--- .../elasticsearchexporter/pdata_serializer.go | 357 ++++++++++++++++++ .../pdata_serializer_test.go | 131 +++++++ 4 files changed, 524 insertions(+), 78 deletions(-) create mode 100644 exporter/elasticsearchexporter/pdata_serializer.go create mode 100644 exporter/elasticsearchexporter/pdata_serializer_test.go diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 5554c089e02b..d6d74a222d21 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -427,7 +427,7 @@ func TestExporterLogs(t *testing.T) { body: func() pcommon.Value { return pcommon.NewValueStr("foo") }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"dropped_attributes_count":0,"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"severity_number":0,"body":{"text":"foo"}}`), + wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"text":"foo"}}`), }, { body: func() pcommon.Value { @@ -438,7 +438,7 @@ func TestExporterLogs(t *testing.T) { m.PutEmptyMap("inner").PutStr("foo", "bar") return vm }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"dropped_attributes_count":0,"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"severity_number":0,"body":{"flattened":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), + wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), }, { body: func() pcommon.Value { @@ -450,7 +450,7 @@ func TestExporterLogs(t *testing.T) { return vm }(), isEvent: true, - wantDocument: 
[]byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"dropped_attributes_count":0,"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"severity_number":0,"body":{"structured":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), + wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), }, { body: func() pcommon.Value { @@ -461,7 +461,7 @@ func TestExporterLogs(t *testing.T) { s.AppendEmpty().SetEmptyMap().PutStr("foo", "bar") return vs }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"dropped_attributes_count":0,"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"severity_number":0,"body":{"flattened":{"value":["foo",false,{"foo":"bar"}]}}}`), + wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"value":["foo",false,{"foo":"bar"}]}}}`), }, { body: func() pcommon.Value { @@ -473,7 +473,7 @@ func TestExporterLogs(t *testing.T) { return vs }(), isEvent: true, - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"dropped_attributes_count":0,"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"severity_number":0,"body":{"structured":{"value":["foo",false,{"foo":"bar"}]}}}`), + wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"value":["foo",false,{"foo":"bar"}]}}}`), }, } { rec := newBulkRecorder() @@ -734,6 +734,36 @@ func TestExporterLogs(t *testing.T) { assert.JSONEq(t, `{"a":"a","a.b":"a.b"}`, gjson.GetBytes(doc, `scope.attributes`).Raw) assert.JSONEq(t, `{"a":"a","a.b":"a.b"}`, gjson.GetBytes(doc, `resource.attributes`).Raw) }) + + t.Run("otel mode attribute complex value", func(t *testing.T) { + rec := 
newBulkRecorder() + server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) { + rec.Record(docs) + return itemsAllOK(docs) + }) + + exporter := newTestLogsExporter(t, server.URL, func(cfg *Config) { + cfg.Mapping.Mode = "otel" + }) + + logs := plog.NewLogs() + resourceLog := logs.ResourceLogs().AppendEmpty() + resourceLog.Resource().Attributes().PutEmptyMap("some.resource.attribute").PutEmptyMap("foo.bar").PutStr("baz", "qux") + scopeLog := resourceLog.ScopeLogs().AppendEmpty() + scopeLog.Scope().Attributes().PutEmptyMap("some.scope.attribute").PutEmptyMap("foo.bar").PutStr("baz", "qux") + logRecord := scopeLog.LogRecords().AppendEmpty() + logRecord.Attributes().PutEmptyMap("some.record.attribute").PutEmptyMap("foo.bar").PutStr("baz", "qux") + + mustSendLogs(t, exporter, logs) + + rec.WaitItems(1) + + assert.Len(t, rec.Items(), 1) + doc := rec.Items()[0].Document + assert.JSONEq(t, `{"some.record.attribute":{"foo.bar":{"baz":"qux"}}}`, gjson.GetBytes(doc, `attributes`).Raw) + assert.JSONEq(t, `{"some.scope.attribute":"{\"foo.bar\":{\"baz\":\"qux\"}}"}`, gjson.GetBytes(doc, `scope.attributes`).Raw) + assert.JSONEq(t, `{"some.resource.attribute":"{\"foo.bar\":{\"baz\":\"qux\"}}"}`, gjson.GetBytes(doc, `resource.attributes`).Raw) + }) } func TestExporterMetrics(t *testing.T) { diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index f6ee644fb022..08a94313a6de 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -117,7 +117,7 @@ func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL str case MappingECS: document = m.encodeLogECSMode(resource, record, scope) case MappingOTel: - document = m.encodeLogOTelMode(resource, resourceSchemaURL, record, scope, scopeSchemaURL) + return serializeLog(resource, resourceSchemaURL, scope, scopeSchemaURL, record) case MappingBodyMap: return m.encodeLogBodyMapMode(record) default: @@ -161,78 +161,6 @@ func (m *encodeModel) encodeLogBodyMapMode(record plog.LogRecord) ([]byte, error return jsoniter.Marshal(body.Map().AsRaw()) } -func (m *encodeModel) encodeLogOTelMode(resource pcommon.Resource, resourceSchemaURL string, record plog.LogRecord, scope pcommon.InstrumentationScope, scopeSchemaURL string) objmodel.Document { - var document objmodel.Document - - docTimeStamp := record.Timestamp() - if docTimeStamp.AsTime().UnixNano() == 0 { - docTimeStamp = record.ObservedTimestamp() - } - - document.AddTimestamp("@timestamp", docTimeStamp) - document.AddTimestamp("observed_timestamp", record.ObservedTimestamp()) - - document.AddTraceID("trace_id", record.TraceID()) - document.AddSpanID("span_id", record.SpanID()) - document.AddString("severity_text", record.SeverityText()) - document.AddInt("severity_number", int64(record.SeverityNumber())) - document.AddInt("dropped_attributes_count", int64(record.DroppedAttributesCount())) - - m.encodeAttributesOTelMode(&document, record.Attributes()) - m.encodeResourceOTelMode(&document, resource, resourceSchemaURL) - m.encodeScopeOTelMode(&document, scope, scopeSchemaURL) - - // Body - setOTelLogBody(&document, record.Body(), record.Attributes()) - - return document -} - -func setOTelLogBody(doc *objmodel.Document, body pcommon.Value, attributes pcommon.Map) { - // Determine if this log record is an event, as they are mapped differently - // https://github.com/open-telemetry/semantic-conventions/blob/main/docs/general/events.md - _, isEvent := attributes.Get("event.name") - - switch body.Type() { 
- case pcommon.ValueTypeMap: - if isEvent { - doc.AddAttribute("body.structured", body) - } else { - doc.AddAttribute("body.flattened", body) - } - case pcommon.ValueTypeSlice: - // output must be an array of objects due to ES limitations - // otherwise, wrap the array in an object - s := body.Slice() - allMaps := true - for i := 0; i < s.Len(); i++ { - if s.At(i).Type() != pcommon.ValueTypeMap { - allMaps = false - } - } - - var outVal pcommon.Value - if allMaps { - outVal = body - } else { - vm := pcommon.NewValueMap() - m := vm.SetEmptyMap() - body.Slice().CopyTo(m.PutEmptySlice("value")) - outVal = vm - } - - if isEvent { - doc.AddAttribute("body.structured", outVal) - } else { - doc.AddAttribute("body.flattened", outVal) - } - case pcommon.ValueTypeStr: - doc.AddString("body.text", body.Str()) - default: - doc.AddString("body.text", body.AsString()) - } -} - func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) objmodel.Document { var document objmodel.Document diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go new file mode 100644 index 000000000000..76f875bf5709 --- /dev/null +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -0,0 +1,357 @@ +package elasticsearchexporter + +import ( + "bytes" + "encoding/hex" + "github.com/elastic/go-structform" + "github.com/elastic/go-structform/json" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/plog" + "strings" +) + +const tsLayout = "2006-01-02T15:04:05.000000000Z" + +func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, record plog.LogRecord) ([]byte, error) { + var buf bytes.Buffer + + v := json.NewVisitor(&buf) + // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. + // This is required to generate the correct dynamic mapping in ES. 
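+ // For example, a double value 1.0 would otherwise be written as 1 and
+ // dynamically mapped by Elasticsearch as a long rather than a float.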
+ v.SetExplicitRadixPoint(true) + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return nil, err + } + docTimeStamp := record.Timestamp() + if docTimeStamp.AsTime().UnixNano() == 0 { + docTimeStamp = record.ObservedTimestamp() + } + if err := writeTimestampField(v, "@timestamp", docTimeStamp); err != nil { + return nil, err + } + if err := writeTimestampField(v, "observed_timestamp", record.ObservedTimestamp()); err != nil { + return nil, err + } + if err := writeDataStream(v, record.Attributes()); err != nil { + return nil, err + } + if err := writeStringFieldSkipDefault(v, "severity_text", record.SeverityText()); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "severity_number", int64(record.SeverityNumber())); err != nil { + return nil, err + } + if err := writeTraceIdField(v, record.TraceID()); err != nil { + return nil, err + } + if err := writeSpanIdField(v, record.SpanID()); err != nil { + return nil, err + } + if err := writeAttributes(v, record.Attributes(), false); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())); err != nil { + return nil, err + } + if err := writeResource(v, resource, resourceSchemaURL); err != nil { + return nil, err + } + if err := writeScope(v, scope, scopeSchemaURL); err != nil { + return nil, err + } + if err := writeLogBody(v, record); err != nil { + return nil, err + } + if err := v.OnObjectFinished(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func writeDataStream(v *json.Visitor, attributes pcommon.Map) error { + if err := v.OnKey("data_stream"); err != nil { + return err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + var err error + attributes.Range(func(k string, val pcommon.Value) bool { + if strings.HasPrefix(k, "data_stream.") && val.Type() == pcommon.ValueTypeStr { + if err = writeStringFieldSkipDefault(v, k[12:], val.Str()); err != nil { + return false + } + } + return true + }) + if err != nil { + return err + } + + if err := v.OnObjectFinished(); err != nil { + return err + } + return nil +} + +func writeLogBody(v *json.Visitor, record plog.LogRecord) error { + if record.Body().Type() == pcommon.ValueTypeEmpty { + return nil + } + if err := v.OnKey("body"); err != nil { + return err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + + // Determine if this log record is an event, as they are mapped differently + // https://github.com/open-telemetry/semantic-conventions/blob/main/docs/general/events.md + var bodyType string + if _, hasEventNameAttribute := record.Attributes().Get("event.name"); hasEventNameAttribute || record.EventName() != "" { + bodyType = "structured" + } else { + bodyType = "flattened" + } + body := record.Body() + switch body.Type() { + case pcommon.ValueTypeMap: + case pcommon.ValueTypeSlice: + // output must be an array of objects due to ES limitations + // otherwise, wrap the array in an object + s := body.Slice() + allMaps := true + for i := 0; i < s.Len(); i++ { + if s.At(i).Type() != pcommon.ValueTypeMap { + allMaps = false + } + } + + if !allMaps { + body = pcommon.NewValueMap() + m := body.SetEmptyMap() + record.Body().Slice().CopyTo(m.PutEmptySlice("value")) + } + default: + bodyType = "text" + } + if err := v.OnKey(bodyType); err != nil { + return err + } + if err := writeValue(v, body, false); err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + 
return err + } + return nil +} + +func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL string) error { + if err := v.OnKey("resource"); err != nil { + return err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "schema_url", resourceSchemaURL); err != nil { + return err + } + if err := writeAttributes(v, resource.Attributes(), true); err != nil { + return err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(resource.DroppedAttributesCount())); err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + return err + } + return nil +} + +func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchemaURL string) error { + if err := v.OnKey("scope"); err != nil { + return err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "schema_url", scopeSchemaURL); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "name", scope.Name()); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "version", scope.Version()); err != nil { + return err + } + if err := writeAttributes(v, scope.Attributes(), true); err != nil { + return err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(scope.DroppedAttributesCount())); err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + return err + } + return nil +} + +func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) error { + if attributes.Len() == 0 { + return nil + } + if err := v.OnKey("attributes"); err != nil { + return err + } + attrCopy := pcommon.NewMap() + attributes.CopyTo(attrCopy) + attrCopy.RemoveIf(func(key string, _ pcommon.Value) bool { + switch key { + case dataStreamType, dataStreamDataset, dataStreamNamespace: + return true + } + return false + }) + mergeGeolocation(attrCopy) + if err := writeMap(v, attrCopy, stringifyMapValues); err != nil { + return err + } + return nil +} + +func writeMap(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) error { + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + var err error + attributes.Range(func(k string, val pcommon.Value) bool { + if err = v.OnKey(k); err != nil { + return false + } + err = writeValue(v, val, stringifyMapValues) + return err == nil + }) + if err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + return err + } + return nil +} + +func writeValue(v *json.Visitor, val pcommon.Value, stringifyMaps bool) error { + switch val.Type() { + case pcommon.ValueTypeEmpty: + if err := v.OnNil(); err != nil { + return err + } + case pcommon.ValueTypeStr: + if err := v.OnString(val.Str()); err != nil { + return err + } + case pcommon.ValueTypeBool: + if err := v.OnBool(val.Bool()); err != nil { + return err + } + case pcommon.ValueTypeDouble: + if err := v.OnFloat64(val.Double()); err != nil { + return err + } + case pcommon.ValueTypeInt: + if err := v.OnInt64(val.Int()); err != nil { + return err + } + case pcommon.ValueTypeBytes: + if err := v.OnString(hex.EncodeToString(val.Bytes().AsRaw())); err != nil { + return err + } + case pcommon.ValueTypeMap: + if stringifyMaps { + if err := v.OnString(val.AsString()); err != nil { + return err + } + } else { + if err := writeMap(v, val.Map(), false); err != nil { + return err + } + } + case pcommon.ValueTypeSlice: 
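+ // Recurse into each slice element, propagating stringifyMaps so maps
+ // nested inside slices keep the same stringification behavior.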
+ if err := v.OnArrayStart(-1, structform.AnyType); err != nil { + return err + } + slice := val.Slice() + for i := 0; i < slice.Len(); i++ { + if err := writeValue(v, slice.At(i), stringifyMaps); err != nil { + return err + } + } + if err := v.OnArrayFinished(); err != nil { + return err + } + } + return nil +} + +func writeTimestampField(v *json.Visitor, key string, timestamp pcommon.Timestamp) error { + if err := v.OnKey(key); err != nil { + return err + } + if err := v.OnString(timestamp.AsTime().UTC().Format(tsLayout)); err != nil { + return err + } + return nil +} + +func writeIntFieldSkipDefault(v *json.Visitor, key string, i int64) error { + if i == 0 { + return nil + } + if err := v.OnKey(key); err != nil { + return err + } + if err := v.OnInt64(i); err != nil { + return err + } + return nil +} + +func writeStringFieldSkipDefault(v *json.Visitor, key, value string) error { + if value == "" { + return nil + } + if err := v.OnKey(key); err != nil { + return err + } + if err := v.OnString(value); err != nil { + return err + } + return nil +} + +func writeTraceIdField(v *json.Visitor, id pcommon.TraceID) error { + if id.IsEmpty() { + return nil + } + if err := v.OnKey("trace_id"); err != nil { + return err + } + if err := v.OnString(hex.EncodeToString(id[:])); err != nil { + return err + } + return nil +} + +func writeSpanIdField(v *json.Visitor, id pcommon.SpanID) error { + if id.IsEmpty() { + return nil + } + if err := v.OnKey("span_id"); err != nil { + return err + } + if err := v.OnString(hex.EncodeToString(id[:])); err != nil { + return err + } + return nil +} diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go new file mode 100644 index 000000000000..3b00d7f8e425 --- /dev/null +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -0,0 +1,131 @@ +package elasticsearchexporter + +import ( + "bytes" + "encoding/json" + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/plog" + "testing" +) + +func TestSerializeLog(t *testing.T) { + + tests := []struct { + name string + logCustomizer func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) + wantErr bool + expected interface{} + }{ + {name: "test attributes", logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { + record.SetSeverityText("debug") + record.Attributes().PutEmpty("empty") + record.Attributes().PutStr("data_stream.type", "logs") + record.Attributes().PutStr("string", "foo") + record.Attributes().PutBool("bool", true) + record.Attributes().PutDouble("double", 42.0) + record.Attributes().PutInt("int", 42) + record.Attributes().PutEmptyBytes("bytes").Append(42) + _ = record.Attributes().PutEmptySlice("slice").FromRaw([]interface{}{42, "foo"}) + record.Attributes().PutEmptySlice("map_slice").AppendEmpty().SetEmptyMap().PutStr("foo.bar", "baz") + mapAttr := record.Attributes().PutEmptyMap("map") + mapAttr.PutStr("foo.bar", "baz") + mapAttr.PutEmptySlice("inner.slice").AppendEmpty().SetStr("foo") + + resource.Attributes().PutEmptyMap("resource_map").PutStr("foo", "bar") + scope.Attributes().PutEmptyMap("scope_map").PutStr("foo", "bar") + }, wantErr: false, expected: map[string]interface{}{ + "@timestamp": "1970-01-01T00:00:00.000000000Z", + "observed_timestamp": "1970-01-01T00:00:00.000000000Z", + "data_stream": map[string]interface{}{ + "type": "logs", + }, + "severity_text": 
"debug", + "resource": map[string]interface{}{ + "attributes": map[string]interface{}{ + "resource_map": `{"foo":"bar"}`, + }, + }, + "scope": map[string]interface{}{ + "attributes": map[string]interface{}{ + "scope_map": `{"foo":"bar"}`, + }, + }, + "attributes": map[string]interface{}{ + "empty": nil, + "string": "foo", + "bool": true, + "double": json.Number("42.0"), + "int": json.Number("42"), + "bytes": "2a", + "slice": []interface{}{json.Number("42"), "foo"}, + "map_slice": []interface{}{map[string]interface{}{ + "foo.bar": "baz", + }}, + "map": map[string]interface{}{ + "foo.bar": "baz", + "inner.slice": []interface{}{"foo"}, + }, + }, + }}, + { + name: "text body", + logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { + record.Body().SetStr("foo") + }, + wantErr: false, + expected: map[string]interface{}{ + "@timestamp": "1970-01-01T00:00:00.000000000Z", + "observed_timestamp": "1970-01-01T00:00:00.000000000Z", + "data_stream": map[string]interface{}{}, + "resource": map[string]interface{}{}, + "scope": map[string]interface{}{}, + "body": map[string]interface{}{ + "text": "foo", + }, + }, + }, + { + name: "map body", + logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { + record.Body().SetEmptyMap().PutStr("foo.bar", "baz") + }, + wantErr: false, + expected: map[string]interface{}{ + "@timestamp": "1970-01-01T00:00:00.000000000Z", + "observed_timestamp": "1970-01-01T00:00:00.000000000Z", + "data_stream": map[string]interface{}{}, + "resource": map[string]interface{}{}, + "scope": map[string]interface{}{}, + "body": map[string]interface{}{ + "flattened": map[string]interface{}{ + "foo.bar": "baz", + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + resourceLogs := plog.NewResourceLogs() + scopeLogs := resourceLogs.ScopeLogs().AppendEmpty() + record := scopeLogs.LogRecords().AppendEmpty() + tt.logCustomizer(resourceLogs.Resource(), scopeLogs.Scope(), record) + + logBytes, err := serializeLog(resourceLogs.Resource(), "", scopeLogs.Scope(), "", record) + if (err != nil) != tt.wantErr { + t.Errorf("serializeLog() error = %v, wantErr %v", err, tt.wantErr) + } + eventAsJson := string(logBytes) + var result interface{} + decoder := json.NewDecoder(bytes.NewBuffer(logBytes)) + decoder.UseNumber() + if err := decoder.Decode(&result); err != nil { + t.Error(err) + } + + assert.Equal(t, tt.expected, result, eventAsJson) + }) + } +} From 74e94ffd4b73563f66d8b43ab74c58430e782573 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 13:11:31 +0100 Subject: [PATCH 02/30] Serialize spans and span events without objmodel --- exporter/elasticsearchexporter/exporter.go | 11 +- .../elasticsearchexporter/exporter_test.go | 8 +- exporter/elasticsearchexporter/model.go | 62 +----- .../elasticsearchexporter/pdata_serializer.go | 193 +++++++++++++++++- 4 files changed, 204 insertions(+), 70 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index ebd3800858a2..f3c830595ce0 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -440,14 +440,13 @@ func (e *elasticsearchExporter) pushSpanEvent( } fIndex = formattedIndex } - - document := e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL) - if document == nil { - return nil - } - docBytes, err := e.model.encodeDocument(*document) + 
docBytes, err := e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL) if err != nil { return err } + if docBytes == nil { + return nil + } + return bulkIndexerSession.Add(ctx, fIndex, bytes.NewReader(docBytes), nil) } diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index d6d74a222d21..0cb04d1db036 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -1641,8 +1641,8 @@ func TestExporterTraces(t *testing.T) { }) spanLink := span.Links().AppendEmpty() - spanLink.SetTraceID(pcommon.NewTraceIDEmpty()) - spanLink.SetSpanID(pcommon.NewSpanIDEmpty()) + spanLink.SetTraceID([16]byte{1}) + spanLink.SetSpanID([8]byte{1}) spanLink.SetFlags(10) spanLink.SetDroppedAttributesCount(11) spanLink.TraceState().FromRaw("bar") @@ -1655,11 +1655,11 @@ func TestExporterTraces(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"traces-generic.otel-default"}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","attributes":{"attr.foo":"attr.bar"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"traces"},"dropped_attributes_count":2,"dropped_events_count":3,"dropped_links_count":4,"duration":3600000000000,"kind":"Unspecified","links":[{"attributes":{"link.attr.foo":"link.attr.bar"},"dropped_attributes_count":11,"span_id":"","trace_id":"","trace_state":"bar"}],"name":"name","resource":{"attributes":{"resource.foo":"resource.bar"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"status":{"code":"Unset"},"trace_state":"foo"}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","attributes":{"attr.foo":"attr.bar"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"traces"},"dropped_attributes_count":2,"dropped_events_count":3,"dropped_links_count":4,"duration":3600000000000,"kind":"Unspecified","links":[{"attributes":{"link.attr.foo":"link.attr.bar"},"dropped_attributes_count":11,"span_id":"0100000000000000","trace_id":"01000000000000000000000000000000","trace_state":"bar"}],"name":"name","resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{},"status":{"code":"Unset"},"trace_state":"foo"}`), }, { Action: []byte(`{"create":{"_index":"logs-generic.otel-default"}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"event.attr.foo":"event.attr.bar","event.name":"exception"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"logs"},"dropped_attributes_count":1,"resource":{"attributes":{"resource.foo":"resource.bar"},"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","event_name":"exception","attributes":{"event.attr.foo":"event.attr.bar","event.name":"exception"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"logs"},"dropped_attributes_count":1,"resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{}}`), }, } diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 08a94313a6de..709f76e55286 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -79,7 +79,7 @@ var ErrInvalidTypeForBodyMapMode = errors.New("invalid log record body type for type mappingModel interface { encodeLog(pcommon.Resource, string, plog.LogRecord, pcommon.InstrumentationScope, string) 
([]byte, error) encodeSpan(pcommon.Resource, string, ptrace.Span, pcommon.InstrumentationScope, string) ([]byte, error) - encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) *objmodel.Document + encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) upsertMetricDataPointValue(map[uint32]objmodel.Document, pcommon.Resource, string, pcommon.InstrumentationScope, string, pmetric.Metric, dataPoint) error encodeDocument(objmodel.Document) ([]byte, error) } @@ -582,7 +582,7 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL st var document objmodel.Document switch m.mode { case MappingOTel: - document = m.encodeSpanOTelMode(resource, resourceSchemaURL, span, scope, scopeSchemaURL) + return serializeSpan(resource, resourceSchemaURL, scope, scopeSchemaURL, span) default: document = m.encodeSpanDefaultMode(resource, span, scope) } @@ -593,47 +593,6 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL st return buf.Bytes(), err } -func (m *encodeModel) encodeSpanOTelMode(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, scope pcommon.InstrumentationScope, scopeSchemaURL string) objmodel.Document { - var document objmodel.Document - document.AddTimestamp("@timestamp", span.StartTimestamp()) - document.AddTraceID("trace_id", span.TraceID()) - document.AddSpanID("span_id", span.SpanID()) - document.AddString("trace_state", span.TraceState().AsRaw()) - document.AddSpanID("parent_span_id", span.ParentSpanID()) - document.AddString("name", span.Name()) - document.AddString("kind", span.Kind().String()) - document.AddUInt("duration", uint64(span.EndTimestamp()-span.StartTimestamp())) - - m.encodeAttributesOTelMode(&document, span.Attributes()) - - document.AddInt("dropped_attributes_count", int64(span.DroppedAttributesCount())) - document.AddInt("dropped_events_count", int64(span.DroppedEventsCount())) - - links := pcommon.NewValueSlice() - linkSlice := links.SetEmptySlice() - spanLinks := span.Links() - for i := 0; i < spanLinks.Len(); i++ { - linkMap := linkSlice.AppendEmpty().SetEmptyMap() - spanLink := spanLinks.At(i) - linkMap.PutStr("trace_id", spanLink.TraceID().String()) - linkMap.PutStr("span_id", spanLink.SpanID().String()) - linkMap.PutStr("trace_state", spanLink.TraceState().AsRaw()) - mAttr := linkMap.PutEmptyMap("attributes") - spanLink.Attributes().CopyTo(mAttr) - linkMap.PutInt("dropped_attributes_count", int64(spanLink.DroppedAttributesCount())) - } - document.AddAttribute("links", links) - - document.AddInt("dropped_links_count", int64(span.DroppedLinksCount())) - document.AddString("status.message", span.Status().Message()) - document.AddString("status.code", span.Status().Code().String()) - - m.encodeResourceOTelMode(&document, resource, resourceSchemaURL) - m.encodeScopeOTelMode(&document, scope, scopeSchemaURL) - - return document -} - func (m *encodeModel) encodeSpanDefaultMode(resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) objmodel.Document { var document objmodel.Document document.AddTimestamp("@timestamp", span.StartTimestamp()) // We use @timestamp in order to ensure that we can index if the default data stream logs template is used. 
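
Aside: the serializers above write documents directly with go-structform's streaming JSON visitor instead of building an objmodel.Document first. A minimal, self-contained sketch of that visitor API (the calls mirror those used in pdata_serializer.go; the field names and values here are illustrative):

package main

import (
	"bytes"
	"fmt"

	"github.com/elastic/go-structform"
	"github.com/elastic/go-structform/json"
)

func main() {
	var buf bytes.Buffer
	v := json.NewVisitor(&buf)
	// Keep the radix point so 1.0 is not shortened to 1, which would make
	// Elasticsearch dynamically map the field as a long instead of a float.
	v.SetExplicitRadixPoint(true)
	_ = v.OnObjectStart(-1, structform.AnyType)
	_ = v.OnKey("name")
	_ = v.OnString("my-span")
	_ = v.OnKey("duration")
	_ = v.OnFloat64(1.0)
	_ = v.OnObjectFinished()
	fmt.Println(buf.String()) // {"name":"my-span","duration":1.0}
}

Because the visitor streams straight into the buffer, each document is produced in a single pass with no intermediate document tree to build, sort, and dedup.
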
@@ -654,24 +613,13 @@ func (m *encodeModel) encodeSpanDefaultMode(resource pcommon.Resource, span ptra return document } -func (m *encodeModel) encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) *objmodel.Document { +func (m *encodeModel) encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) { if m.mode != MappingOTel { // Currently span events are stored separately only in OTel mapping mode. // In other modes, they are stored within the span document. - return nil + return nil, nil } - var document objmodel.Document - document.AddTimestamp("@timestamp", spanEvent.Timestamp()) - document.AddString("attributes.event.name", spanEvent.Name()) - document.AddSpanID("span_id", span.SpanID()) - document.AddTraceID("trace_id", span.TraceID()) - document.AddInt("dropped_attributes_count", int64(spanEvent.DroppedAttributesCount())) - - m.encodeAttributesOTelMode(&document, spanEvent.Attributes()) - m.encodeResourceOTelMode(&document, resource, resourceSchemaURL) - m.encodeScopeOTelMode(&document, scope, scopeSchemaURL) - - return &document + return serializeSpanEvent(resource, resourceSchemaURL, scope, scopeSchemaURL, span, spanEvent) } func (m *encodeModel) encodeAttributes(document *objmodel.Document, attributes pcommon.Map) { diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 76f875bf5709..6bce3db40b07 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -7,11 +7,188 @@ import ( "github.com/elastic/go-structform/json" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" + "go.opentelemetry.io/collector/pdata/ptrace" "strings" ) const tsLayout = "2006-01-02T15:04:05.000000000Z" +func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent) ([]byte, error) { + var buf bytes.Buffer + + v := json.NewVisitor(&buf) + // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. + // This is required to generate the correct dynamic mapping in ES. 
+ v.SetExplicitRadixPoint(true) + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return nil, err + } + if err := writeTimestampField(v, "@timestamp", spanEvent.Timestamp()); err != nil { + return nil, err + } + if err := writeDataStream(v, spanEvent.Attributes()); err != nil { + return nil, err + } + if err := writeTraceIdField(v, span.TraceID()); err != nil { + return nil, err + } + if err := writeSpanIdField(v, "span_id", span.SpanID()); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanEvent.DroppedAttributesCount())); err != nil { + return nil, err + } + if err := writeStringFieldSkipDefault(v, "event_name", spanEvent.Name()); err != nil { + return nil, err + } + + var attributes pcommon.Map + if spanEvent.Name() != "" { + attributes = pcommon.NewMap() + spanEvent.Attributes().CopyTo(attributes) + attributes.PutStr("event.name", spanEvent.Name()) + } else { + attributes = spanEvent.Attributes() + } + if err := writeAttributes(v, attributes, false); err != nil { + return nil, err + } + if err := writeResource(v, resource, resourceSchemaURL); err != nil { + return nil, err + } + if err := writeScope(v, scope, scopeSchemaURL); err != nil { + return nil, err + } + if err := v.OnObjectFinished(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span) ([]byte, error) { + var buf bytes.Buffer + + v := json.NewVisitor(&buf) + // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. + // This is required to generate the correct dynamic mapping in ES. + v.SetExplicitRadixPoint(true) + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return nil, err + } + if err := writeTimestampField(v, "@timestamp", span.StartTimestamp()); err != nil { + return nil, err + } + if err := writeDataStream(v, span.Attributes()); err != nil { + return nil, err + } + if err := writeTraceIdField(v, span.TraceID()); err != nil { + return nil, err + } + if err := writeSpanIdField(v, "span_id", span.SpanID()); err != nil { + return nil, err + } + if err := writeStringFieldSkipDefault(v, "trace_state", span.TraceState().AsRaw()); err != nil { + return nil, err + } + if err := writeSpanIdField(v, "parent_span_id", span.ParentSpanID()); err != nil { + return nil, err + } + if err := writeStringFieldSkipDefault(v, "name", span.Name()); err != nil { + return nil, err + } + if err := writeStringFieldSkipDefault(v, "kind", span.Kind().String()); err != nil { + return nil, err + } + if err := writeUIntField(v, "duration", uint64(span.EndTimestamp()-span.StartTimestamp())); err != nil { + return nil, err + } + if err := writeAttributes(v, span.Attributes(), false); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(span.DroppedAttributesCount())); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "dropped_events_count", int64(span.DroppedEventsCount())); err != nil { + return nil, err + } + if err := writeSpanLinks(v, span); err != nil { + return nil, err + } + if err := writeIntFieldSkipDefault(v, "dropped_links_count", int64(span.DroppedLinksCount())); err != nil { + return nil, err + } + if err := writeStatus(v, span.Status()); err != nil { + return nil, err + } + if err := writeResource(v, resource, resourceSchemaURL); err != nil { + return nil, err + } + if err := 
writeScope(v, scope, scopeSchemaURL); err != nil { + return nil, err + } + if err := v.OnObjectFinished(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func writeStatus(v *json.Visitor, status ptrace.Status) error { + if err := v.OnKey("status"); err != nil { + return err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "message", status.Message()); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "code", status.Code().String()); err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + return err + } + return nil +} + +func writeSpanLinks(v *json.Visitor, span ptrace.Span) error { + if err := v.OnKey("links"); err != nil { + return err + } + if err := v.OnArrayStart(-1, structform.AnyType); err != nil { + return err + } + spanLinks := span.Links() + for i := 0; i < spanLinks.Len(); i++ { + spanLink := spanLinks.At(i) + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "trace_id", spanLink.TraceID().String()); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "span_id", spanLink.SpanID().String()); err != nil { + return err + } + if err := writeStringFieldSkipDefault(v, "trace_state", spanLink.TraceState().AsRaw()); err != nil { + return err + } + if err := writeAttributes(v, spanLink.Attributes(), false); err != nil { + return err + } + if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanLink.DroppedAttributesCount())); err != nil { + return err + } + if err := v.OnObjectFinished(); err != nil { + return err + } + } + if err := v.OnArrayFinished(); err != nil { + return err + } + return nil +} + func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, record plog.LogRecord) ([]byte, error) { var buf bytes.Buffer @@ -44,7 +221,7 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco if err := writeTraceIdField(v, record.TraceID()); err != nil { return nil, err } - if err := writeSpanIdField(v, record.SpanID()); err != nil { + if err := writeSpanIdField(v, "span_id", record.SpanID()); err != nil { return nil, err } if err := writeAttributes(v, record.Attributes(), false); err != nil { @@ -304,6 +481,16 @@ func writeTimestampField(v *json.Visitor, key string, timestamp pcommon.Timestam return nil } +func writeUIntField(v *json.Visitor, key string, i uint64) error { + if err := v.OnKey(key); err != nil { + return err + } + if err := v.OnUint64(i); err != nil { + return err + } + return nil +} + func writeIntFieldSkipDefault(v *json.Visitor, key string, i int64) error { if i == 0 { return nil @@ -343,11 +530,11 @@ func writeTraceIdField(v *json.Visitor, id pcommon.TraceID) error { return nil } -func writeSpanIdField(v *json.Visitor, id pcommon.SpanID) error { +func writeSpanIdField(v *json.Visitor, key string, id pcommon.SpanID) error { if id.IsEmpty() { return nil } - if err := v.OnKey("span_id"); err != nil { + if err := v.OnKey(key); err != nil { return err } if err := v.OnString(hex.EncodeToString(id[:])); err != nil { From 8956a9e751a04a0fa787b88a989dc67e2ba4683d Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 18:25:22 +0100 Subject: [PATCH 03/30] Serialize metrics without objmodel --- exporter/elasticsearchexporter/exporter.go | 72 +++---- .../elasticsearchexporter/exporter_test.go | 20 +- 
exporter/elasticsearchexporter/model.go | 199 +++++------------- exporter/elasticsearchexporter/model_test.go | 30 +-- .../elasticsearchexporter/pdata_serializer.go | 124 +++++++++-- .../pdata_serializer_test.go | 8 +- 6 files changed, 230 insertions(+), 223 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index f3c830595ce0..dadc5e30e526 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -19,8 +19,6 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" "go.uber.org/zap" - - "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/objmodel" ) type elasticsearchExporter struct { @@ -193,21 +191,18 @@ func (e *elasticsearchExporter) pushMetricsData( } defer session.End() - var ( - validationErrs []error // log instead of returning these so that upstream does not retry - errs []error - ) + var errs []error resourceMetrics := metrics.ResourceMetrics() for i := 0; i < resourceMetrics.Len(); i++ { resourceMetric := resourceMetrics.At(i) resource := resourceMetric.Resource() scopeMetrics := resourceMetric.ScopeMetrics() - resourceDocs := make(map[string]map[uint32]objmodel.Document) - for j := 0; j < scopeMetrics.Len(); j++ { + var validationErrs []error // log instead of returning these so that upstream does not retry scopeMetrics := scopeMetrics.At(j) scope := scopeMetrics.Scope() + groupedDataPointsByIndex := make(map[string]map[uint32][]dataPoint) for k := 0; k < scopeMetrics.Metrics().Len(); k++ { metric := scopeMetrics.Metrics().At(k) @@ -216,13 +211,17 @@ func (e *elasticsearchExporter) pushMetricsData( if err != nil { return err } - if _, ok := resourceDocs[fIndex]; !ok { - resourceDocs[fIndex] = make(map[uint32]objmodel.Document) + groupedDataPoints, ok := groupedDataPointsByIndex[fIndex] + if !ok { + groupedDataPoints = make(map[uint32][]dataPoint) + groupedDataPointsByIndex[fIndex] = groupedDataPoints } - - if err = e.model.upsertMetricDataPointValue(resourceDocs[fIndex], resource, - resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), metric, dp); err != nil { - return err + dpHash := e.model.hashDataPoint(dp) + dataPoints, ok := groupedDataPoints[dpHash] + if !ok { + groupedDataPoints[dpHash] = []dataPoint{dp} + } else { + groupedDataPoints[dpHash] = append(dataPoints, dp) } return nil } @@ -232,7 +231,7 @@ func (e *elasticsearchExporter) pushMetricsData( dps := metric.Sum().DataPoints() for l := 0; l < dps.Len(); l++ { dp := dps.At(l) - if err := upsertDataPoint(newNumberDataPoint(dp)); err != nil { + if err := upsertDataPoint(newNumberDataPoint(metric, dp)); err != nil { validationErrs = append(validationErrs, err) continue } @@ -241,7 +240,7 @@ func (e *elasticsearchExporter) pushMetricsData( dps := metric.Gauge().DataPoints() for l := 0; l < dps.Len(); l++ { dp := dps.At(l) - if err := upsertDataPoint(newNumberDataPoint(dp)); err != nil { + if err := upsertDataPoint(newNumberDataPoint(metric, dp)); err != nil { validationErrs = append(validationErrs, err) continue } @@ -254,7 +253,7 @@ func (e *elasticsearchExporter) pushMetricsData( dps := metric.ExponentialHistogram().DataPoints() for l := 0; l < dps.Len(); l++ { dp := dps.At(l) - if err := upsertDataPoint(newExponentialHistogramDataPoint(dp)); err != nil { + if err := upsertDataPoint(newExponentialHistogramDataPoint(metric, dp)); err != nil { validationErrs = append(validationErrs, err) continue } @@ -267,7 
+266,7 @@ func (e *elasticsearchExporter) pushMetricsData( dps := metric.Histogram().DataPoints() for l := 0; l < dps.Len(); l++ { dp := dps.At(l) - if err := upsertDataPoint(newHistogramDataPoint(dp)); err != nil { + if err := upsertDataPoint(newHistogramDataPoint(metric, dp)); err != nil { validationErrs = append(validationErrs, err) continue } @@ -276,37 +275,32 @@ func (e *elasticsearchExporter) pushMetricsData( dps := metric.Summary().DataPoints() for l := 0; l < dps.Len(); l++ { dp := dps.At(l) - if err := upsertDataPoint(newSummaryDataPoint(dp)); err != nil { + if err := upsertDataPoint(newSummaryDataPoint(metric, dp)); err != nil { validationErrs = append(validationErrs, err) continue } } } } - } - - if len(validationErrs) > 0 { - e.Logger.Warn("validation errors", zap.Error(errors.Join(validationErrs...))) - } - for fIndex, docs := range resourceDocs { - for _, doc := range docs { - var ( - docBytes []byte - err error - ) - docBytes, err = e.model.encodeDocument(doc) - if err != nil { - errs = append(errs, err) - continue - } - if err := session.Add(ctx, fIndex, bytes.NewReader(docBytes), doc.DynamicTemplates()); err != nil { - if cerr := ctx.Err(); cerr != nil { - return cerr + for fIndex, groupedDataPoints := range groupedDataPointsByIndex { + for _, dataPoints := range groupedDataPoints { + docBytes, dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs) + if err != nil { + errs = append(errs, err) + continue + } + if err := session.Add(ctx, fIndex, bytes.NewReader(docBytes), dynamicTemplates); err != nil { + if cerr := ctx.Err(); cerr != nil { + return cerr + } + errs = append(errs, err) } - errs = append(errs, err) } } + if len(validationErrs) > 0 { + e.Logger.Warn("validation errors", zap.Error(errors.Join(validationErrs...))) + } } } diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 0cb04d1db036..27d38105e67f 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -761,8 +761,8 @@ func TestExporterLogs(t *testing.T) { assert.Len(t, rec.Items(), 1) doc := rec.Items()[0].Document assert.JSONEq(t, `{"some.record.attribute":{"foo.bar":{"baz":"qux"}}}`, gjson.GetBytes(doc, `attributes`).Raw) - assert.JSONEq(t, `{"some.scope.attribute":"{\"foo.bar\":{\"baz\":\"qux\"}}"}`, gjson.GetBytes(doc, `scope.attributes`).Raw) - assert.JSONEq(t, `{"some.resource.attribute":"{\"foo.bar\":{\"baz\":\"qux\"}}"}`, gjson.GetBytes(doc, `resource.attributes`).Raw) + assert.JSONEq(t, `{"some.scope.attribute":{"foo.bar":{"baz":"qux"}}}`, gjson.GetBytes(doc, `scope.attributes`).Raw) + assert.JSONEq(t, `{"some.resource.attribute":{"foo.bar":{"baz":"qux"}}}`, gjson.GetBytes(doc, `resource.attributes`).Raw) }) } @@ -1196,19 +1196,19 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: 
[]byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.sum":"gauge_double"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.sum":1.5},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"start_timestamp":"1970-01-01T02:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.sum":1.5},"resource":{},"scope":{},"start_timestamp":"1970-01-01T02:00:00.000000000Z"}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T03:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0},"start_timestamp":"1970-01-01T03:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"1970-01-01T03:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{},"scope":{},"start_timestamp":"1970-01-01T03:00:00.000000000Z"}`), }, } @@ -1277,7 +1277,7 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.sum":"gauge_long","metrics.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, } @@ -1327,11 +1327,11 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.histogram.summary":"summary"}}}`), - Document: 
[]byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.exphistogram.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, } @@ -1370,7 +1370,7 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.foo.bar":"gauge_long","metrics.foo":"gauge_long","metrics.foo.bar.baz":"gauge_long"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{"dropped_attributes_count":0},"scope":{"dropped_attributes_count":0}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{},"scope":{}}`), }, } diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 709f76e55286..62e49f90d335 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -80,8 +80,9 @@ type mappingModel interface { encodeLog(pcommon.Resource, string, plog.LogRecord, pcommon.InstrumentationScope, string) ([]byte, error) encodeSpan(pcommon.Resource, string, ptrace.Span, pcommon.InstrumentationScope, string) ([]byte, error) encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) - upsertMetricDataPointValue(map[uint32]objmodel.Document, pcommon.Resource, string, pcommon.InstrumentationScope, string, pmetric.Metric, dataPoint) error + hashDataPoint(dataPoint) uint32 encodeDocument(objmodel.Document) ([]byte, error) + encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) } // encodeModel tries to keep the event as close to the original open telemetry semantics as is. 
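
Aside: with upsertMetricDataPointValue removed, pushMetricsData (above) first buckets data points by target index and by a per-document hash, then hands each bucket to encodeMetrics. A condensed, runnable sketch of that grouping, using illustrative stand-ins for the exporter's dataPoint interface and (*encodeModel).hashDataPoint:

package main

import "fmt"

// dataPoint and hashDataPoint are placeholder stand-ins, not the exporter's types.
type dataPoint struct{ name string }

func hashDataPoint(dp dataPoint) uint32 { return uint32(len(dp.name)) } // placeholder hash

func main() {
	// index -> document hash -> data points sharing one Elasticsearch document
	grouped := make(map[string]map[uint32][]dataPoint)
	add := func(index string, dp dataPoint) {
		byHash, ok := grouped[index]
		if !ok {
			byHash = make(map[uint32][]dataPoint)
			grouped[index] = byHash
		}
		h := hashDataPoint(dp)
		byHash[h] = append(byHash[h], dp)
	}
	add("metrics-generic.otel-default", dataPoint{name: "metric.foo"})
	add("metrics-generic.otel-default", dataPoint{name: "metric.bar"})
	// Each []dataPoint bucket is then serialized into a single document
	// (plus its dynamic templates) by encodeMetrics.
	fmt.Println(len(grouped["metrics-generic.otel-default"]))
}
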
@@ -103,6 +104,7 @@ type dataPoint interface { DynamicTemplate(pmetric.Metric) string DocCount() uint64 HasMappingHint(mappingHint) bool + Metric() pmetric.Metric } const ( @@ -216,97 +218,54 @@ func (m *encodeModel) encodeDocument(document objmodel.Document) ([]byte, error) } // upsertMetricDataPointValue upserts a datapoint value to documents which is already hashed by resource and index -func (m *encodeModel) upsertMetricDataPointValue(documents map[uint32]objmodel.Document, resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, metric pmetric.Metric, dp dataPoint) error { +func (m *encodeModel) hashDataPoint(dp dataPoint) uint32 { switch m.mode { case MappingOTel: - return m.upsertMetricDataPointValueOTelMode(documents, resource, resourceSchemaURL, scope, scopeSchemaURL, metric, dp) - case MappingECS: - return m.upsertMetricDataPointValueECSMode(documents, resource, resourceSchemaURL, scope, scopeSchemaURL, metric, dp) + return metricOTelHash(dp, dp.Metric().Unit()) default: // Defaults to ECS for backward compatibility - return m.upsertMetricDataPointValueECSMode(documents, resource, resourceSchemaURL, scope, scopeSchemaURL, metric, dp) - } -} - -func (m *encodeModel) upsertMetricDataPointValueECSMode(documents map[uint32]objmodel.Document, resource pcommon.Resource, _ string, _ pcommon.InstrumentationScope, _ string, metric pmetric.Metric, dp dataPoint) error { - value, err := dp.Value() - if err != nil { - return err - } - - hash := metricECSHash(dp.Timestamp(), dp.Attributes()) - var ( - document objmodel.Document - ok bool - ) - if document, ok = documents[hash]; !ok { - encodeAttributesECSMode(&document, resource.Attributes(), resourceAttrsConversionMap, resourceAttrsToPreserve) - document.AddTimestamp("@timestamp", dp.Timestamp()) - document.AddAttributes("", dp.Attributes()) + return metricECSHash(dp.Timestamp(), dp.Attributes()) } - - document.AddAttribute(metric.Name(), value) - - documents[hash] = document - return nil } -func (m *encodeModel) upsertMetricDataPointValueOTelMode(documents map[uint32]objmodel.Document, resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, metric pmetric.Metric, dp dataPoint) error { - value, err := dp.Value() - if err != nil { - return err - } +func (m *encodeModel) encodeDataPointsECSMode(resource pcommon.Resource, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { + dp0 := dataPoints[0] + var document objmodel.Document + encodeAttributesECSMode(&document, resource.Attributes(), resourceAttrsConversionMap, resourceAttrsToPreserve) + document.AddTimestamp("@timestamp", dp0.Timestamp()) + document.AddAttributes("", dp0.Attributes()) - // documents is per-resource. 
Therefore, there is no need to hash resource attributes - hash := metricOTelHash(dp, scope.Attributes(), metric.Unit()) - var ( - document objmodel.Document - ok bool - ) - if document, ok = documents[hash]; !ok { - document.AddTimestamp("@timestamp", dp.Timestamp()) - if dp.StartTimestamp() != 0 { - document.AddTimestamp("start_timestamp", dp.StartTimestamp()) + for _, dp := range dataPoints { + value, err := dp.Value() + if err != nil { + *validationErrors = append(*validationErrors, err) + continue } - document.AddString("unit", metric.Unit()) - - m.encodeAttributesOTelMode(&document, dp.Attributes()) - m.encodeResourceOTelMode(&document, resource, resourceSchemaURL) - m.encodeScopeOTelMode(&document, scope, scopeSchemaURL) + document.AddAttribute(dp.Metric().Name(), value) } + docBytes, err := m.encodeDocument(document) - if dp.HasMappingHint(hintDocCount) { - docCount := dp.DocCount() - document.AddUInt("_doc_count", docCount) - } + return docBytes, document.DynamicTemplates(), err +} - switch value.Type() { - case pcommon.ValueTypeMap: - m := pcommon.NewMap() - value.Map().CopyTo(m) - document.Add("metrics."+metric.Name(), objmodel.UnflattenableObjectValue(m)) +func (m *encodeModel) encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { + switch m.mode { + case MappingOTel: + return serializeMetrics(resource, resourceSchemaURL, scope, scopeSchemaURL, dataPoints, validationErrors) default: - document.Add("metrics."+metric.Name(), objmodel.ValueFromAttribute(value)) - } - // TODO: support quantiles - // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/34561 + return m.encodeDataPointsECSMode(resource, dataPoints, validationErrors) - // DynamicTemplate returns the name of dynamic template that applies to the metric and data point, - // so that the field is indexed into Elasticsearch with the correct mapping. The name should correspond to a - // dynamic template that is defined in ES mapping, e.g. 
- // https://github.com/elastic/elasticsearch/blob/8.15/x-pack/plugin/core/template-resources/src/main/resources/metrics%40mappings.json - document.AddDynamicTemplate("metrics."+metric.Name(), dp.DynamicTemplate(metric)) - documents[hash] = document - return nil + } } type summaryDataPoint struct { pmetric.SummaryDataPoint mappingHintGetter + metric pmetric.Metric } -func newSummaryDataPoint(dp pmetric.SummaryDataPoint) summaryDataPoint { - return summaryDataPoint{SummaryDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes())} +func newSummaryDataPoint(metric pmetric.Metric, dp pmetric.SummaryDataPoint) summaryDataPoint { + return summaryDataPoint{SummaryDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes()), metric: metric} } func (dp summaryDataPoint) Value() (pcommon.Value, error) { @@ -327,13 +286,18 @@ func (dp summaryDataPoint) DocCount() uint64 { return dp.Count() } +func (dp summaryDataPoint) Metric() pmetric.Metric { + return dp.metric +} + type exponentialHistogramDataPoint struct { pmetric.ExponentialHistogramDataPoint mappingHintGetter + metric pmetric.Metric } -func newExponentialHistogramDataPoint(dp pmetric.ExponentialHistogramDataPoint) exponentialHistogramDataPoint { - return exponentialHistogramDataPoint{ExponentialHistogramDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes())} +func newExponentialHistogramDataPoint(metric pmetric.Metric, dp pmetric.ExponentialHistogramDataPoint) exponentialHistogramDataPoint { + return exponentialHistogramDataPoint{ExponentialHistogramDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes()), metric: metric} } func (dp exponentialHistogramDataPoint) Value() (pcommon.Value, error) { @@ -374,13 +338,18 @@ func (dp exponentialHistogramDataPoint) DocCount() uint64 { return dp.Count() } +func (dp exponentialHistogramDataPoint) Metric() pmetric.Metric { + return dp.metric +} + type histogramDataPoint struct { pmetric.HistogramDataPoint mappingHintGetter + metric pmetric.Metric } -func newHistogramDataPoint(dp pmetric.HistogramDataPoint) histogramDataPoint { - return histogramDataPoint{HistogramDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes())} +func newHistogramDataPoint(metric pmetric.Metric, dp pmetric.HistogramDataPoint) histogramDataPoint { + return histogramDataPoint{HistogramDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes()), metric: metric} } func (dp histogramDataPoint) Value() (pcommon.Value, error) { @@ -405,6 +374,10 @@ func (dp histogramDataPoint) DocCount() uint64 { return dp.HistogramDataPoint.Count() } +func (dp histogramDataPoint) Metric() pmetric.Metric { + return dp.metric +} + func histogramToValue(dp pmetric.HistogramDataPoint) (pcommon.Value, error) { // Histogram conversion function is from // https://github.com/elastic/apm-data/blob/3b28495c3cbdc0902983134276eb114231730249/input/otlp/metrics.go#L277 @@ -456,10 +429,11 @@ func histogramToValue(dp pmetric.HistogramDataPoint) (pcommon.Value, error) { type numberDataPoint struct { pmetric.NumberDataPoint mappingHintGetter + metric pmetric.Metric } -func newNumberDataPoint(dp pmetric.NumberDataPoint) numberDataPoint { - return numberDataPoint{NumberDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes())} +func newNumberDataPoint(metric pmetric.Metric, dp pmetric.NumberDataPoint) numberDataPoint { + return numberDataPoint{NumberDataPoint: dp, mappingHintGetter: newMappingHintGetter(dp.Attributes()), metric: metric} } func (dp numberDataPoint) Value() 
(pcommon.Value, error) { @@ -510,73 +484,11 @@ func (dp numberDataPoint) DocCount() uint64 { return 1 } -var errInvalidNumberDataPoint = errors.New("invalid number data point") - -func (m *encodeModel) encodeResourceOTelMode(document *objmodel.Document, resource pcommon.Resource, resourceSchemaURL string) { - resourceMapVal := pcommon.NewValueMap() - resourceMap := resourceMapVal.Map() - if resourceSchemaURL != "" { - resourceMap.PutStr("schema_url", resourceSchemaURL) - } - resourceMap.PutInt("dropped_attributes_count", int64(resource.DroppedAttributesCount())) - resourceAttrMap := resourceMap.PutEmptyMap("attributes") - resource.Attributes().CopyTo(resourceAttrMap) - resourceAttrMap.RemoveIf(func(key string, _ pcommon.Value) bool { - switch key { - case dataStreamType, dataStreamDataset, dataStreamNamespace: - return true - } - return false - }) - mergeGeolocation(resourceAttrMap) - document.Add("resource", objmodel.ValueFromAttribute(resourceMapVal)) -} - -func (m *encodeModel) encodeScopeOTelMode(document *objmodel.Document, scope pcommon.InstrumentationScope, scopeSchemaURL string) { - scopeMapVal := pcommon.NewValueMap() - scopeMap := scopeMapVal.Map() - if scope.Name() != "" { - scopeMap.PutStr("name", scope.Name()) - } - if scope.Version() != "" { - scopeMap.PutStr("version", scope.Version()) - } - if scopeSchemaURL != "" { - scopeMap.PutStr("schema_url", scopeSchemaURL) - } - scopeMap.PutInt("dropped_attributes_count", int64(scope.DroppedAttributesCount())) - scopeAttrMap := scopeMap.PutEmptyMap("attributes") - scope.Attributes().CopyTo(scopeAttrMap) - scopeAttrMap.RemoveIf(func(key string, _ pcommon.Value) bool { - switch key { - case dataStreamType, dataStreamDataset, dataStreamNamespace: - return true - } - return false - }) - mergeGeolocation(scopeAttrMap) - document.Add("scope", objmodel.ValueFromAttribute(scopeMapVal)) +func (dp numberDataPoint) Metric() pmetric.Metric { + return dp.metric } -func (m *encodeModel) encodeAttributesOTelMode(document *objmodel.Document, attributeMap pcommon.Map) { - attrsCopy := pcommon.NewMap() // Copy to avoid mutating original map - attributeMap.CopyTo(attrsCopy) - attrsCopy.RemoveIf(func(key string, val pcommon.Value) bool { - switch key { - case dataStreamType, dataStreamDataset, dataStreamNamespace: - // At this point the data_stream attributes are expected to be in the record attributes, - // updated by the router. 
- // Move them to the top of the document and remove them from the record - document.AddAttribute(key, val) - return true - case mappingHintsAttrKey: - return true - } - return false - }) - mergeGeolocation(attrsCopy) - document.AddAttributes("attributes", attrsCopy) -} +var errInvalidNumberDataPoint = errors.New("invalid number data point") func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) { var document objmodel.Document @@ -798,7 +710,7 @@ func metricECSHash(timestamp pcommon.Timestamp, attributes pcommon.Map) uint32 { return hasher.Sum32() } -func metricOTelHash(dp dataPoint, scopeAttrs pcommon.Map, unit string) uint32 { +func metricOTelHash(dp dataPoint, unit string) uint32 { hasher := fnv.New32a() timestampBuf := make([]byte, 8) @@ -810,7 +722,6 @@ func metricOTelHash(dp dataPoint, scopeAttrs pcommon.Map, unit string) uint32 { hasher.Write([]byte(unit)) - mapHashExcludeReservedAttrs(hasher, scopeAttrs) mapHashExcludeReservedAttrs(hasher, dp.Attributes(), mappingHintsAttrKey) return hasher.Sum32() diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index eda750a540e7..3fb62412b377 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -99,24 +99,26 @@ func TestEncodeMetric(t *testing.T) { mode: MappingECS, } - docs := make(map[uint32]objmodel.Document) + groupedDataPoints := make(map[uint32][]dataPoint) var docsBytes [][]byte - for i := 0; i < metrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0).Sum().DataPoints().Len(); i++ { - err := model.upsertMetricDataPointValue( - docs, - metrics.ResourceMetrics().At(0).Resource(), - "", - metrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Scope(), - "", - metrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0), - newNumberDataPoint(metrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0).Sum().DataPoints().At(i)), - ) - require.NoError(t, err) + rm := metrics.ResourceMetrics().At(0) + sm := rm.ScopeMetrics().At(0) + m := sm.Metrics().At(0) + dps := m.Sum().DataPoints() + for i := 0; i < dps.Len(); i++ { + dp := newNumberDataPoint(m, dps.At(i)) + dpHash := model.hashDataPoint(dp) + dataPoints, ok := groupedDataPoints[dpHash] + if !ok { + groupedDataPoints[dpHash] = []dataPoint{dp} + } else { + groupedDataPoints[dpHash] = append(dataPoints, dp) + } } - for _, doc := range docs { - bytes, err := model.encodeDocument(doc) + for _, dataPoints := range groupedDataPoints { + bytes, _, err := model.encodeMetrics(rm.Resource(), rm.SchemaUrl(), sm.Scope(), sm.SchemaUrl(), dataPoints, nil) require.NoError(t, err) docsBytes = append(docsBytes, bytes) } diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 6bce3db40b07..39c48d88b528 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -13,6 +13,99 @@ import ( const tsLayout = "2006-01-02T15:04:05.000000000Z" +func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { + if len(dataPoints) == 0 { + return nil, nil, nil + } + dp0 := dataPoints[0] + var buf bytes.Buffer + + v := json.NewVisitor(&buf) + // Enable ExplicitRadixPoint such 
that 1.0 is encoded as 1.0 instead of 1. + // This is required to generate the correct dynamic mapping in ES. + v.SetExplicitRadixPoint(true) + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return nil, nil, err + } + if err := writeTimestampField(v, "@timestamp", dp0.Timestamp()); err != nil { + return nil, nil, err + } + if dp0.StartTimestamp() != 0 { + if err := writeTimestampField(v, "start_timestamp", dp0.StartTimestamp()); err != nil { + return nil, nil, err + } + } + if err := writeStringFieldSkipDefault(v, "unit", dp0.Metric().Unit()); err != nil { + return nil, nil, err + } + if err := writeDataStream(v, dp0.Attributes()); err != nil { + return nil, nil, err + } + if err := writeAttributes(v, dp0.Attributes(), true); err != nil { + return nil, nil, err + } + if err := writeResource(v, resource, resourceSchemaURL, true); err != nil { + return nil, nil, err + } + if err := writeScope(v, scope, scopeSchemaURL, true); err != nil { + return nil, nil, err + } + dynamicTemplates, err := serializeDataPoints(v, dataPoints, validationErrors) + if err := v.OnObjectFinished(); err != nil { + return nil, nil, err + } + if err != nil { + return nil, nil, err + } + + return buf.Bytes(), dynamicTemplates, nil +} + +func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErrors *[]error) (map[string]string, error) { + if err := v.OnKey("metrics"); err != nil { + return nil, err + } + if err := v.OnObjectStart(-1, structform.AnyType); err != nil { + return nil, err + } + + dynamicTemplates := make(map[string]string, len(dataPoints)) + var docCount uint64 = 0 + for _, dp := range dataPoints { + metric := dp.Metric() + value, err := dp.Value() + if dp.HasMappingHint(hintDocCount) { + docCount = dp.DocCount() + } + if err != nil { + *validationErrors = append(*validationErrors, err) + continue + } + if err = v.OnKey(metric.Name()); err != nil { + return nil, err + } + // TODO: support quantiles + // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/34561 + if err := writeValue(v, value, false); err != nil { + return nil, err + } + // DynamicTemplate returns the name of dynamic template that applies to the metric and data point, + // so that the field is indexed into Elasticsearch with the correct mapping. The name should correspond to a + // dynamic template that is defined in ES mapping, e.g. 
+ // https://github.com/elastic/elasticsearch/blob/8.15/x-pack/plugin/core/template-resources/src/main/resources/metrics%40mappings.json + dynamicTemplates["metrics."+metric.Name()] = dp.DynamicTemplate(metric) + } + if err := v.OnObjectFinished(); err != nil { + return nil, err + } + if docCount != 0 { + if err := writeUIntField(v, "_doc_count", docCount); err != nil { + return nil, err + } + } + return dynamicTemplates, nil +} + func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent) ([]byte, error) { var buf bytes.Buffer @@ -53,10 +146,10 @@ func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, sco if err := writeAttributes(v, attributes, false); err != nil { return nil, err } - if err := writeResource(v, resource, resourceSchemaURL); err != nil { + if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { return nil, err } - if err := writeScope(v, scope, scopeSchemaURL); err != nil { + if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { return nil, err } if err := v.OnObjectFinished(); err != nil { @@ -120,10 +213,10 @@ func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pc if err := writeStatus(v, span.Status()); err != nil { return nil, err } - if err := writeResource(v, resource, resourceSchemaURL); err != nil { + if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { return nil, err } - if err := writeScope(v, scope, scopeSchemaURL); err != nil { + if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { return nil, err } if err := v.OnObjectFinished(); err != nil { @@ -230,10 +323,10 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())); err != nil { return nil, err } - if err := writeResource(v, resource, resourceSchemaURL); err != nil { + if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { return nil, err } - if err := writeScope(v, scope, scopeSchemaURL); err != nil { + if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { return nil, err } if err := writeLogBody(v, record); err != nil { @@ -324,7 +417,7 @@ func writeLogBody(v *json.Visitor, record plog.LogRecord) error { return nil } -func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL string) error { +func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL string, stringifyMapAttributes bool) error { if err := v.OnKey("resource"); err != nil { return err } @@ -334,7 +427,7 @@ func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL if err := writeStringFieldSkipDefault(v, "schema_url", resourceSchemaURL); err != nil { return err } - if err := writeAttributes(v, resource.Attributes(), true); err != nil { + if err := writeAttributes(v, resource.Attributes(), stringifyMapAttributes); err != nil { return err } if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(resource.DroppedAttributesCount())); err != nil { @@ -346,7 +439,7 @@ func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL return nil } -func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchemaURL string) error { +func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchemaURL string, 
stringifyMapAttributes bool) error { if err := v.OnKey("scope"); err != nil { return err } @@ -362,7 +455,7 @@ func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchema if err := writeStringFieldSkipDefault(v, "version", scope.Version()); err != nil { return err } - if err := writeAttributes(v, scope.Attributes(), true); err != nil { + if err := writeAttributes(v, scope.Attributes(), stringifyMapAttributes); err != nil { return err } if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(scope.DroppedAttributesCount())); err != nil { @@ -378,19 +471,22 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues if attributes.Len() == 0 { return nil } - if err := v.OnKey("attributes"); err != nil { - return err - } attrCopy := pcommon.NewMap() attributes.CopyTo(attrCopy) attrCopy.RemoveIf(func(key string, _ pcommon.Value) bool { switch key { - case dataStreamType, dataStreamDataset, dataStreamNamespace: + case dataStreamType, dataStreamDataset, dataStreamNamespace, mappingHintsAttrKey: return true } return false }) mergeGeolocation(attrCopy) + if attrCopy.Len() == 0 { + return nil + } + if err := v.OnKey("attributes"); err != nil { + return err + } if err := writeMap(v, attrCopy, stringifyMapValues); err != nil { return err } diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 3b00d7f8e425..37cf1a0c23a1 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -43,12 +43,16 @@ func TestSerializeLog(t *testing.T) { "severity_text": "debug", "resource": map[string]interface{}{ "attributes": map[string]interface{}{ - "resource_map": `{"foo":"bar"}`, + "resource_map": map[string]interface{}{ + "foo": "bar", + }, }, }, "scope": map[string]interface{}{ "attributes": map[string]interface{}{ - "scope_map": `{"foo":"bar"}`, + "scope_map": map[string]interface{}{ + "foo": "bar", + }, }, }, "attributes": map[string]interface{}{ From eb36c67d11f1e85aeded549ff621c350cfbb3c64 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 18:27:25 +0100 Subject: [PATCH 04/30] Add changelog --- ...earchexporter_optimized-json-encoding.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .chloggen/elasticsearchexporter_optimized-json-encoding.yaml diff --git a/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml b/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml new file mode 100644 index 000000000000..6b37e98f6847 --- /dev/null +++ b/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: elasticsearchexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: More efficient JSON encoding for OTel mode + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [37032] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] From ea0ac70e7a346ee55e6735e9786cc0d37b459be4 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 18:40:32 +0100 Subject: [PATCH 05/30] goporto --- exporter/elasticsearchexporter/pdata_serializer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 39c48d88b528..172676e6a21c 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -1,4 +1,4 @@ -package elasticsearchexporter +package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" import ( "bytes" From 2fc6b0bf2b70353eebc7fb5fcebfef2d45ad3a79 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 6 Jan 2025 19:28:08 +0100 Subject: [PATCH 06/30] Fix linting issues --- exporter/elasticsearchexporter/model.go | 1 - .../elasticsearchexporter/pdata_serializer.go | 35 +++++----- .../pdata_serializer_test.go | 70 ++++++++++--------- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 3a7c68da0dd8..82ed7b64d606 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -254,7 +254,6 @@ func (m *encodeModel) encodeMetrics(resource pcommon.Resource, resourceSchemaURL return serializeMetrics(resource, resourceSchemaURL, scope, scopeSchemaURL, dataPoints, validationErrors) default: return m.encodeDataPointsECSMode(resource, dataPoints, validationErrors) - } } diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 172676e6a21c..e101847ddb6c 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -1,14 +1,18 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" import ( "bytes" "encoding/hex" + "strings" + "github.com/elastic/go-structform" "github.com/elastic/go-structform/json" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/ptrace" - "strings" ) const tsLayout = "2006-01-02T15:04:05.000000000Z" @@ -50,14 +54,13 @@ func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope if err := writeScope(v, scope, scopeSchemaURL, true); err != nil { return nil, nil, err } - dynamicTemplates, err := serializeDataPoints(v, dataPoints, validationErrors) - if err := v.OnObjectFinished(); err != nil { - return nil, nil, err + dynamicTemplates, serr := serializeDataPoints(v, dataPoints, validationErrors) + if serr != nil { + return nil, nil, serr } - if err != nil { + if err := v.OnObjectFinished(); err != nil { return nil, nil, err } - return buf.Bytes(), dynamicTemplates, nil } @@ -70,7 +73,7 @@ 
func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErro } dynamicTemplates := make(map[string]string, len(dataPoints)) - var docCount uint64 = 0 + var docCount uint64 for _, dp := range dataPoints { metric := dp.Metric() value, err := dp.Value() @@ -122,10 +125,10 @@ func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, sco if err := writeDataStream(v, spanEvent.Attributes()); err != nil { return nil, err } - if err := writeTraceIdField(v, span.TraceID()); err != nil { + if err := writeTraceIDField(v, span.TraceID()); err != nil { return nil, err } - if err := writeSpanIdField(v, "span_id", span.SpanID()); err != nil { + if err := writeSpanIDField(v, "span_id", span.SpanID()); err != nil { return nil, err } if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanEvent.DroppedAttributesCount())); err != nil { @@ -174,16 +177,16 @@ func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pc if err := writeDataStream(v, span.Attributes()); err != nil { return nil, err } - if err := writeTraceIdField(v, span.TraceID()); err != nil { + if err := writeTraceIDField(v, span.TraceID()); err != nil { return nil, err } - if err := writeSpanIdField(v, "span_id", span.SpanID()); err != nil { + if err := writeSpanIDField(v, "span_id", span.SpanID()); err != nil { return nil, err } if err := writeStringFieldSkipDefault(v, "trace_state", span.TraceState().AsRaw()); err != nil { return nil, err } - if err := writeSpanIdField(v, "parent_span_id", span.ParentSpanID()); err != nil { + if err := writeSpanIDField(v, "parent_span_id", span.ParentSpanID()); err != nil { return nil, err } if err := writeStringFieldSkipDefault(v, "name", span.Name()); err != nil { @@ -311,10 +314,10 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco if err := writeIntFieldSkipDefault(v, "severity_number", int64(record.SeverityNumber())); err != nil { return nil, err } - if err := writeTraceIdField(v, record.TraceID()); err != nil { + if err := writeTraceIDField(v, record.TraceID()); err != nil { return nil, err } - if err := writeSpanIdField(v, "span_id", record.SpanID()); err != nil { + if err := writeSpanIDField(v, "span_id", record.SpanID()); err != nil { return nil, err } if err := writeAttributes(v, record.Attributes(), false); err != nil { @@ -613,7 +616,7 @@ func writeStringFieldSkipDefault(v *json.Visitor, key, value string) error { return nil } -func writeTraceIdField(v *json.Visitor, id pcommon.TraceID) error { +func writeTraceIDField(v *json.Visitor, id pcommon.TraceID) error { if id.IsEmpty() { return nil } @@ -626,7 +629,7 @@ func writeTraceIdField(v *json.Visitor, id pcommon.TraceID) error { return nil } -func writeSpanIdField(v *json.Visitor, key string, id pcommon.SpanID) error { +func writeSpanIDField(v *json.Visitor, key string, id pcommon.SpanID) error { if id.IsEmpty() { return nil } diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 37cf1a0c23a1..ff901da27a1c 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -1,21 +1,24 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package elasticsearchexporter import ( "bytes" "encoding/json" + "testing" + "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" - "testing" ) func 
TestSerializeLog(t *testing.T) { - tests := []struct { name string logCustomizer func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) wantErr bool - expected interface{} + expected any }{ {name: "test attributes", logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { record.SetSeverityText("debug") @@ -26,7 +29,7 @@ func TestSerializeLog(t *testing.T) { record.Attributes().PutDouble("double", 42.0) record.Attributes().PutInt("int", 42) record.Attributes().PutEmptyBytes("bytes").Append(42) - _ = record.Attributes().PutEmptySlice("slice").FromRaw([]interface{}{42, "foo"}) + _ = record.Attributes().PutEmptySlice("slice").FromRaw([]any{42, "foo"}) record.Attributes().PutEmptySlice("map_slice").AppendEmpty().SetEmptyMap().PutStr("foo.bar", "baz") mapAttr := record.Attributes().PutEmptyMap("map") mapAttr.PutStr("foo.bar", "baz") @@ -34,75 +37,75 @@ func TestSerializeLog(t *testing.T) { resource.Attributes().PutEmptyMap("resource_map").PutStr("foo", "bar") scope.Attributes().PutEmptyMap("scope_map").PutStr("foo", "bar") - }, wantErr: false, expected: map[string]interface{}{ + }, wantErr: false, expected: map[string]any{ "@timestamp": "1970-01-01T00:00:00.000000000Z", "observed_timestamp": "1970-01-01T00:00:00.000000000Z", - "data_stream": map[string]interface{}{ + "data_stream": map[string]any{ "type": "logs", }, "severity_text": "debug", - "resource": map[string]interface{}{ - "attributes": map[string]interface{}{ - "resource_map": map[string]interface{}{ + "resource": map[string]any{ + "attributes": map[string]any{ + "resource_map": map[string]any{ "foo": "bar", }, }, }, - "scope": map[string]interface{}{ - "attributes": map[string]interface{}{ - "scope_map": map[string]interface{}{ + "scope": map[string]any{ + "attributes": map[string]any{ + "scope_map": map[string]any{ "foo": "bar", }, }, }, - "attributes": map[string]interface{}{ + "attributes": map[string]any{ "empty": nil, "string": "foo", "bool": true, "double": json.Number("42.0"), "int": json.Number("42"), "bytes": "2a", - "slice": []interface{}{json.Number("42"), "foo"}, - "map_slice": []interface{}{map[string]interface{}{ + "slice": []any{json.Number("42"), "foo"}, + "map_slice": []any{map[string]any{ "foo.bar": "baz", }}, - "map": map[string]interface{}{ + "map": map[string]any{ "foo.bar": "baz", - "inner.slice": []interface{}{"foo"}, + "inner.slice": []any{"foo"}, }, }, }}, { name: "text body", - logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { + logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) { record.Body().SetStr("foo") }, wantErr: false, - expected: map[string]interface{}{ + expected: map[string]any{ "@timestamp": "1970-01-01T00:00:00.000000000Z", "observed_timestamp": "1970-01-01T00:00:00.000000000Z", - "data_stream": map[string]interface{}{}, - "resource": map[string]interface{}{}, - "scope": map[string]interface{}{}, - "body": map[string]interface{}{ + "data_stream": map[string]any{}, + "resource": map[string]any{}, + "scope": map[string]any{}, + "body": map[string]any{ "text": "foo", }, }, }, { name: "map body", - logCustomizer: func(resource pcommon.Resource, scope pcommon.InstrumentationScope, record plog.LogRecord) { + logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) { record.Body().SetEmptyMap().PutStr("foo.bar", "baz") }, wantErr: false, - expected: map[string]interface{}{ + 
expected: map[string]any{ "@timestamp": "1970-01-01T00:00:00.000000000Z", "observed_timestamp": "1970-01-01T00:00:00.000000000Z", - "data_stream": map[string]interface{}{}, - "resource": map[string]interface{}{}, - "scope": map[string]interface{}{}, - "body": map[string]interface{}{ - "flattened": map[string]interface{}{ + "data_stream": map[string]any{}, + "resource": map[string]any{}, + "scope": map[string]any{}, + "body": map[string]any{ + "flattened": map[string]any{ "foo.bar": "baz", }, }, @@ -111,7 +114,6 @@ func TestSerializeLog(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - resourceLogs := plog.NewResourceLogs() scopeLogs := resourceLogs.ScopeLogs().AppendEmpty() record := scopeLogs.LogRecords().AppendEmpty() @@ -121,15 +123,15 @@ func TestSerializeLog(t *testing.T) { if (err != nil) != tt.wantErr { t.Errorf("serializeLog() error = %v, wantErr %v", err, tt.wantErr) } - eventAsJson := string(logBytes) - var result interface{} + eventAsJSON := string(logBytes) + var result any decoder := json.NewDecoder(bytes.NewBuffer(logBytes)) decoder.UseNumber() if err := decoder.Decode(&result); err != nil { t.Error(err) } - assert.Equal(t, tt.expected, result, eventAsJson) + assert.Equal(t, tt.expected, result, eventAsJSON) }) } } From d3e8c7a37cbe308480123fbd9bfe8f122741186b Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 08:41:00 +0100 Subject: [PATCH 07/30] Add event_name for logs --- exporter/elasticsearchexporter/pdata_serializer.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index e101847ddb6c..d4ba646b9672 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -326,6 +326,15 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())); err != nil { return nil, err } + if record.EventName() != "" { + if err := writeStringFieldSkipDefault(v, "event_name", record.EventName()); err != nil { + return nil, err + } + } else if eventNameAttr, ok := record.Attributes().Get("event.name"); ok && eventNameAttr.Str() != "" { + if err := writeStringFieldSkipDefault(v, "event_name", eventNameAttr.Str()); err != nil { + return nil, err + } + } if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { return nil, err } From e09e0e51ecd53d73ac00d0fa926dfba0bf160bce Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 09:58:16 +0100 Subject: [PATCH 08/30] Remove all error handling from serialization code bytes.Buffer.Write is guaranteed to not return an error --- .../elasticsearchexporter/pdata_serializer.go | 592 +++++------------- 1 file changed, 156 insertions(+), 436 deletions(-) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index d4ba646b9672..ed491850e05f 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -28,49 +28,24 @@ func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. 
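	// Illustrative aside (an assumption about go-structform's encoding behavior,
	// not code added by this patch): with the explicit radix point enabled,
	//   _ = v.OnFloat64(1.0) // encodes as 1.0, so ES dynamic mapping infers float
	// whereas the default encoding would emit 1, which ES would map as long.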
v.SetExplicitRadixPoint(true) - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return nil, nil, err - } - if err := writeTimestampField(v, "@timestamp", dp0.Timestamp()); err != nil { - return nil, nil, err - } + _ = v.OnObjectStart(-1, structform.AnyType) + writeTimestampField(v, "@timestamp", dp0.Timestamp()) if dp0.StartTimestamp() != 0 { - if err := writeTimestampField(v, "start_timestamp", dp0.StartTimestamp()); err != nil { - return nil, nil, err - } - } - if err := writeStringFieldSkipDefault(v, "unit", dp0.Metric().Unit()); err != nil { - return nil, nil, err - } - if err := writeDataStream(v, dp0.Attributes()); err != nil { - return nil, nil, err - } - if err := writeAttributes(v, dp0.Attributes(), true); err != nil { - return nil, nil, err - } - if err := writeResource(v, resource, resourceSchemaURL, true); err != nil { - return nil, nil, err - } - if err := writeScope(v, scope, scopeSchemaURL, true); err != nil { - return nil, nil, err - } - dynamicTemplates, serr := serializeDataPoints(v, dataPoints, validationErrors) - if serr != nil { - return nil, nil, serr - } - if err := v.OnObjectFinished(); err != nil { - return nil, nil, err - } + writeTimestampField(v, "start_timestamp", dp0.StartTimestamp()) + } + writeStringFieldSkipDefault(v, "unit", dp0.Metric().Unit()) + writeDataStream(v, dp0.Attributes()) + writeAttributes(v, dp0.Attributes(), true) + writeResource(v, resource, resourceSchemaURL, true) + writeScope(v, scope, scopeSchemaURL, true) + dynamicTemplates := serializeDataPoints(v, dataPoints, validationErrors) + _ = v.OnObjectFinished() return buf.Bytes(), dynamicTemplates, nil } -func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErrors *[]error) (map[string]string, error) { - if err := v.OnKey("metrics"); err != nil { - return nil, err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return nil, err - } +func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErrors *[]error) map[string]string { + _ = v.OnKey("metrics") + _ = v.OnObjectStart(-1, structform.AnyType) dynamicTemplates := make(map[string]string, len(dataPoints)) var docCount uint64 @@ -84,29 +59,21 @@ func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErro *validationErrors = append(*validationErrors, err) continue } - if err = v.OnKey(metric.Name()); err != nil { - return nil, err - } + _ = v.OnKey(metric.Name()) // TODO: support quantiles // https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/34561 - if err := writeValue(v, value, false); err != nil { - return nil, err - } + writeValue(v, value, false) // DynamicTemplate returns the name of dynamic template that applies to the metric and data point, // so that the field is indexed into Elasticsearch with the correct mapping. The name should correspond to a // dynamic template that is defined in ES mapping, e.g. 
// https://github.com/elastic/elasticsearch/blob/8.15/x-pack/plugin/core/template-resources/src/main/resources/metrics%40mappings.json dynamicTemplates["metrics."+metric.Name()] = dp.DynamicTemplate(metric) } - if err := v.OnObjectFinished(); err != nil { - return nil, err - } + _ = v.OnObjectFinished() if docCount != 0 { - if err := writeUIntField(v, "_doc_count", docCount); err != nil { - return nil, err - } + writeUIntField(v, "_doc_count", docCount) } - return dynamicTemplates, nil + return dynamicTemplates } func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent) ([]byte, error) { @@ -116,27 +83,13 @@ func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, sco // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. v.SetExplicitRadixPoint(true) - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return nil, err - } - if err := writeTimestampField(v, "@timestamp", spanEvent.Timestamp()); err != nil { - return nil, err - } - if err := writeDataStream(v, spanEvent.Attributes()); err != nil { - return nil, err - } - if err := writeTraceIDField(v, span.TraceID()); err != nil { - return nil, err - } - if err := writeSpanIDField(v, "span_id", span.SpanID()); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanEvent.DroppedAttributesCount())); err != nil { - return nil, err - } - if err := writeStringFieldSkipDefault(v, "event_name", spanEvent.Name()); err != nil { - return nil, err - } + _ = v.OnObjectStart(-1, structform.AnyType) + writeTimestampField(v, "@timestamp", spanEvent.Timestamp()) + writeDataStream(v, spanEvent.Attributes()) + writeTraceIDField(v, span.TraceID()) + writeSpanIDField(v, "span_id", span.SpanID()) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanEvent.DroppedAttributesCount())) + writeStringFieldSkipDefault(v, "event_name", spanEvent.Name()) var attributes pcommon.Map if spanEvent.Name() != "" { @@ -146,18 +99,10 @@ func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, sco } else { attributes = spanEvent.Attributes() } - if err := writeAttributes(v, attributes, false); err != nil { - return nil, err - } - if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { - return nil, err - } - if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { - return nil, err - } - if err := v.OnObjectFinished(); err != nil { - return nil, err - } + writeAttributes(v, attributes, false) + writeResource(v, resource, resourceSchemaURL, false) + writeScope(v, scope, scopeSchemaURL, false) + _ = v.OnObjectFinished() return buf.Bytes(), nil } @@ -168,121 +113,51 @@ func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pc // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. 
v.SetExplicitRadixPoint(true) - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return nil, err - } - if err := writeTimestampField(v, "@timestamp", span.StartTimestamp()); err != nil { - return nil, err - } - if err := writeDataStream(v, span.Attributes()); err != nil { - return nil, err - } - if err := writeTraceIDField(v, span.TraceID()); err != nil { - return nil, err - } - if err := writeSpanIDField(v, "span_id", span.SpanID()); err != nil { - return nil, err - } - if err := writeStringFieldSkipDefault(v, "trace_state", span.TraceState().AsRaw()); err != nil { - return nil, err - } - if err := writeSpanIDField(v, "parent_span_id", span.ParentSpanID()); err != nil { - return nil, err - } - if err := writeStringFieldSkipDefault(v, "name", span.Name()); err != nil { - return nil, err - } - if err := writeStringFieldSkipDefault(v, "kind", span.Kind().String()); err != nil { - return nil, err - } - if err := writeUIntField(v, "duration", uint64(span.EndTimestamp()-span.StartTimestamp())); err != nil { - return nil, err - } - if err := writeAttributes(v, span.Attributes(), false); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(span.DroppedAttributesCount())); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "dropped_events_count", int64(span.DroppedEventsCount())); err != nil { - return nil, err - } - if err := writeSpanLinks(v, span); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "dropped_links_count", int64(span.DroppedLinksCount())); err != nil { - return nil, err - } - if err := writeStatus(v, span.Status()); err != nil { - return nil, err - } - if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { - return nil, err - } - if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { - return nil, err - } - if err := v.OnObjectFinished(); err != nil { - return nil, err - } + _ = v.OnObjectStart(-1, structform.AnyType) + writeTimestampField(v, "@timestamp", span.StartTimestamp()) + writeDataStream(v, span.Attributes()) + writeTraceIDField(v, span.TraceID()) + writeSpanIDField(v, "span_id", span.SpanID()) + writeStringFieldSkipDefault(v, "trace_state", span.TraceState().AsRaw()) + writeSpanIDField(v, "parent_span_id", span.ParentSpanID()) + writeStringFieldSkipDefault(v, "name", span.Name()) + writeStringFieldSkipDefault(v, "kind", span.Kind().String()) + writeUIntField(v, "duration", uint64(span.EndTimestamp()-span.StartTimestamp())) + writeAttributes(v, span.Attributes(), false) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(span.DroppedAttributesCount())) + writeIntFieldSkipDefault(v, "dropped_events_count", int64(span.DroppedEventsCount())) + writeSpanLinks(v, span) + writeIntFieldSkipDefault(v, "dropped_links_count", int64(span.DroppedLinksCount())) + writeStatus(v, span.Status()) + writeResource(v, resource, resourceSchemaURL, false) + writeScope(v, scope, scopeSchemaURL, false) + _ = v.OnObjectFinished() return buf.Bytes(), nil } -func writeStatus(v *json.Visitor, status ptrace.Status) error { - if err := v.OnKey("status"); err != nil { - return err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "message", status.Message()); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "code", status.Code().String()); err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - 
return nil +func writeStatus(v *json.Visitor, status ptrace.Status) { + _ = v.OnKey("status") + _ = v.OnObjectStart(-1, structform.AnyType) + writeStringFieldSkipDefault(v, "message", status.Message()) + writeStringFieldSkipDefault(v, "code", status.Code().String()) + _ = v.OnObjectFinished() } -func writeSpanLinks(v *json.Visitor, span ptrace.Span) error { - if err := v.OnKey("links"); err != nil { - return err - } - if err := v.OnArrayStart(-1, structform.AnyType); err != nil { - return err - } +func writeSpanLinks(v *json.Visitor, span ptrace.Span) { + _ = v.OnKey("links") + _ = v.OnArrayStart(-1, structform.AnyType) spanLinks := span.Links() for i := 0; i < spanLinks.Len(); i++ { spanLink := spanLinks.At(i) - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "trace_id", spanLink.TraceID().String()); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "span_id", spanLink.SpanID().String()); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "trace_state", spanLink.TraceState().AsRaw()); err != nil { - return err - } - if err := writeAttributes(v, spanLink.Attributes(), false); err != nil { - return err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanLink.DroppedAttributesCount())); err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - } - if err := v.OnArrayFinished(); err != nil { - return err - } - return nil + _ = v.OnObjectStart(-1, structform.AnyType) + writeStringFieldSkipDefault(v, "trace_id", spanLink.TraceID().String()) + writeStringFieldSkipDefault(v, "span_id", spanLink.SpanID().String()) + writeStringFieldSkipDefault(v, "trace_state", spanLink.TraceState().AsRaw()) + writeAttributes(v, spanLink.Attributes(), false) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(spanLink.DroppedAttributesCount())) + _ = v.OnObjectFinished() + } + _ = v.OnArrayFinished() } func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, record plog.LogRecord) ([]byte, error) { @@ -292,100 +167,51 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. 
v.SetExplicitRadixPoint(true) - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return nil, err - } + _ = v.OnObjectStart(-1, structform.AnyType) docTimeStamp := record.Timestamp() if docTimeStamp.AsTime().UnixNano() == 0 { docTimeStamp = record.ObservedTimestamp() } - if err := writeTimestampField(v, "@timestamp", docTimeStamp); err != nil { - return nil, err - } - if err := writeTimestampField(v, "observed_timestamp", record.ObservedTimestamp()); err != nil { - return nil, err - } - if err := writeDataStream(v, record.Attributes()); err != nil { - return nil, err - } - if err := writeStringFieldSkipDefault(v, "severity_text", record.SeverityText()); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "severity_number", int64(record.SeverityNumber())); err != nil { - return nil, err - } - if err := writeTraceIDField(v, record.TraceID()); err != nil { - return nil, err - } - if err := writeSpanIDField(v, "span_id", record.SpanID()); err != nil { - return nil, err - } - if err := writeAttributes(v, record.Attributes(), false); err != nil { - return nil, err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())); err != nil { - return nil, err - } + writeTimestampField(v, "@timestamp", docTimeStamp) + writeTimestampField(v, "observed_timestamp", record.ObservedTimestamp()) + writeDataStream(v, record.Attributes()) + writeStringFieldSkipDefault(v, "severity_text", record.SeverityText()) + writeIntFieldSkipDefault(v, "severity_number", int64(record.SeverityNumber())) + writeTraceIDField(v, record.TraceID()) + writeSpanIDField(v, "span_id", record.SpanID()) + writeAttributes(v, record.Attributes(), false) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())) if record.EventName() != "" { - if err := writeStringFieldSkipDefault(v, "event_name", record.EventName()); err != nil { - return nil, err - } + writeStringFieldSkipDefault(v, "event_name", record.EventName()) } else if eventNameAttr, ok := record.Attributes().Get("event.name"); ok && eventNameAttr.Str() != "" { - if err := writeStringFieldSkipDefault(v, "event_name", eventNameAttr.Str()); err != nil { - return nil, err - } - } - if err := writeResource(v, resource, resourceSchemaURL, false); err != nil { - return nil, err - } - if err := writeScope(v, scope, scopeSchemaURL, false); err != nil { - return nil, err - } - if err := writeLogBody(v, record); err != nil { - return nil, err - } - if err := v.OnObjectFinished(); err != nil { - return nil, err + writeStringFieldSkipDefault(v, "event_name", eventNameAttr.Str()) } + writeResource(v, resource, resourceSchemaURL, false) + writeScope(v, scope, scopeSchemaURL, false) + writeLogBody(v, record) + _ = v.OnObjectFinished() return buf.Bytes(), nil } -func writeDataStream(v *json.Visitor, attributes pcommon.Map) error { - if err := v.OnKey("data_stream"); err != nil { - return err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - var err error +func writeDataStream(v *json.Visitor, attributes pcommon.Map) { + _ = v.OnKey("data_stream") + _ = v.OnObjectStart(-1, structform.AnyType) attributes.Range(func(k string, val pcommon.Value) bool { if strings.HasPrefix(k, "data_stream.") && val.Type() == pcommon.ValueTypeStr { - if err = writeStringFieldSkipDefault(v, k[12:], val.Str()); err != nil { - return false - } + writeStringFieldSkipDefault(v, k[12:], val.Str()) } return true }) - if err != nil { - return err - } - if 
err := v.OnObjectFinished(); err != nil { - return err - } - return nil + _ = v.OnObjectFinished() } -func writeLogBody(v *json.Visitor, record plog.LogRecord) error { +func writeLogBody(v *json.Visitor, record plog.LogRecord) { if record.Body().Type() == pcommon.ValueTypeEmpty { - return nil - } - if err := v.OnKey("body"); err != nil { - return err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err + return } + _ = v.OnKey("body") + _ = v.OnObjectStart(-1, structform.AnyType) // Determine if this log record is an event, as they are mapped differently // https://github.com/open-telemetry/semantic-conventions/blob/main/docs/general/events.md @@ -417,71 +243,34 @@ func writeLogBody(v *json.Visitor, record plog.LogRecord) error { default: bodyType = "text" } - if err := v.OnKey(bodyType); err != nil { - return err - } - if err := writeValue(v, body, false); err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - return nil + _ = v.OnKey(bodyType) + writeValue(v, body, false) + _ = v.OnObjectFinished() } -func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL string, stringifyMapAttributes bool) error { - if err := v.OnKey("resource"); err != nil { - return err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "schema_url", resourceSchemaURL); err != nil { - return err - } - if err := writeAttributes(v, resource.Attributes(), stringifyMapAttributes); err != nil { - return err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(resource.DroppedAttributesCount())); err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - return nil +func writeResource(v *json.Visitor, resource pcommon.Resource, resourceSchemaURL string, stringifyMapAttributes bool) { + _ = v.OnKey("resource") + _ = v.OnObjectStart(-1, structform.AnyType) + writeStringFieldSkipDefault(v, "schema_url", resourceSchemaURL) + writeAttributes(v, resource.Attributes(), stringifyMapAttributes) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(resource.DroppedAttributesCount())) + _ = v.OnObjectFinished() } -func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchemaURL string, stringifyMapAttributes bool) error { - if err := v.OnKey("scope"); err != nil { - return err - } - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "schema_url", scopeSchemaURL); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "name", scope.Name()); err != nil { - return err - } - if err := writeStringFieldSkipDefault(v, "version", scope.Version()); err != nil { - return err - } - if err := writeAttributes(v, scope.Attributes(), stringifyMapAttributes); err != nil { - return err - } - if err := writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(scope.DroppedAttributesCount())); err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - return nil +func writeScope(v *json.Visitor, scope pcommon.InstrumentationScope, scopeSchemaURL string, stringifyMapAttributes bool) { + _ = v.OnKey("scope") + _ = v.OnObjectStart(-1, structform.AnyType) + writeStringFieldSkipDefault(v, "schema_url", scopeSchemaURL) + writeStringFieldSkipDefault(v, "name", scope.Name()) + writeStringFieldSkipDefault(v, "version", scope.Version()) + writeAttributes(v, 
scope.Attributes(), stringifyMapAttributes) + writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(scope.DroppedAttributesCount())) + _ = v.OnObjectFinished() } -func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) error { +func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) { if attributes.Len() == 0 { - return nil + return } attrCopy := pcommon.NewMap() attributes.CopyTo(attrCopy) @@ -494,159 +283,90 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues }) mergeGeolocation(attrCopy) if attrCopy.Len() == 0 { - return nil - } - if err := v.OnKey("attributes"); err != nil { - return err - } - if err := writeMap(v, attrCopy, stringifyMapValues); err != nil { - return err + return } - return nil + _ = v.OnKey("attributes") + writeMap(v, attrCopy, stringifyMapValues) } -func writeMap(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) error { - if err := v.OnObjectStart(-1, structform.AnyType); err != nil { - return err - } - var err error +func writeMap(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) { + _ = v.OnObjectStart(-1, structform.AnyType) attributes.Range(func(k string, val pcommon.Value) bool { - if err = v.OnKey(k); err != nil { - return false - } - err = writeValue(v, val, stringifyMapValues) - return err == nil + _ = v.OnKey(k) + writeValue(v, val, stringifyMapValues) + return true }) - if err != nil { - return err - } - if err := v.OnObjectFinished(); err != nil { - return err - } - return nil + _ = v.OnObjectFinished() } -func writeValue(v *json.Visitor, val pcommon.Value, stringifyMaps bool) error { +func writeValue(v *json.Visitor, val pcommon.Value, stringifyMaps bool) { switch val.Type() { case pcommon.ValueTypeEmpty: - if err := v.OnNil(); err != nil { - return err - } + _ = v.OnNil() case pcommon.ValueTypeStr: - if err := v.OnString(val.Str()); err != nil { - return err - } + _ = v.OnString(val.Str()) case pcommon.ValueTypeBool: - if err := v.OnBool(val.Bool()); err != nil { - return err - } + _ = v.OnBool(val.Bool()) case pcommon.ValueTypeDouble: - if err := v.OnFloat64(val.Double()); err != nil { - return err - } + _ = v.OnFloat64(val.Double()) case pcommon.ValueTypeInt: - if err := v.OnInt64(val.Int()); err != nil { - return err - } + _ = v.OnInt64(val.Int()) case pcommon.ValueTypeBytes: - if err := v.OnString(hex.EncodeToString(val.Bytes().AsRaw())); err != nil { - return err - } + _ = v.OnString(hex.EncodeToString(val.Bytes().AsRaw())) case pcommon.ValueTypeMap: if stringifyMaps { - if err := v.OnString(val.AsString()); err != nil { - return err - } + _ = v.OnString(val.AsString()) } else { - if err := writeMap(v, val.Map(), false); err != nil { - return err - } + writeMap(v, val.Map(), false) } case pcommon.ValueTypeSlice: - if err := v.OnArrayStart(-1, structform.AnyType); err != nil { - return err - } + _ = v.OnArrayStart(-1, structform.AnyType) slice := val.Slice() for i := 0; i < slice.Len(); i++ { - if err := writeValue(v, slice.At(i), stringifyMaps); err != nil { - return err - } - } - if err := v.OnArrayFinished(); err != nil { - return err + writeValue(v, slice.At(i), stringifyMaps) } + _ = v.OnArrayFinished() } - return nil } -func writeTimestampField(v *json.Visitor, key string, timestamp pcommon.Timestamp) error { - if err := v.OnKey(key); err != nil { - return err - } - if err := v.OnString(timestamp.AsTime().UTC().Format(tsLayout)); err != nil { - return err - } - return nil +func writeTimestampField(v 
*json.Visitor, key string, timestamp pcommon.Timestamp) { + _ = v.OnKey(key) + _ = v.OnString(timestamp.AsTime().UTC().Format(tsLayout)) } -func writeUIntField(v *json.Visitor, key string, i uint64) error { - if err := v.OnKey(key); err != nil { - return err - } - if err := v.OnUint64(i); err != nil { - return err - } - return nil +func writeUIntField(v *json.Visitor, key string, i uint64) { + _ = v.OnKey(key) + _ = v.OnUint64(i) } -func writeIntFieldSkipDefault(v *json.Visitor, key string, i int64) error { +func writeIntFieldSkipDefault(v *json.Visitor, key string, i int64) { if i == 0 { - return nil + return } - if err := v.OnKey(key); err != nil { - return err - } - if err := v.OnInt64(i); err != nil { - return err - } - return nil + _ = v.OnKey(key) + _ = v.OnInt64(i) } -func writeStringFieldSkipDefault(v *json.Visitor, key, value string) error { +func writeStringFieldSkipDefault(v *json.Visitor, key, value string) { if value == "" { - return nil - } - if err := v.OnKey(key); err != nil { - return err - } - if err := v.OnString(value); err != nil { - return err + return } - return nil + _ = v.OnKey(key) + _ = v.OnString(value) } -func writeTraceIDField(v *json.Visitor, id pcommon.TraceID) error { +func writeTraceIDField(v *json.Visitor, id pcommon.TraceID) { if id.IsEmpty() { - return nil + return } - if err := v.OnKey("trace_id"); err != nil { - return err - } - if err := v.OnString(hex.EncodeToString(id[:])); err != nil { - return err - } - return nil + _ = v.OnKey("trace_id") + _ = v.OnString(hex.EncodeToString(id[:])) } -func writeSpanIDField(v *json.Visitor, key string, id pcommon.SpanID) error { +func writeSpanIDField(v *json.Visitor, key string, id pcommon.SpanID) { if id.IsEmpty() { - return nil - } - if err := v.OnKey(key); err != nil { - return err - } - if err := v.OnString(hex.EncodeToString(id[:])); err != nil { - return err + return } - return nil + _ = v.OnKey(key) + _ = v.OnString(hex.EncodeToString(id[:])) } From 539fa9df336d7d5b71c9d53fa5cf7f0d7be697fc Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 13:00:59 +0100 Subject: [PATCH 09/30] Avoid copying attributes --- .../elasticsearchexporter/exporter_test.go | 16 +++++----- exporter/elasticsearchexporter/model.go | 22 +++++-------- exporter/elasticsearchexporter/model_test.go | 15 +++------ .../elasticsearchexporter/pdata_serializer.go | 31 +++++++++++-------- 4 files changed, 37 insertions(+), 47 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index 8c5b604da548..da0e2accb382 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -1196,19 +1196,19 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{},"scope":{}}`), }, { Action: 
[]byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.sum":"gauge_double"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.sum":1.5},"resource":{},"scope":{},"start_timestamp":"1970-01-01T02:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.sum":1.5},"resource":{},"scope":{},"start_timestamp":"1970-01-01T02:00:00.000000000Z"}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T03:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{},"scope":{},"start_timestamp":"1970-01-01T03:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"1970-01-01T03:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{},"scope":{},"start_timestamp":"1970-01-01T03:00:00.000000000Z"}`), }, } @@ -1277,7 +1277,7 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.sum":"gauge_long","metrics.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, } @@ -1327,11 +1327,11 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.histogram.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, { Action: 
[]byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.exphistogram.summary":"summary"}}}`),
-				Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`),
+				Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`),
 			},
 		}
 
@@ -1370,7 +1370,7 @@ func TestExporterMetrics(t *testing.T) {
 		expected := []itemRequest{
 			{
 				Action:   []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.foo.bar":"gauge_long","metrics.foo":"gauge_long","metrics.foo.bar.baz":"gauge_long"}}}`),
-				Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{},"scope":{}}`),
+				Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{},"scope":{}}`),
 			},
 		}
 
diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go
index 82ed7b64d606..90a2194f3f5d 100644
--- a/exporter/elasticsearchexporter/model.go
+++ b/exporter/elasticsearchexporter/model.go
@@ -791,7 +791,7 @@ func sliceHash(h hash.Hash, s pcommon.Slice) {
 // mergeGeolocation mutates attributes map to merge all `geo.location.{lon,lat}`,
 // and namespaced `*.geo.location.{lon,lat}` to unnamespaced and namespaced `geo.location`.
 // This is to match the geo_point type in Elasticsearch.
-func mergeGeolocation(attributes pcommon.Map) { +func mergeGeolocation(attributes pcommon.Map) pcommon.Map { const ( lonKey = "geo.location.lon" latKey = "geo.location.lat" @@ -815,9 +815,9 @@ func mergeGeolocation(attributes pcommon.Map) { g.latSet = true prefixToGeo[prefix] = g } - attributes.RemoveIf(func(key string, val pcommon.Value) bool { + attributes.Range(func(key string, val pcommon.Value) bool { if val.Type() != pcommon.ValueTypeDouble { - return false + return true } if key == lonKey { @@ -835,30 +835,22 @@ func mergeGeolocation(attributes pcommon.Map) { setLat(prefix, val.Double()) return true } - return false + return true }) + geoAttributes := pcommon.NewMap() for prefix, geo := range prefixToGeo { if geo.lonSet && geo.latSet { key := prefix + mergedKey // Geopoint expressed as an array with the format: [lon, lat] - s := attributes.PutEmptySlice(key) + s := geoAttributes.PutEmptySlice(key) s.EnsureCapacity(2) s.AppendEmpty().SetDouble(geo.lon) s.AppendEmpty().SetDouble(geo.lat) continue } - - // Place the attributes back if lon and lat are not present together - if geo.lonSet { - key := prefix + lonKey - attributes.PutDouble(key, geo.lon) - } - if geo.latSet { - key := prefix + latKey - attributes.PutDouble(key, geo.lat) - } } + return geoAttributes } func safeUint64ToInt64(v uint64) int64 { diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index 79574232e9e9..4067a136d798 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -1323,20 +1323,13 @@ func TestMergeGeolocation(t *testing.T) { "e.geo.location.lat": "bar", } wantAttributes := map[string]any{ - "geo.location": []any{1.1, 2.2}, - "foo.bar.geo.location": []any{3.3, 4.4}, - "a.geo.location.lon": 5.5, - "b.geo.location.lat": 6.6, - "unrelatedgeo.location.lon": 7.7, - "unrelatedgeo.location.lat": 8.8, - "d": 9.9, - "e.geo.location.lon": "foo", - "e.geo.location.lat": "bar", + "geo.location": []any{1.1, 2.2}, + "foo.bar.geo.location": []any{3.3, 4.4}, } input := pcommon.NewMap() err := input.FromRaw(attributes) require.NoError(t, err) - mergeGeolocation(input) - after := input.AsRaw() + output := mergeGeolocation(input) + after := output.AsRaw() assert.Equal(t, wantAttributes, after) } diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index ed491850e05f..e8054756087e 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -272,26 +272,31 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues if attributes.Len() == 0 { return } - attrCopy := pcommon.NewMap() - attributes.CopyTo(attrCopy) - attrCopy.RemoveIf(func(key string, _ pcommon.Value) bool { - switch key { + geoAttributes := mergeGeolocation(attributes) + _ = v.OnKey("attributes") + _ = v.OnObjectStart(-1, structform.AnyType) + attributes.Range(func(k string, val pcommon.Value) bool { + switch k { case dataStreamType, dataStreamDataset, dataStreamNamespace, mappingHintsAttrKey: return true } - return false + if strings.HasSuffix(k, ".geo.location") { + return true + } + _ = v.OnKey(k) + writeValue(v, val, stringifyMapValues) + return true }) - mergeGeolocation(attrCopy) - if attrCopy.Len() == 0 { - return - } - _ = v.OnKey("attributes") - writeMap(v, attrCopy, stringifyMapValues) + geoAttributes.Range(func(k string, val pcommon.Value) bool { + writeValue(v, val, stringifyMapValues) + return 
true + }) + _ = v.OnObjectFinished() } -func writeMap(v *json.Visitor, attributes pcommon.Map, stringifyMapValues bool) { +func writeMap(v *json.Visitor, m pcommon.Map, stringifyMapValues bool) { _ = v.OnObjectStart(-1, structform.AnyType) - attributes.Range(func(k string, val pcommon.Value) bool { + m.Range(func(k string, val pcommon.Value) bool { _ = v.OnKey(k) writeValue(v, val, stringifyMapValues) return true From 7ba257562886184311eee8058ccff4a1c8f485d0 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 16:04:43 +0100 Subject: [PATCH 10/30] Propagate isEvent flag to writeLogBody function --- exporter/elasticsearchexporter/pdata_serializer.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index e8054756087e..12054c351672 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -181,14 +181,17 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco writeSpanIDField(v, "span_id", record.SpanID()) writeAttributes(v, record.Attributes(), false) writeIntFieldSkipDefault(v, "dropped_attributes_count", int64(record.DroppedAttributesCount())) + isEvent := false if record.EventName() != "" { + isEvent = true writeStringFieldSkipDefault(v, "event_name", record.EventName()) } else if eventNameAttr, ok := record.Attributes().Get("event.name"); ok && eventNameAttr.Str() != "" { + isEvent = true writeStringFieldSkipDefault(v, "event_name", eventNameAttr.Str()) } writeResource(v, resource, resourceSchemaURL, false) writeScope(v, scope, scopeSchemaURL, false) - writeLogBody(v, record) + writeLogBody(v, record, isEvent) _ = v.OnObjectFinished() return buf.Bytes(), nil } @@ -206,7 +209,7 @@ func writeDataStream(v *json.Visitor, attributes pcommon.Map) { _ = v.OnObjectFinished() } -func writeLogBody(v *json.Visitor, record plog.LogRecord) { +func writeLogBody(v *json.Visitor, record plog.LogRecord, isEvent bool) { if record.Body().Type() == pcommon.ValueTypeEmpty { return } @@ -216,7 +219,7 @@ func writeLogBody(v *json.Visitor, record plog.LogRecord) { // Determine if this log record is an event, as they are mapped differently // https://github.com/open-telemetry/semantic-conventions/blob/main/docs/general/events.md var bodyType string - if _, hasEventNameAttribute := record.Attributes().Get("event.name"); hasEventNameAttribute || record.EventName() != "" { + if isEvent { bodyType = "structured" } else { bodyType = "flattened" From b15169d74d95aa1a8b33205b2286c27a859508bd Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 16:08:07 +0100 Subject: [PATCH 11/30] write geo attribute keys --- exporter/elasticsearchexporter/pdata_serializer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 12054c351672..ec3cd1caea94 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -291,6 +291,7 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues return true }) geoAttributes.Range(func(k string, val pcommon.Value) bool { + _ = v.OnKey(k) writeValue(v, val, stringifyMapValues) return true }) From 5e523c5a19966ab3f09c3a087dfb6f6ad40f21a3 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 17:51:41 +0100 Subject: [PATCH 12/30] Pool 
buffers --- exporter/elasticsearchexporter/bufferpol.go | 40 ++++++++++++ exporter/elasticsearchexporter/exporter.go | 29 +++++---- exporter/elasticsearchexporter/model.go | 64 +++++++++---------- exporter/elasticsearchexporter/model_test.go | 52 +++++++++------ .../elasticsearchexporter/pdata_serializer.go | 38 +++++------ .../pdata_serializer_test.go | 4 +- 6 files changed, 141 insertions(+), 86 deletions(-) create mode 100644 exporter/elasticsearchexporter/bufferpol.go diff --git a/exporter/elasticsearchexporter/bufferpol.go b/exporter/elasticsearchexporter/bufferpol.go new file mode 100644 index 000000000000..257aaaf09e0f --- /dev/null +++ b/exporter/elasticsearchexporter/bufferpol.go @@ -0,0 +1,40 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" + +import ( + "bytes" + "io" + "sync" +) + +type BufferPool struct { + pool *sync.Pool +} + +func NewBufferPool() *BufferPool { + return &BufferPool{pool: &sync.Pool{New: func() any { return &bytes.Buffer{} }}} +} + +func (w *BufferPool) NewPooledBuffer() PooledBuffer { + return PooledBuffer{ + Buffer: w.pool.Get().(*bytes.Buffer), + pool: w.pool, + } +} + +type PooledBuffer struct { + Buffer *bytes.Buffer + pool *sync.Pool +} + +func (p PooledBuffer) recycle() { + p.Buffer.Reset() + p.pool.Put(p.Buffer) +} + +func (p PooledBuffer) WriteTo(w io.Writer) (n int64, err error) { + defer p.recycle() + return bytes.NewReader(p.Buffer.Bytes()).WriteTo(w) +} diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index dadc5e30e526..d1410761f336 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -4,7 +4,6 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" import ( - "bytes" "context" "errors" "fmt" @@ -34,6 +33,8 @@ type elasticsearchExporter struct { wg sync.WaitGroup // active sessions bulkIndexer bulkIndexer + + bufferPool *BufferPool } func newExporter( @@ -67,6 +68,7 @@ func newExporter( model: model, logstashFormat: cfg.LogstashFormat, otel: otel, + bufferPool: NewBufferPool(), } } @@ -171,11 +173,12 @@ func (e *elasticsearchExporter) pushLogRecord( fIndex = formattedIndex } - document, err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL) + buffer := e.bufferPool.NewPooledBuffer() + err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL, buffer.Buffer) if err != nil { return fmt.Errorf("failed to encode log event: %w", err) } - return bulkIndexerSession.Add(ctx, fIndex, bytes.NewReader(document), nil) + return bulkIndexerSession.Add(ctx, fIndex, buffer, nil) } func (e *elasticsearchExporter) pushMetricsData( @@ -285,12 +288,13 @@ func (e *elasticsearchExporter) pushMetricsData( for fIndex, groupedDataPoints := range groupedDataPointsByIndex { for _, dataPoints := range groupedDataPoints { - docBytes, dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs) + buf := e.bufferPool.NewPooledBuffer() + dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs, buf.Buffer) if err != nil { errs = append(errs, err) continue } - if err := session.Add(ctx, fIndex, 
bytes.NewReader(docBytes), dynamicTemplates); err != nil { + if err := session.Add(ctx, fIndex, buf, dynamicTemplates); err != nil { if cerr := ctx.Err(); cerr != nil { return cerr } @@ -405,11 +409,12 @@ func (e *elasticsearchExporter) pushTraceRecord( fIndex = formattedIndex } - document, err := e.model.encodeSpan(resource, resourceSchemaURL, span, scope, scopeSchemaURL) + buf := e.bufferPool.NewPooledBuffer() + err := e.model.encodeSpan(resource, resourceSchemaURL, span, scope, scopeSchemaURL, buf.Buffer) if err != nil { return fmt.Errorf("failed to encode trace record: %w", err) } - return bulkIndexerSession.Add(ctx, fIndex, bytes.NewReader(document), nil) + return bulkIndexerSession.Add(ctx, fIndex, buf, nil) } func (e *elasticsearchExporter) pushSpanEvent( @@ -434,13 +439,11 @@ func (e *elasticsearchExporter) pushSpanEvent( } fIndex = formattedIndex } - docBytes, err := e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL) - if err != nil { - return err - } - if docBytes == nil { + buf := e.bufferPool.NewPooledBuffer() + e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL, buf.Buffer) + if buf.Buffer.Len() == 0 { return nil } - return bulkIndexerSession.Add(ctx, fIndex, bytes.NewReader(docBytes), nil) + return bulkIndexerSession.Add(ctx, fIndex, buf, nil) } diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 90a2194f3f5d..88bcf6e7bdf0 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -16,7 +16,6 @@ import ( "strings" "time" - jsoniter "github.com/json-iterator/go" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" @@ -77,12 +76,12 @@ var resourceAttrsToPreserve = map[string]bool{ var ErrInvalidTypeForBodyMapMode = errors.New("invalid log record body type for 'bodymap' mapping mode") type mappingModel interface { - encodeLog(pcommon.Resource, string, plog.LogRecord, pcommon.InstrumentationScope, string) ([]byte, error) - encodeSpan(pcommon.Resource, string, ptrace.Span, pcommon.InstrumentationScope, string) ([]byte, error) - encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) + encodeLog(pcommon.Resource, string, plog.LogRecord, pcommon.InstrumentationScope, string, *bytes.Buffer) error + encodeSpan(pcommon.Resource, string, ptrace.Span, pcommon.InstrumentationScope, string, *bytes.Buffer) error + encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string, buf *bytes.Buffer) hashDataPoint(dataPoint) uint32 - encodeDocument(objmodel.Document) ([]byte, error) - encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) + encodeDocument(objmodel.Document, *bytes.Buffer) error + encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error, buf *bytes.Buffer) (map[string]string, error) } // encodeModel tries to keep the event as close to the original open telemetry semantics as is. 
@@ -113,24 +112,22 @@ const ( attributeField = "attribute" ) -func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL string, record plog.LogRecord, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) { +func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL string, record plog.LogRecord, scope pcommon.InstrumentationScope, scopeSchemaURL string, buf *bytes.Buffer) error { var document objmodel.Document switch m.mode { case MappingECS: document = m.encodeLogECSMode(resource, record, scope) case MappingOTel: - return serializeLog(resource, resourceSchemaURL, scope, scopeSchemaURL, record) + return serializeLog(resource, resourceSchemaURL, scope, scopeSchemaURL, record, buf) case MappingBodyMap: - return m.encodeLogBodyMapMode(record) + return m.encodeLogBodyMapMode(record, buf) default: document = m.encodeLogDefaultMode(resource, record, scope) } // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false document.Dedup(m.mode != MappingOTel) - var buf bytes.Buffer - err := document.Serialize(&buf, m.dedot, m.mode == MappingOTel) - return buf.Bytes(), err + return document.Serialize(buf, m.dedot, m.mode == MappingOTel) } func (m *encodeModel) encodeLogDefaultMode(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) objmodel.Document { @@ -154,13 +151,14 @@ func (m *encodeModel) encodeLogDefaultMode(resource pcommon.Resource, record plo return document } -func (m *encodeModel) encodeLogBodyMapMode(record plog.LogRecord) ([]byte, error) { +func (m *encodeModel) encodeLogBodyMapMode(record plog.LogRecord, buf *bytes.Buffer) error { body := record.Body() if body.Type() != pcommon.ValueTypeMap { - return nil, fmt.Errorf("%w: %q", ErrInvalidTypeForBodyMapMode, body.Type()) + return fmt.Errorf("%w: %q", ErrInvalidTypeForBodyMapMode, body.Type()) } - return jsoniter.Marshal(body.Map().AsRaw()) + serializeMap(body.Map(), buf) + return nil } func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) objmodel.Document { @@ -205,16 +203,15 @@ func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.Lo return document } -func (m *encodeModel) encodeDocument(document objmodel.Document) ([]byte, error) { +func (m *encodeModel) encodeDocument(document objmodel.Document, buf *bytes.Buffer) error { // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false document.Dedup(m.mode != MappingOTel) - var buf bytes.Buffer - err := document.Serialize(&buf, m.dedot, m.mode == MappingOTel) + err := document.Serialize(buf, m.dedot, m.mode == MappingOTel) if err != nil { - return nil, err + return err } - return buf.Bytes(), nil + return nil } // upsertMetricDataPointValue upserts a datapoint value to documents which is already hashed by resource and index @@ -228,7 +225,7 @@ func (m *encodeModel) hashDataPoint(dp dataPoint) uint32 { } } -func (m *encodeModel) encodeDataPointsECSMode(resource pcommon.Resource, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { +func (m *encodeModel) encodeDataPointsECSMode(resource pcommon.Resource, dataPoints []dataPoint, validationErrors *[]error, buf *bytes.Buffer) (map[string]string, error) { dp0 := dataPoints[0] var document objmodel.Document encodeAttributesECSMode(&document, resource.Attributes(), resourceAttrsConversionMap, resourceAttrsToPreserve) @@ -243,17 +240,17 @@ func (m *encodeModel) 
encodeDataPointsECSMode(resource pcommon.Resource, dataPoi } document.AddAttribute(dp.Metric().Name(), value) } - docBytes, err := m.encodeDocument(document) + err := m.encodeDocument(document, buf) - return docBytes, document.DynamicTemplates(), err + return document.DynamicTemplates(), err } -func (m *encodeModel) encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { +func (m *encodeModel) encodeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error, buf *bytes.Buffer) (map[string]string, error) { switch m.mode { case MappingOTel: - return serializeMetrics(resource, resourceSchemaURL, scope, scopeSchemaURL, dataPoints, validationErrors) + return serializeMetrics(resource, resourceSchemaURL, scope, scopeSchemaURL, dataPoints, validationErrors, buf) default: - return m.encodeDataPointsECSMode(resource, dataPoints, validationErrors) + return m.encodeDataPointsECSMode(resource, dataPoints, validationErrors, buf) } } @@ -489,19 +486,18 @@ func (dp numberDataPoint) Metric() pmetric.Metric { var errInvalidNumberDataPoint = errors.New("invalid number data point") -func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) { +func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, scope pcommon.InstrumentationScope, scopeSchemaURL string, buf *bytes.Buffer) error { var document objmodel.Document switch m.mode { case MappingOTel: - return serializeSpan(resource, resourceSchemaURL, scope, scopeSchemaURL, span) + return serializeSpan(resource, resourceSchemaURL, scope, scopeSchemaURL, span, buf) default: document = m.encodeSpanDefaultMode(resource, span, scope) } // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false document.Dedup(m.mode != MappingOTel) - var buf bytes.Buffer - err := document.Serialize(&buf, m.dedot, m.mode == MappingOTel) - return buf.Bytes(), err + err := document.Serialize(buf, m.dedot, m.mode == MappingOTel) + return err } func (m *encodeModel) encodeSpanDefaultMode(resource pcommon.Resource, span ptrace.Span, scope pcommon.InstrumentationScope) objmodel.Document { @@ -524,13 +520,13 @@ func (m *encodeModel) encodeSpanDefaultMode(resource pcommon.Resource, span ptra return document } -func (m *encodeModel) encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string) ([]byte, error) { +func (m *encodeModel) encodeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, scope pcommon.InstrumentationScope, scopeSchemaURL string, buf *bytes.Buffer) { if m.mode != MappingOTel { // Currently span events are stored separately only in OTel mapping mode. // In other modes, they are stored within the span document. 
- return nil, nil + return } - return serializeSpanEvent(resource, resourceSchemaURL, scope, scopeSchemaURL, span, spanEvent) + serializeSpanEvent(resource, resourceSchemaURL, scope, scopeSchemaURL, span, spanEvent, buf) } func (m *encodeModel) encodeAttributes(document *objmodel.Document, attributes pcommon.Map) { diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index 4067a136d798..0f9b27ba2d2d 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -56,9 +56,10 @@ var ( func TestEncodeSpan(t *testing.T) { model := &encodeModel{dedot: false} td := mockResourceSpans() - spanByte, err := model.encodeSpan(td.ResourceSpans().At(0).Resource(), "", td.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0), td.ResourceSpans().At(0).ScopeSpans().At(0).Scope(), "") + var buf bytes.Buffer + err := model.encodeSpan(td.ResourceSpans().At(0).Resource(), "", td.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0), td.ResourceSpans().At(0).ScopeSpans().At(0).Scope(), "", &buf) assert.NoError(t, err) - assert.Equal(t, expectedSpanBody, string(spanByte)) + assert.Equal(t, expectedSpanBody, buf.String()) } func TestEncodeLog(t *testing.T) { @@ -66,26 +67,29 @@ func TestEncodeLog(t *testing.T) { model := &encodeModel{dedot: false} td := mockResourceLogs() td.ScopeLogs().At(0).LogRecords().At(0).SetObservedTimestamp(pcommon.NewTimestampFromTime(time.Date(2023, 4, 19, 3, 4, 5, 6, time.UTC))) - logByte, err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl()) + var buf bytes.Buffer + err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl(), &buf) assert.NoError(t, err) - assert.Equal(t, expectedLogBody, string(logByte)) + assert.Equal(t, expectedLogBody, buf.String()) }) t.Run("both timestamp and observedTimestamp empty", func(t *testing.T) { model := &encodeModel{dedot: false} td := mockResourceLogs() - logByte, err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl()) + var buf bytes.Buffer + err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl(), &buf) assert.NoError(t, err) - assert.Equal(t, expectedLogBodyWithEmptyTimestamp, string(logByte)) + assert.Equal(t, expectedLogBodyWithEmptyTimestamp, buf.String()) }) t.Run("dedot true", func(t *testing.T) { model := &encodeModel{dedot: true} td := mockResourceLogs() td.Resource().Attributes().PutStr("foo.bar", "baz") - logByte, err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl()) + var buf bytes.Buffer + err := model.encodeLog(td.Resource(), td.SchemaUrl(), td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), td.ScopeLogs().At(0).SchemaUrl(), &buf) require.NoError(t, err) - require.Equal(t, expectedLogBodyDeDottedWithEmptyTimestamp, string(logByte)) + require.Equal(t, expectedLogBodyDeDottedWithEmptyTimestamp, buf.String()) }) } @@ -118,9 +122,12 @@ func TestEncodeMetric(t *testing.T) { } for _, dataPoints := range groupedDataPoints { - bytes, _, err := model.encodeMetrics(rm.Resource(), rm.SchemaUrl(), sm.Scope(), 
sm.SchemaUrl(), dataPoints, nil) + var buf bytes.Buffer + errors := make([]error, 0) + _, err := model.encodeMetrics(rm.Resource(), rm.SchemaUrl(), sm.Scope(), sm.SchemaUrl(), dataPoints, &errors, &buf) + require.Empty(t, errors, err) require.NoError(t, err) - docsBytes = append(docsBytes, bytes) + docsBytes = append(docsBytes, buf.Bytes()) } allDocsSorted := docBytesToSortedString(docsBytes) @@ -338,10 +345,11 @@ func TestEncodeLogECSModeDuplication(t *testing.T) { mode: MappingECS, dedot: true, } - doc, err := m.encodeLog(resource, "", record, scope, "") + var buf bytes.Buffer + err = m.encodeLog(resource, "", record, scope, "", &buf) require.NoError(t, err) - assert.Equal(t, want, string(doc)) + assert.Equal(t, want, buf.String()) } func TestEncodeLogECSMode(t *testing.T) { @@ -1116,7 +1124,8 @@ func TestEncodeLogOtelMode(t *testing.T) { // This sets the data_stream values default or derived from the record/scope/resources routeLogRecord(record.Attributes(), scope.Attributes(), resource.Attributes(), "", true, scope.Name()) - b, err := m.encodeLog(resource, tc.rec.Resource.SchemaURL, record, scope, tc.rec.Scope.SchemaURL) + var buf bytes.Buffer + err := m.encodeLog(resource, tc.rec.Resource.SchemaURL, record, scope, tc.rec.Scope.SchemaURL, &buf) require.NoError(t, err) want := tc.rec @@ -1125,7 +1134,7 @@ func TestEncodeLogOtelMode(t *testing.T) { } var got OTelRecord - err = json.Unmarshal(b, &got) + err = json.Unmarshal(buf.Bytes(), &got) require.NoError(t, err) @@ -1248,9 +1257,11 @@ func TestEncodeLogScalarObjectConflict(t *testing.T) { td := mockResourceLogs() td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo", "scalar") td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo.bar", "baz") - encoded, err := model.encodeLog(td.Resource(), "", td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), "") + var buf bytes.Buffer + err := model.encodeLog(td.Resource(), "", td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), "", &buf) assert.NoError(t, err) + encoded := buf.Bytes() assert.True(t, gjson.ValidBytes(encoded)) assert.False(t, gjson.GetBytes(encoded, "Attributes\\.foo").Exists()) fooValue := gjson.GetBytes(encoded, "Attributes\\.foo\\.value") @@ -1260,9 +1271,11 @@ func TestEncodeLogScalarObjectConflict(t *testing.T) { // If there is an attribute named "foo.value", then "foo" would be omitted rather than renamed. 
td.ScopeLogs().At(0).LogRecords().At(0).Attributes().PutStr("foo.value", "foovalue") - encoded, err = model.encodeLog(td.Resource(), "", td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), "") + buf = bytes.Buffer{} + err = model.encodeLog(td.Resource(), "", td.ScopeLogs().At(0).LogRecords().At(0), td.ScopeLogs().At(0).Scope(), "", &buf) assert.NoError(t, err) + encoded = buf.Bytes() assert.False(t, gjson.GetBytes(encoded, "Attributes\\.foo").Exists()) fooValue = gjson.GetBytes(encoded, "Attributes\\.foo\\.value") assert.Equal(t, "foovalue", fooValue.Str) @@ -1289,7 +1302,8 @@ func TestEncodeLogBodyMapMode(t *testing.T) { bodyMap.CopyTo(logRecord.Body().SetEmptyMap()) m := encodeModel{} - got, err := m.encodeLogBodyMapMode(logRecord) + var buf bytes.Buffer + err := m.encodeLogBodyMapMode(logRecord, &buf) require.NoError(t, err) require.JSONEq(t, `{ @@ -1299,11 +1313,11 @@ func TestEncodeLogBodyMapMode(t *testing.T) { "key.a": "a", "key.a.b": "b", "pi": 3.14 - }`, string(got)) + }`, buf.String()) // invalid body map logRecord.Body().SetEmptySlice() - _, err = m.encodeLogBodyMapMode(logRecord) + err = m.encodeLogBodyMapMode(logRecord, &bytes.Buffer{}) require.Error(t, err) require.ErrorIs(t, err, ErrInvalidTypeForBodyMapMode) } diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index ec3cd1caea94..f0697ea7ae1f 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -17,14 +17,13 @@ import ( const tsLayout = "2006-01-02T15:04:05.000000000Z" -func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error) ([]byte, map[string]string, error) { +func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, dataPoints []dataPoint, validationErrors *[]error, buf *bytes.Buffer) (map[string]string, error) { if len(dataPoints) == 0 { - return nil, nil, nil + return nil, nil } dp0 := dataPoints[0] - var buf bytes.Buffer - v := json.NewVisitor(&buf) + v := json.NewVisitor(buf) // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. v.SetExplicitRadixPoint(true) @@ -40,7 +39,7 @@ func serializeMetrics(resource pcommon.Resource, resourceSchemaURL string, scope writeScope(v, scope, scopeSchemaURL, true) dynamicTemplates := serializeDataPoints(v, dataPoints, validationErrors) _ = v.OnObjectFinished() - return buf.Bytes(), dynamicTemplates, nil + return dynamicTemplates, nil } func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErrors *[]error) map[string]string { @@ -76,10 +75,8 @@ func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErro return dynamicTemplates } -func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent) ([]byte, error) { - var buf bytes.Buffer - - v := json.NewVisitor(&buf) +func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, spanEvent ptrace.SpanEvent, buf *bytes.Buffer) { + v := json.NewVisitor(buf) // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. 
// This is required to generate the correct dynamic mapping in ES. v.SetExplicitRadixPoint(true) @@ -103,13 +100,10 @@ func serializeSpanEvent(resource pcommon.Resource, resourceSchemaURL string, sco writeResource(v, resource, resourceSchemaURL, false) writeScope(v, scope, scopeSchemaURL, false) _ = v.OnObjectFinished() - return buf.Bytes(), nil } -func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span) ([]byte, error) { - var buf bytes.Buffer - - v := json.NewVisitor(&buf) +func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, span ptrace.Span, buf *bytes.Buffer) error { + v := json.NewVisitor(buf) // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. v.SetExplicitRadixPoint(true) @@ -132,7 +126,7 @@ func serializeSpan(resource pcommon.Resource, resourceSchemaURL string, scope pc writeResource(v, resource, resourceSchemaURL, false) writeScope(v, scope, scopeSchemaURL, false) _ = v.OnObjectFinished() - return buf.Bytes(), nil + return nil } func writeStatus(v *json.Visitor, status ptrace.Status) { @@ -160,10 +154,16 @@ func writeSpanLinks(v *json.Visitor, span ptrace.Span) { _ = v.OnArrayFinished() } -func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, record plog.LogRecord) ([]byte, error) { - var buf bytes.Buffer +func serializeMap(m pcommon.Map, buf *bytes.Buffer) { + v := json.NewVisitor(buf) + // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. + // This is required to generate the correct dynamic mapping in ES. + v.SetExplicitRadixPoint(true) + writeMap(v, m, false) +} - v := json.NewVisitor(&buf) +func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pcommon.InstrumentationScope, scopeSchemaURL string, record plog.LogRecord, buf *bytes.Buffer) error { + v := json.NewVisitor(buf) // Enable ExplicitRadixPoint such that 1.0 is encoded as 1.0 instead of 1. // This is required to generate the correct dynamic mapping in ES. 
 	v.SetExplicitRadixPoint(true)
@@ -193,7 +193,7 @@ func serializeLog(resource pcommon.Resource, resourceSchemaURL string, scope pco
 	writeScope(v, scope, scopeSchemaURL, false)
 	writeLogBody(v, record, isEvent)
 	_ = v.OnObjectFinished()
-	return buf.Bytes(), nil
+	return nil
 }
 
 func writeDataStream(v *json.Visitor, attributes pcommon.Map) {
diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go
index ff901da27a1c..1aa61d0ccb7a 100644
--- a/exporter/elasticsearchexporter/pdata_serializer_test.go
+++ b/exporter/elasticsearchexporter/pdata_serializer_test.go
@@ -119,10 +119,12 @@ func TestSerializeLog(t *testing.T) {
 			record := scopeLogs.LogRecords().AppendEmpty()
 			tt.logCustomizer(resourceLogs.Resource(), scopeLogs.Scope(), record)
 
-			logBytes, err := serializeLog(resourceLogs.Resource(), "", scopeLogs.Scope(), "", record)
+			var buf bytes.Buffer
+			err := serializeLog(resourceLogs.Resource(), "", scopeLogs.Scope(), "", record, &buf)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("serializeLog() error = %v, wantErr %v", err, tt.wantErr)
 			}
+			logBytes := buf.Bytes()
 			eventAsJSON := string(logBytes)
 			var result any
 			decoder := json.NewDecoder(bytes.NewBuffer(logBytes))

From fb5f38d512e7c34fde81c554f75ad13a7581f19f Mon Sep 17 00:00:00 2001
From: Felix Barnsteiner
Date: Fri, 10 Jan 2025 17:58:51 +0100
Subject: [PATCH 13/30] Add subtext to changelog

---
 .chloggen/elasticsearchexporter_optimized-json-encoding.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml b/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml
index 6b37e98f6847..440ac12a5ce7 100644
--- a/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml
+++ b/.chloggen/elasticsearchexporter_optimized-json-encoding.yaml
@@ -15,7 +15,7 @@ issues: [37032]
 # (Optional) One or more lines of additional information to render under the primary note.
 # These lines will be padded with 2 spaces and then inserted directly into the document.
 # Use pipe (|) for multiline entries.
-subtext:
+subtext: Increases throughput for metrics by 2x and for logs and traces by 3x
 
 # If your change doesn't affect end users or the exported elements of any package,
 # you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
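
A short usage sketch of the buffer pooling added in PATCH 12, for orientation; it is not part of any patch in this series. It assumes the exported BufferPool/PooledBuffer types exactly as defined in bufferpol.go above (PATCH 14 below unexports them, and PATCH 20 moves them to internal/pool), and a plain bytes.Buffer stands in for the bulk indexer writer that the exporter hands each encoded document to.

    package elasticsearchexporter

    import (
        "bytes"
        "fmt"
    )

    func examplePooledBufferUsage() {
        pool := NewBufferPool()

        // Rent a buffer and serialize one document into it, mirroring what
        // pushLogRecord does before calling bulkIndexerSession.Add.
        buf := pool.NewPooledBuffer()
        buf.Buffer.WriteString(`{"@timestamp":"1970-01-01T00:00:00.000000000Z"}`)

        // WriteTo drains the document into the writer and then recycles the
        // underlying bytes.Buffer back into the pool, so buf must not be
        // reused after this call.
        var out bytes.Buffer
        if _, err := buf.WriteTo(&out); err != nil {
            fmt.Println("flush failed:", err)
        }
    }

Tying the recycle step to WriteTo is what lets the exporter drop explicit release calls: the bulk indexer consumes each pooled document exactly once, and the buffer returns to the pool at exactly that point. The later switch from bytes.NewReader(p.Buffer.Bytes()).WriteTo(w) to p.Buffer.WriteTo(w) in PATCH 16 keeps the same single-use contract while avoiding an extra reader allocation.
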
From 1fb21560480050b5ab6079a35e61500273f6d57d Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 18:10:58 +0100 Subject: [PATCH 14/30] Fix checkapi error exporter/elasticsearchexporter has more than one function: "NewBufferPool,NewFactory" --- exporter/elasticsearchexporter/bufferpol.go | 16 ++++++++-------- exporter/elasticsearchexporter/exporter.go | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/exporter/elasticsearchexporter/bufferpol.go b/exporter/elasticsearchexporter/bufferpol.go index 257aaaf09e0f..cdb8f16fa696 100644 --- a/exporter/elasticsearchexporter/bufferpol.go +++ b/exporter/elasticsearchexporter/bufferpol.go @@ -9,32 +9,32 @@ import ( "sync" ) -type BufferPool struct { +type bufferPool struct { pool *sync.Pool } -func NewBufferPool() *BufferPool { - return &BufferPool{pool: &sync.Pool{New: func() any { return &bytes.Buffer{} }}} +func newBufferPool() *bufferPool { + return &bufferPool{pool: &sync.Pool{New: func() any { return &bytes.Buffer{} }}} } -func (w *BufferPool) NewPooledBuffer() PooledBuffer { - return PooledBuffer{ +func (w *bufferPool) newPooledBuffer() pooledBuffer { + return pooledBuffer{ Buffer: w.pool.Get().(*bytes.Buffer), pool: w.pool, } } -type PooledBuffer struct { +type pooledBuffer struct { Buffer *bytes.Buffer pool *sync.Pool } -func (p PooledBuffer) recycle() { +func (p pooledBuffer) recycle() { p.Buffer.Reset() p.pool.Put(p.Buffer) } -func (p PooledBuffer) WriteTo(w io.Writer) (n int64, err error) { +func (p pooledBuffer) WriteTo(w io.Writer) (n int64, err error) { defer p.recycle() return bytes.NewReader(p.Buffer.Bytes()).WriteTo(w) } diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index d1410761f336..392f3ba552c5 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -34,7 +34,7 @@ type elasticsearchExporter struct { wg sync.WaitGroup // active sessions bulkIndexer bulkIndexer - bufferPool *BufferPool + bufferPool *bufferPool } func newExporter( @@ -68,7 +68,7 @@ func newExporter( model: model, logstashFormat: cfg.LogstashFormat, otel: otel, - bufferPool: NewBufferPool(), + bufferPool: newBufferPool(), } } @@ -173,7 +173,7 @@ func (e *elasticsearchExporter) pushLogRecord( fIndex = formattedIndex } - buffer := e.bufferPool.NewPooledBuffer() + buffer := e.bufferPool.newPooledBuffer() err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL, buffer.Buffer) if err != nil { return fmt.Errorf("failed to encode log event: %w", err) @@ -288,7 +288,7 @@ func (e *elasticsearchExporter) pushMetricsData( for fIndex, groupedDataPoints := range groupedDataPointsByIndex { for _, dataPoints := range groupedDataPoints { - buf := e.bufferPool.NewPooledBuffer() + buf := e.bufferPool.newPooledBuffer() dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs, buf.Buffer) if err != nil { errs = append(errs, err) @@ -409,7 +409,7 @@ func (e *elasticsearchExporter) pushTraceRecord( fIndex = formattedIndex } - buf := e.bufferPool.NewPooledBuffer() + buf := e.bufferPool.newPooledBuffer() err := e.model.encodeSpan(resource, resourceSchemaURL, span, scope, scopeSchemaURL, buf.Buffer) if err != nil { return fmt.Errorf("failed to encode trace record: %w", err) @@ -439,7 +439,7 @@ func (e *elasticsearchExporter) pushSpanEvent( } fIndex = formattedIndex } - buf := e.bufferPool.NewPooledBuffer() + buf := 
e.bufferPool.newPooledBuffer() e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL, buf.Buffer) if buf.Buffer.Len() == 0 { return nil From 29e9daf2912d8d898ddcf8e6ca6d7c4a098e32c9 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Fri, 10 Jan 2025 18:34:33 +0100 Subject: [PATCH 15/30] gotidy --- exporter/elasticsearchexporter/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/go.mod b/exporter/elasticsearchexporter/go.mod index f638fce745fb..512a343e4617 100644 --- a/exporter/elasticsearchexporter/go.mod +++ b/exporter/elasticsearchexporter/go.mod @@ -7,7 +7,6 @@ require ( github.com/elastic/go-docappender/v2 v2.3.3 github.com/elastic/go-elasticsearch/v7 v7.17.10 github.com/elastic/go-structform v0.0.12 - github.com/json-iterator/go v1.1.12 github.com/klauspost/compress v1.17.11 github.com/lestrrat-go/strftime v1.1.0 github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.117.0 @@ -48,6 +47,7 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/hashicorp/go-version v1.7.0 // indirect github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/knadh/koanf/providers/confmap v0.1.0 // indirect github.com/knadh/koanf/v2 v2.1.2 // indirect From 19d0c949d74d1b9b362df41f70da266916256f33 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 08:58:58 +0100 Subject: [PATCH 16/30] Apply suggestions from code review Co-authored-by: Carson Ip --- exporter/elasticsearchexporter/bufferpol.go | 2 +- exporter/elasticsearchexporter/model.go | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/exporter/elasticsearchexporter/bufferpol.go b/exporter/elasticsearchexporter/bufferpol.go index cdb8f16fa696..b3070885513c 100644 --- a/exporter/elasticsearchexporter/bufferpol.go +++ b/exporter/elasticsearchexporter/bufferpol.go @@ -36,5 +36,5 @@ func (p pooledBuffer) recycle() { func (p pooledBuffer) WriteTo(w io.Writer) (n int64, err error) { defer p.recycle() - return bytes.NewReader(p.Buffer.Bytes()).WriteTo(w) + return p.Buffer.WriteTo(w) } diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 88bcf6e7bdf0..458f481f2689 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -124,10 +124,9 @@ func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL str default: document = m.encodeLogDefaultMode(resource, record, scope) } - // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false - document.Dedup(m.mode != MappingOTel) + document.Dedup(true) - return document.Serialize(buf, m.dedot, m.mode == MappingOTel) + return document.Serialize(buf, m.dedot, false) } func (m *encodeModel) encodeLogDefaultMode(resource pcommon.Resource, record plog.LogRecord, scope pcommon.InstrumentationScope) objmodel.Document { @@ -204,10 +203,9 @@ func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.Lo } func (m *encodeModel) encodeDocument(document objmodel.Document, buf *bytes.Buffer) error { - // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false - document.Dedup(m.mode != MappingOTel) + document.Dedup(true) - err := document.Serialize(buf, m.dedot, m.mode == MappingOTel) + err := document.Serialize(buf, m.dedot, false) if err != nil { 
return err } @@ -494,9 +492,8 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL st default: document = m.encodeSpanDefaultMode(resource, span, scope) } - // For OTel mode, prefix conflicts are not a problem as otel-data has subobjects: false - document.Dedup(m.mode != MappingOTel) - err := document.Serialize(buf, m.dedot, m.mode == MappingOTel) + document.Dedup(true) + err := document.Serialize(buf, m.dedot, false) return err } From 43b7869f3ae19461760beea697363d2b23794e65 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:01:05 +0100 Subject: [PATCH 17/30] fix stale comment --- exporter/elasticsearchexporter/model.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 458f481f2689..56f2a0ff610d 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -781,8 +781,8 @@ func sliceHash(h hash.Hash, s pcommon.Slice) { } } -// mergeGeolocation mutates attributes map to merge all `geo.location.{lon,lat}`, -// and namespaced `*.geo.location.{lon,lat}` to unnamespaced and namespaced `geo.location`. +// mergeGeolocation returns a new map that to merges all `geo.location.{lon,lat}`, +// and namespaced `*.geo.location.{lon,lat}` attributes from the provided map to unnamespaced and namespaced `geo.location`. // This is to match the geo_point type in Elasticsearch. func mergeGeolocation(attributes pcommon.Map) pcommon.Map { const ( From cd163432dea4d0e276de1245d29b619445c235ba Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:01:15 +0100 Subject: [PATCH 18/30] fix typo in file name --- exporter/elasticsearchexporter/{bufferpol.go => bufferpool.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename exporter/elasticsearchexporter/{bufferpol.go => bufferpool.go} (100%) diff --git a/exporter/elasticsearchexporter/bufferpol.go b/exporter/elasticsearchexporter/bufferpool.go similarity index 100% rename from exporter/elasticsearchexporter/bufferpol.go rename to exporter/elasticsearchexporter/bufferpool.go From d1504935309624d5463ddc38e88f71937361824f Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:07:02 +0100 Subject: [PATCH 19/30] Remove otel serialization code from objmodel --- .../internal/objmodel/objmodel.go | 48 +++++-------------- .../internal/objmodel/objmodel_test.go | 6 +-- exporter/elasticsearchexporter/model.go | 6 +-- exporter/elasticsearchexporter/model_test.go | 10 ++-- 4 files changed, 24 insertions(+), 46 deletions(-) diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go index 0f514e06aaaa..25c99b19088a 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go @@ -277,19 +277,19 @@ func newJSONVisitor(w io.Writer) *json.Visitor { // Serialize writes the document to the given writer. The serializer will create nested objects if dedot is true. // // NOTE: The documented MUST be sorted if dedot is true. 
-func (doc *Document) Serialize(w io.Writer, dedot bool, otel bool) error { +func (doc *Document) Serialize(w io.Writer, dedot bool) error { v := newJSONVisitor(w) - return doc.iterJSON(v, dedot, otel) + return doc.iterJSON(v, dedot) } -func (doc *Document) iterJSON(v *json.Visitor, dedot bool, otel bool) error { +func (doc *Document) iterJSON(v *json.Visitor, dedot bool) error { if dedot { - return doc.iterJSONDedot(v, otel) + return doc.iterJSONDedot(v) } - return doc.iterJSONFlat(v, otel) + return doc.iterJSONFlat(v) } -func (doc *Document) iterJSONFlat(w *json.Visitor, otel bool) error { +func (doc *Document) iterJSONFlat(w *json.Visitor) error { err := w.OnObjectStart(-1, structform.AnyType) if err != nil { return err @@ -308,7 +308,7 @@ func (doc *Document) iterJSONFlat(w *json.Visitor, otel bool) error { return err } - if err := fld.value.iterJSON(w, true, otel); err != nil { + if err := fld.value.iterJSON(w, true); err != nil { return err } } @@ -316,20 +316,7 @@ func (doc *Document) iterJSONFlat(w *json.Visitor, otel bool) error { return nil } -// Under OTel mode, set of key prefixes where keys should be flattened from that level, -// such that a document (root or not) with fields {"attributes.a.b": 1} will be serialized as {"attributes": {"a.b": 1}} -// It is not aware of whether it is a root document or sub-document. -// NOTE: This works very delicately with the implementation of OTel mode that -// e.g. resource.attributes is a "resource" objmodel.Document under the root document that contains attributes -// added using AddAttributes func as flattened keys. -// Therefore, there will be correctness issues when attributes are added / used in other ways, but it is working -// for current use cases and the proper fix will be slightly too complex. YAGNI. 
-var otelPrefixSet = map[string]struct{}{ - "attributes.": {}, - "metrics.": {}, -} - -func (doc *Document) iterJSONDedot(w *json.Visitor, otel bool) error { +func (doc *Document) iterJSONDedot(w *json.Visitor) error { objPrefix := "" level := 0 @@ -381,15 +368,6 @@ func (doc *Document) iterJSONDedot(w *json.Visitor, otel bool) error { // increase object level up to current field for { - // Otel mode serialization - if otel { - // Check the prefix - _, isOtelPrefix := otelPrefixSet[objPrefix] - if isOtelPrefix { - break - } - } - start := len(objPrefix) idx := strings.IndexByte(key[start:], '.') if idx < 0 { @@ -412,7 +390,7 @@ func (doc *Document) iterJSONDedot(w *json.Visitor, otel bool) error { if err := w.OnKey(fieldName); err != nil { return err } - if err := fld.value.iterJSON(w, true, otel); err != nil { + if err := fld.value.iterJSON(w, true); err != nil { return err } } @@ -524,7 +502,7 @@ func (v *Value) IsEmpty() bool { } } -func (v *Value) iterJSON(w *json.Visitor, dedot bool, otel bool) error { +func (v *Value) iterJSON(w *json.Visitor, dedot bool) error { switch v.kind { case KindNil: return w.OnNil() @@ -549,18 +527,18 @@ func (v *Value) iterJSON(w *json.Visitor, dedot bool, otel bool) error { if len(v.doc.fields) == 0 { return w.OnNil() } - return v.doc.iterJSON(w, dedot, otel) + return v.doc.iterJSON(w, dedot) case KindUnflattenableObject: if len(v.doc.fields) == 0 { return w.OnNil() } - return v.doc.iterJSON(w, true, otel) + return v.doc.iterJSON(w, true) case KindArr: if err := w.OnArrayStart(-1, structform.AnyType); err != nil { return err } for i := range v.arr { - if err := v.arr[i].iterJSON(w, dedot, otel); err != nil { + if err := v.arr[i].iterJSON(w, dedot); err != nil { return err } } diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go index 6805a958a019..0128120508ec 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go @@ -301,7 +301,7 @@ func TestDocument_Serialize_Flat(t *testing.T) { assert.NoError(t, m.FromRaw(test.attrs)) doc := DocumentFromAttributes(m) doc.Dedup(true) - err := doc.Serialize(&buf, false, false) + err := doc.Serialize(&buf, false) require.NoError(t, err) assert.Equal(t, test.want, buf.String()) @@ -362,7 +362,7 @@ func TestDocument_Serialize_Dedot(t *testing.T) { assert.NoError(t, m.FromRaw(test.attrs)) doc := DocumentFromAttributes(m) doc.Dedup(true) - err := doc.Serialize(&buf, true, false) + err := doc.Serialize(&buf, true) require.NoError(t, err) assert.Equal(t, test.want, buf.String()) @@ -410,7 +410,7 @@ func TestValue_Serialize(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { var buf strings.Builder - err := test.value.iterJSON(newJSONVisitor(&buf), false, false) + err := test.value.iterJSON(newJSONVisitor(&buf), false) require.NoError(t, err) assert.Equal(t, test.want, buf.String()) }) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 56f2a0ff610d..c1ac0ea17701 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -126,7 +126,7 @@ func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL str } document.Dedup(true) - return document.Serialize(buf, m.dedot, false) + return document.Serialize(buf, m.dedot) } func (m *encodeModel) encodeLogDefaultMode(resource pcommon.Resource, record plog.LogRecord, 
scope pcommon.InstrumentationScope) objmodel.Document { @@ -205,7 +205,7 @@ func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.Lo func (m *encodeModel) encodeDocument(document objmodel.Document, buf *bytes.Buffer) error { document.Dedup(true) - err := document.Serialize(buf, m.dedot, false) + err := document.Serialize(buf, m.dedot) if err != nil { return err } @@ -493,7 +493,7 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL st document = m.encodeSpanDefaultMode(resource, span, scope) } document.Dedup(true) - err := document.Serialize(buf, m.dedot, false) + err := document.Serialize(buf, m.dedot) return err } diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index 0f9b27ba2d2d..d0a202799190 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -419,7 +419,7 @@ func TestEncodeLogECSMode(t *testing.T) { var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - require.NoError(t, doc.Serialize(&buf, false, false)) + require.NoError(t, doc.Serialize(&buf, false)) require.JSONEq(t, `{ "@timestamp": "2024-03-12T20:00:41.123456789Z", @@ -550,7 +550,7 @@ func TestEncodeLogECSModeAgentName(t *testing.T) { var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - require.NoError(t, doc.Serialize(&buf, false, false)) + require.NoError(t, doc.Serialize(&buf, false)) require.JSONEq(t, fmt.Sprintf(`{ "@timestamp": "2024-03-13T23:50:59.123456789Z", "agent.name": %q @@ -602,7 +602,7 @@ func TestEncodeLogECSModeAgentVersion(t *testing.T) { var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - require.NoError(t, doc.Serialize(&buf, false, false)) + require.NoError(t, doc.Serialize(&buf, false)) if test.expectedAgentVersion == "" { require.JSONEq(t, `{ @@ -709,7 +709,7 @@ func TestEncodeLogECSModeHostOSType(t *testing.T) { var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - require.NoError(t, doc.Serialize(&buf, false, false)) + require.NoError(t, doc.Serialize(&buf, false)) expectedJSON := `{"@timestamp":"2024-03-13T23:50:59.123456789Z", "agent.name":"otlp"` if test.expectedHostOsName != "" { @@ -760,7 +760,7 @@ func TestEncodeLogECSModeTimestamps(t *testing.T) { var buf bytes.Buffer m := encodeModel{} doc := m.encodeLogECSMode(resource, record, scope) - require.NoError(t, doc.Serialize(&buf, false, false)) + require.NoError(t, doc.Serialize(&buf, false)) require.JSONEq(t, fmt.Sprintf( `{"@timestamp":%q,"agent.name":"otlp"}`, test.expectedTimestamp, From 90f46f7eb6ea4af851577386ad90acbe52067a27 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:18:29 +0100 Subject: [PATCH 20/30] Move bufferpool to dedicated package --- exporter/elasticsearchexporter/bufferpool.go | 40 ------------------- exporter/elasticsearchexporter/exporter.go | 13 +++--- .../internal/pool/bufferpool.go | 40 +++++++++++++++++++ 3 files changed, 47 insertions(+), 46 deletions(-) delete mode 100644 exporter/elasticsearchexporter/bufferpool.go create mode 100644 exporter/elasticsearchexporter/internal/pool/bufferpool.go diff --git a/exporter/elasticsearchexporter/bufferpool.go b/exporter/elasticsearchexporter/bufferpool.go deleted file mode 100644 index b3070885513c..000000000000 --- a/exporter/elasticsearchexporter/bufferpool.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright The OpenTelemetry Authors 
-// SPDX-License-Identifier: Apache-2.0 - -package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" - -import ( - "bytes" - "io" - "sync" -) - -type bufferPool struct { - pool *sync.Pool -} - -func newBufferPool() *bufferPool { - return &bufferPool{pool: &sync.Pool{New: func() any { return &bytes.Buffer{} }}} -} - -func (w *bufferPool) newPooledBuffer() pooledBuffer { - return pooledBuffer{ - Buffer: w.pool.Get().(*bytes.Buffer), - pool: w.pool, - } -} - -type pooledBuffer struct { - Buffer *bytes.Buffer - pool *sync.Pool -} - -func (p pooledBuffer) recycle() { - p.Buffer.Reset() - p.pool.Put(p.Buffer) -} - -func (p pooledBuffer) WriteTo(w io.Writer) (n int64, err error) { - defer p.recycle() - return p.Buffer.WriteTo(w) -} diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 392f3ba552c5..173cd9ff2d33 100644 --- a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/pool" "runtime" "sync" "time" @@ -34,7 +35,7 @@ type elasticsearchExporter struct { wg sync.WaitGroup // active sessions bulkIndexer bulkIndexer - bufferPool *bufferPool + bufferPool *pool.BufferPool } func newExporter( @@ -68,7 +69,7 @@ func newExporter( model: model, logstashFormat: cfg.LogstashFormat, otel: otel, - bufferPool: newBufferPool(), + bufferPool: pool.NewBufferPool(), } } @@ -173,7 +174,7 @@ func (e *elasticsearchExporter) pushLogRecord( fIndex = formattedIndex } - buffer := e.bufferPool.newPooledBuffer() + buffer := e.bufferPool.NewPooledBuffer() err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL, buffer.Buffer) if err != nil { return fmt.Errorf("failed to encode log event: %w", err) @@ -288,7 +289,7 @@ func (e *elasticsearchExporter) pushMetricsData( for fIndex, groupedDataPoints := range groupedDataPointsByIndex { for _, dataPoints := range groupedDataPoints { - buf := e.bufferPool.newPooledBuffer() + buf := e.bufferPool.NewPooledBuffer() dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs, buf.Buffer) if err != nil { errs = append(errs, err) @@ -409,7 +410,7 @@ func (e *elasticsearchExporter) pushTraceRecord( fIndex = formattedIndex } - buf := e.bufferPool.newPooledBuffer() + buf := e.bufferPool.NewPooledBuffer() err := e.model.encodeSpan(resource, resourceSchemaURL, span, scope, scopeSchemaURL, buf.Buffer) if err != nil { return fmt.Errorf("failed to encode trace record: %w", err) @@ -439,7 +440,7 @@ func (e *elasticsearchExporter) pushSpanEvent( } fIndex = formattedIndex } - buf := e.bufferPool.newPooledBuffer() + buf := e.bufferPool.NewPooledBuffer() e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL, buf.Buffer) if buf.Buffer.Len() == 0 { return nil diff --git a/exporter/elasticsearchexporter/internal/pool/bufferpool.go b/exporter/elasticsearchexporter/internal/pool/bufferpool.go new file mode 100644 index 000000000000..88f896ae384f --- /dev/null +++ b/exporter/elasticsearchexporter/internal/pool/bufferpool.go @@ -0,0 +1,40 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package pool // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter" + 
+import ( + "bytes" + "io" + "sync" +) + +type BufferPool struct { + pool *sync.Pool +} + +func NewBufferPool() *BufferPool { + return &BufferPool{pool: &sync.Pool{New: func() any { return &bytes.Buffer{} }}} +} + +func (w *BufferPool) NewPooledBuffer() PooledBuffer { + return PooledBuffer{ + Buffer: w.pool.Get().(*bytes.Buffer), + pool: w.pool, + } +} + +type PooledBuffer struct { + Buffer *bytes.Buffer + pool *sync.Pool +} + +func (p PooledBuffer) recycle() { + p.Buffer.Reset() + p.pool.Put(p.Buffer) +} + +func (p PooledBuffer) WriteTo(w io.Writer) (n int64, err error) { + defer p.recycle() + return p.Buffer.WriteTo(w) +} From 20e960cc6a152b99529229b3f366aaba3536d3c0 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:28:24 +0100 Subject: [PATCH 21/30] Fix geo serialization --- exporter/elasticsearchexporter/model.go | 9 +++++++++ .../elasticsearchexporter/pdata_serializer.go | 2 +- .../pdata_serializer_test.go | 20 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index c1ac0ea17701..21253ffa1e33 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -842,6 +842,15 @@ func mergeGeolocation(attributes pcommon.Map) pcommon.Map { s.AppendEmpty().SetDouble(geo.lat) continue } + // Place the attributes back if lon and lat are not present together + if geo.lonSet { + key := prefix + lonKey + geoAttributes.PutDouble(key, geo.lon) + } + if geo.latSet { + key := prefix + latKey + geoAttributes.PutDouble(key, geo.lat) + } } return geoAttributes } diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index f0697ea7ae1f..abb25ffba67d 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -283,7 +283,7 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues case dataStreamType, dataStreamDataset, dataStreamNamespace, mappingHintsAttrKey: return true } - if strings.HasSuffix(k, ".geo.location") { + if strings.HasSuffix(k, ".geo.location.lat") || strings.HasSuffix(k, ".geo.location.lon") { return true } _ = v.OnKey(k) diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 1aa61d0ccb7a..475192916ec6 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -111,6 +111,26 @@ func TestSerializeLog(t *testing.T) { }, }, }, + { + name: "geo attributes", + logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) { + record.Attributes().PutDouble("foo.geo.location.lon", 1) + record.Attributes().PutDouble("foo.geo.location.lat", 2) + record.Attributes().PutDouble("bar.geo.location.lat", 3) + }, + wantErr: false, + expected: map[string]any{ + "@timestamp": "1970-01-01T00:00:00.000000000Z", + "observed_timestamp": "1970-01-01T00:00:00.000000000Z", + "data_stream": map[string]any{}, + "resource": map[string]any{}, + "scope": map[string]any{}, + "attributes": map[string]any{ + "foo.geo.location": []any{json.Number("1.0"), json.Number("2.0")}, + "bar.geo.location.lat": json.Number("3.0"), + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 60bc183caa71bccfa4991d36b7fcadeb2b598a41 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 
09:32:00 +0100 Subject: [PATCH 22/30] Move mergeGeoLocation to pdata_serializer.go --- exporter/elasticsearchexporter/model.go | 75 ------------------- exporter/elasticsearchexporter/model_test.go | 26 ------- .../elasticsearchexporter/pdata_serializer.go | 74 ++++++++++++++++++ .../pdata_serializer_test.go | 29 +++++++ 4 files changed, 103 insertions(+), 101 deletions(-) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 21253ffa1e33..6fe5f596049e 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -13,7 +13,6 @@ import ( "hash/fnv" "math" "slices" - "strings" "time" "go.opentelemetry.io/collector/pdata/pcommon" @@ -781,80 +780,6 @@ func sliceHash(h hash.Hash, s pcommon.Slice) { } } -// mergeGeolocation returns a new map that to merges all `geo.location.{lon,lat}`, -// and namespaced `*.geo.location.{lon,lat}` attributes from the provided map to unnamespaced and namespaced `geo.location`. -// This is to match the geo_point type in Elasticsearch. -func mergeGeolocation(attributes pcommon.Map) pcommon.Map { - const ( - lonKey = "geo.location.lon" - latKey = "geo.location.lat" - mergedKey = "geo.location" - ) - // Prefix is the attribute name without lonKey or latKey suffix - // e.g. prefix of "foo.bar.geo.location.lon" is "foo.bar.", prefix of "geo.location.lon" is "". - prefixToGeo := make(map[string]struct { - lon, lat float64 - lonSet, latSet bool - }) - setLon := func(prefix string, v float64) { - g := prefixToGeo[prefix] - g.lon = v - g.lonSet = true - prefixToGeo[prefix] = g - } - setLat := func(prefix string, v float64) { - g := prefixToGeo[prefix] - g.lat = v - g.latSet = true - prefixToGeo[prefix] = g - } - attributes.Range(func(key string, val pcommon.Value) bool { - if val.Type() != pcommon.ValueTypeDouble { - return true - } - - if key == lonKey { - setLon("", val.Double()) - return true - } else if key == latKey { - setLat("", val.Double()) - return true - } else if namespace, found := strings.CutSuffix(key, "."+lonKey); found { - prefix := namespace + "." - setLon(prefix, val.Double()) - return true - } else if namespace, found := strings.CutSuffix(key, "."+latKey); found { - prefix := namespace + "." 
-			setLat(prefix, val.Double())
-			return true
-		}
-		return true
-	})
-
-	geoAttributes := pcommon.NewMap()
-	for prefix, geo := range prefixToGeo {
-		if geo.lonSet && geo.latSet {
-			key := prefix + mergedKey
-			// Geopoint expressed as an array with the format: [lon, lat]
-			s := geoAttributes.PutEmptySlice(key)
-			s.EnsureCapacity(2)
-			s.AppendEmpty().SetDouble(geo.lon)
-			s.AppendEmpty().SetDouble(geo.lat)
-			continue
-		}
-		// Place the attributes back if lon and lat are not present together
-		if geo.lonSet {
-			key := prefix + lonKey
-			geoAttributes.PutDouble(key, geo.lon)
-		}
-		if geo.latSet {
-			key := prefix + latKey
-			geoAttributes.PutDouble(key, geo.lat)
-		}
-	}
-	return geoAttributes
-}
-
 func safeUint64ToInt64(v uint64) int64 {
 	if v > math.MaxInt64 {
 		return math.MaxInt64
diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go
index d0a202799190..f2bda530ce5d 100644
--- a/exporter/elasticsearchexporter/model_test.go
+++ b/exporter/elasticsearchexporter/model_test.go
@@ -1321,29 +1321,3 @@ func TestEncodeLogBodyMapMode(t *testing.T) {
 	require.Error(t, err)
 	require.ErrorIs(t, err, ErrInvalidTypeForBodyMapMode)
 }
-
-func TestMergeGeolocation(t *testing.T) {
-	attributes := map[string]any{
-		"geo.location.lon":          1.1,
-		"geo.location.lat":          2.2,
-		"foo.bar.geo.location.lon":  3.3,
-		"foo.bar.geo.location.lat":  4.4,
-		"a.geo.location.lon":        5.5,
-		"b.geo.location.lat":        6.6,
-		"unrelatedgeo.location.lon": 7.7,
-		"unrelatedgeo.location.lat": 8.8,
-		"d":                         9.9,
-		"e.geo.location.lon":        "foo",
-		"e.geo.location.lat":        "bar",
-	}
-	wantAttributes := map[string]any{
-		"geo.location":         []any{1.1, 2.2},
-		"foo.bar.geo.location": []any{3.3, 4.4},
-	}
-	input := pcommon.NewMap()
-	err := input.FromRaw(attributes)
-	require.NoError(t, err)
-	output := mergeGeolocation(input)
-	after := output.AsRaw()
-	assert.Equal(t, wantAttributes, after)
-}
diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go
index abb25ffba67d..1afa5029c6be 100644
--- a/exporter/elasticsearchexporter/pdata_serializer.go
+++ b/exporter/elasticsearchexporter/pdata_serializer.go
@@ -298,6 +298,80 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues
 	_ = v.OnObjectFinished()
 }
 
+// mergeGeolocation returns a new map that merges all `geo.location.{lon,lat}`,
+// and namespaced `*.geo.location.{lon,lat}` attributes from the provided map to unnamespaced and namespaced `geo.location`.
+// This is to match the geo_point type in Elasticsearch.
+func mergeGeolocation(attributes pcommon.Map) pcommon.Map {
+	const (
+		lonKey    = "geo.location.lon"
+		latKey    = "geo.location.lat"
+		mergedKey = "geo.location"
+	)
+	// Prefix is the attribute name without lonKey or latKey suffix
+	// e.g. prefix of "foo.bar.geo.location.lon" is "foo.bar.", prefix of "geo.location.lon" is "".
+ prefixToGeo := make(map[string]struct { + lon, lat float64 + lonSet, latSet bool + }) + setLon := func(prefix string, v float64) { + g := prefixToGeo[prefix] + g.lon = v + g.lonSet = true + prefixToGeo[prefix] = g + } + setLat := func(prefix string, v float64) { + g := prefixToGeo[prefix] + g.lat = v + g.latSet = true + prefixToGeo[prefix] = g + } + attributes.Range(func(key string, val pcommon.Value) bool { + if val.Type() != pcommon.ValueTypeDouble { + return true + } + + if key == lonKey { + setLon("", val.Double()) + return true + } else if key == latKey { + setLat("", val.Double()) + return true + } else if namespace, found := strings.CutSuffix(key, "."+lonKey); found { + prefix := namespace + "." + setLon(prefix, val.Double()) + return true + } else if namespace, found := strings.CutSuffix(key, "."+latKey); found { + prefix := namespace + "." + setLat(prefix, val.Double()) + return true + } + return true + }) + + geoAttributes := pcommon.NewMap() + for prefix, geo := range prefixToGeo { + if geo.lonSet && geo.latSet { + key := prefix + mergedKey + // Geopoint expressed as an array with the format: [lon, lat] + s := geoAttributes.PutEmptySlice(key) + s.EnsureCapacity(2) + s.AppendEmpty().SetDouble(geo.lon) + s.AppendEmpty().SetDouble(geo.lat) + continue + } + // Place the attributes back if lon and lat are not present together + if geo.lonSet { + key := prefix + lonKey + geoAttributes.PutDouble(key, geo.lon) + } + if geo.latSet { + key := prefix + latKey + geoAttributes.PutDouble(key, geo.lat) + } + } + return geoAttributes +} + func writeMap(v *json.Visitor, m pcommon.Map, stringifyMapValues bool) { _ = v.OnObjectStart(-1, structform.AnyType) m.Range(func(k string, val pcommon.Value) bool { diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 475192916ec6..6698a04dce23 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -6,6 +6,7 @@ package elasticsearchexporter import ( "bytes" "encoding/json" + "github.com/stretchr/testify/require" "testing" "github.com/stretchr/testify/assert" @@ -157,3 +158,31 @@ func TestSerializeLog(t *testing.T) { }) } } + +func TestMergeGeolocation(t *testing.T) { + attributes := map[string]any{ + "geo.location.lon": 1.1, + "geo.location.lat": 2.2, + "foo.bar.geo.location.lon": 3.3, + "foo.bar.geo.location.lat": 4.4, + "a.geo.location.lon": 5.5, + "b.geo.location.lat": 6.6, + "unrelatedgeo.location.lon": 7.7, + "unrelatedgeo.location.lat": 8.8, + "d": 9.9, + "e.geo.location.lon": "foo", + "e.geo.location.lat": "bar", + } + wantAttributes := map[string]any{ + "a.geo.location.lon": 5.5, + "b.geo.location.lat": 6.6, + "geo.location": []any{1.1, 2.2}, + "foo.bar.geo.location": []any{3.3, 4.4}, + } + input := pcommon.NewMap() + err := input.FromRaw(attributes) + require.NoError(t, err) + output := mergeGeolocation(input) + after := output.AsRaw() + assert.Equal(t, wantAttributes, after) +} From 16145d23ffe19c9b0644ee9411dae19a12d0ec54 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:35:19 +0100 Subject: [PATCH 23/30] Fix imports --- exporter/elasticsearchexporter/exporter.go | 3 ++- exporter/elasticsearchexporter/pdata_serializer_test.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go index 173cd9ff2d33..99fc5674cdd2 100644 --- 
a/exporter/elasticsearchexporter/exporter.go +++ b/exporter/elasticsearchexporter/exporter.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/pool" "runtime" "sync" "time" @@ -19,6 +18,8 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/pool" ) type elasticsearchExporter struct { diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 6698a04dce23..1cf4687db67e 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -6,10 +6,10 @@ package elasticsearchexporter import ( "bytes" "encoding/json" - "github.com/stretchr/testify/require" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" ) From 69ae5adb6ab0d35875af861ba1ee430b162dc2b4 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 09:41:34 +0100 Subject: [PATCH 24/30] Remove appendValueOnConflict parameter as it's always true --- .../internal/objmodel/objmodel.go | 32 +++++++-------- .../internal/objmodel/objmodel_test.go | 40 +++++-------------- exporter/elasticsearchexporter/model.go | 6 +-- 3 files changed, 29 insertions(+), 49 deletions(-) diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go index 25c99b19088a..b60a90daf0a6 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel.go @@ -218,12 +218,12 @@ func (doc *Document) sort() { // The filtering only keeps the last value for a key. // // Dedup ensure that keys are sorted. -func (doc *Document) Dedup(appendValueOnConflict bool) { +func (doc *Document) Dedup() { // 1. Always ensure the fields are sorted, Dedup support requires // Fields to be sorted. doc.sort() - // 2. rename fields if a primitive value is overwritten by an object if appendValueOnConflict. + // 2. rename fields if a primitive value is overwritten by an object. // For example the pair (path.x=1, path.x.a="test") becomes: // (path.x.value=1, path.x.a="test"). // @@ -236,19 +236,17 @@ func (doc *Document) Dedup(appendValueOnConflict bool) { // field in favor of the `value` field in the document. // // This step removes potential conflicts when dedotting and serializing fields. - if appendValueOnConflict { - var renamed bool - for i := 0; i < len(doc.fields)-1; i++ { - key, nextKey := doc.fields[i].key, doc.fields[i+1].key - if len(key) < len(nextKey) && strings.HasPrefix(nextKey, key) && nextKey[len(key)] == '.' { - renamed = true - doc.fields[i].key = key + ".value" - } - } - if renamed { - doc.sort() + var renamed bool + for i := 0; i < len(doc.fields)-1; i++ { + key, nextKey := doc.fields[i].key, doc.fields[i+1].key + if len(key) < len(nextKey) && strings.HasPrefix(nextKey, key) && nextKey[len(key)] == '.' { + renamed = true + doc.fields[i].key = key + ".value" } } + if renamed { + doc.sort() + } // 3. mark duplicates as 'ignore' // @@ -262,7 +260,7 @@ func (doc *Document) Dedup(appendValueOnConflict bool) { // 4. 
fix objects that might be stored in arrays for i := range doc.fields { - doc.fields[i].value.Dedup(appendValueOnConflict) + doc.fields[i].value.Dedup() } } @@ -478,13 +476,13 @@ func (v *Value) sort() { // Dedup recursively dedups keys in stored documents. // // NOTE: The value MUST be sorted. -func (v *Value) Dedup(appendValueOnConflict bool) { +func (v *Value) Dedup() { switch v.kind { case KindObject: - v.doc.Dedup(appendValueOnConflict) + v.doc.Dedup() case KindArr: for i := range v.arr { - v.arr[i].Dedup(appendValueOnConflict) + v.arr[i].Dedup() } } } diff --git a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go index 0128120508ec..915ad9eceae0 100644 --- a/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go +++ b/exporter/elasticsearchexporter/internal/objmodel/objmodel_test.go @@ -86,9 +86,8 @@ func TestObjectModel_CreateMap(t *testing.T) { func TestObjectModel_Dedup(t *testing.T) { tests := map[string]struct { - build func() Document - appendValueOnConflict bool - want Document + build func() Document + want Document }{ "no duplicates": { build: func() (doc Document) { @@ -96,8 +95,7 @@ func TestObjectModel_Dedup(t *testing.T) { doc.AddInt("c", 3) return doc }, - appendValueOnConflict: true, - want: Document{fields: []field{{"a", IntValue(1)}, {"c", IntValue(3)}}}, + want: Document{fields: []field{{"a", IntValue(1)}, {"c", IntValue(3)}}}, }, "duplicate keys": { build: func() (doc Document) { @@ -106,8 +104,7 @@ func TestObjectModel_Dedup(t *testing.T) { doc.AddInt("a", 2) return doc }, - appendValueOnConflict: true, - want: Document{fields: []field{{"a", ignoreValue}, {"a", IntValue(2)}, {"c", IntValue(3)}}}, + want: Document{fields: []field{{"a", ignoreValue}, {"a", IntValue(2)}, {"c", IntValue(3)}}}, }, "duplicate after flattening from map: namespace object at end": { build: func() Document { @@ -117,8 +114,7 @@ func TestObjectModel_Dedup(t *testing.T) { am.PutEmptyMap("namespace").PutInt("a", 23) return DocumentFromAttributes(am) }, - appendValueOnConflict: true, - want: Document{fields: []field{{"namespace.a", ignoreValue}, {"namespace.a", IntValue(23)}, {"toplevel", StringValue("test")}}}, + want: Document{fields: []field{{"namespace.a", ignoreValue}, {"namespace.a", IntValue(23)}, {"toplevel", StringValue("test")}}}, }, "duplicate after flattening from map: namespace object at beginning": { build: func() Document { @@ -128,8 +124,7 @@ func TestObjectModel_Dedup(t *testing.T) { am.PutStr("toplevel", "test") return DocumentFromAttributes(am) }, - appendValueOnConflict: true, - want: Document{fields: []field{{"namespace.a", ignoreValue}, {"namespace.a", IntValue(42)}, {"toplevel", StringValue("test")}}}, + want: Document{fields: []field{{"namespace.a", ignoreValue}, {"namespace.a", IntValue(42)}, {"toplevel", StringValue("test")}}}, }, "dedup in arrays": { build: func() (doc Document) { @@ -141,7 +136,6 @@ func TestObjectModel_Dedup(t *testing.T) { doc.Add("arr", ArrValue(Value{kind: KindObject, doc: embedded})) return doc }, - appendValueOnConflict: true, want: Document{fields: []field{{"arr", ArrValue(Value{kind: KindObject, doc: Document{fields: []field{ {"a", ignoreValue}, {"a", IntValue(2)}, @@ -154,8 +148,7 @@ func TestObjectModel_Dedup(t *testing.T) { doc.AddInt("namespace.a", 2) return doc }, - appendValueOnConflict: true, - want: Document{fields: []field{{"namespace.a", IntValue(2)}, {"namespace.value", IntValue(1)}}}, + want: Document{fields: []field{{"namespace.a", 
IntValue(2)}, {"namespace.value", IntValue(1)}}}, }, "dedup removes primitive if value exists": { build: func() (doc Document) { @@ -164,25 +157,14 @@ func TestObjectModel_Dedup(t *testing.T) { doc.AddInt("namespace.value", 3) return doc }, - appendValueOnConflict: true, - want: Document{fields: []field{{"namespace.a", IntValue(2)}, {"namespace.value", ignoreValue}, {"namespace.value", IntValue(3)}}}, - }, - "dedup without append value on conflict": { - build: func() (doc Document) { - doc.AddInt("namespace", 1) - doc.AddInt("namespace.a", 2) - doc.AddInt("namespace.value", 3) - return doc - }, - appendValueOnConflict: false, - want: Document{fields: []field{{"namespace", IntValue(1)}, {"namespace.a", IntValue(2)}, {"namespace.value", IntValue(3)}}}, + want: Document{fields: []field{{"namespace.a", IntValue(2)}, {"namespace.value", ignoreValue}, {"namespace.value", IntValue(3)}}}, }, } for name, test := range tests { t.Run(name, func(t *testing.T) { doc := test.build() - doc.Dedup(test.appendValueOnConflict) + doc.Dedup() assert.Equal(t, test.want, doc) }) } @@ -300,7 +282,7 @@ func TestDocument_Serialize_Flat(t *testing.T) { m := pcommon.NewMap() assert.NoError(t, m.FromRaw(test.attrs)) doc := DocumentFromAttributes(m) - doc.Dedup(true) + doc.Dedup() err := doc.Serialize(&buf, false) require.NoError(t, err) @@ -361,7 +343,7 @@ func TestDocument_Serialize_Dedot(t *testing.T) { m := pcommon.NewMap() assert.NoError(t, m.FromRaw(test.attrs)) doc := DocumentFromAttributes(m) - doc.Dedup(true) + doc.Dedup() err := doc.Serialize(&buf, true) require.NoError(t, err) diff --git a/exporter/elasticsearchexporter/model.go b/exporter/elasticsearchexporter/model.go index 6fe5f596049e..b18c9d2f2917 100644 --- a/exporter/elasticsearchexporter/model.go +++ b/exporter/elasticsearchexporter/model.go @@ -123,7 +123,7 @@ func (m *encodeModel) encodeLog(resource pcommon.Resource, resourceSchemaURL str default: document = m.encodeLogDefaultMode(resource, record, scope) } - document.Dedup(true) + document.Dedup() return document.Serialize(buf, m.dedot) } @@ -202,7 +202,7 @@ func (m *encodeModel) encodeLogECSMode(resource pcommon.Resource, record plog.Lo } func (m *encodeModel) encodeDocument(document objmodel.Document, buf *bytes.Buffer) error { - document.Dedup(true) + document.Dedup() err := document.Serialize(buf, m.dedot) if err != nil { @@ -491,7 +491,7 @@ func (m *encodeModel) encodeSpan(resource pcommon.Resource, resourceSchemaURL st default: document = m.encodeSpanDefaultMode(resource, span, scope) } - document.Dedup(true) + document.Dedup() err := document.Serialize(buf, m.dedot) return err } From 8fcd99b525141e856d77ce12d9405f368a946a92 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 11:41:13 +0100 Subject: [PATCH 25/30] Log validation error when metric with same name has already been serialized --- .../elasticsearchexporter/pdata_serializer.go | 14 ++++++ .../pdata_serializer_test.go | 44 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 1afa5029c6be..b7f74beefa36 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -6,6 +6,7 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry import ( "bytes" "encoding/hex" + "fmt" "strings" "github.com/elastic/go-structform" @@ -48,8 +49,21 @@ func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, 
validationErro dynamicTemplates := make(map[string]string, len(dataPoints)) var docCount uint64 + metricNames := make(map[string]bool, len(dataPoints)) for _, dp := range dataPoints { metric := dp.Metric() + if _, present := metricNames[metric.Name()]; present { + *validationErrors = append( + *validationErrors, + fmt.Errorf( + "metric with name '%s' has already been serialized in document with timestamp %s", + metric.Name(), + dp.Timestamp().AsTime().UTC().Format(tsLayout), + ), + ) + continue + } + metricNames[metric.Name()] = true value, err := dp.Value() if dp.HasMappingHint(hintDocCount) { docCount = dp.DocCount() diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index 1cf4687db67e..f3f595cdabff 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -6,12 +6,14 @@ package elasticsearchexporter import ( "bytes" "encoding/json" + "fmt" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" + "go.opentelemetry.io/collector/pdata/pmetric" ) func TestSerializeLog(t *testing.T) { @@ -159,6 +161,48 @@ func TestSerializeLog(t *testing.T) { } } +func TestSerializeMetricsConflict(t *testing.T) { + resourceMetrics := pmetric.NewResourceMetrics() + scopeMetrics := resourceMetrics.ScopeMetrics().AppendEmpty() + var dataPoints []dataPoint + metric1 := scopeMetrics.Metrics().AppendEmpty() + metric2 := scopeMetrics.Metrics().AppendEmpty() + for _, m := range []pmetric.Metric{metric1, metric2} { + m.SetName("foo") + dp := m.SetEmptyGauge().DataPoints().AppendEmpty() + dp.SetIntValue(42) + dataPoints = append(dataPoints, newNumberDataPoint(m, dp)) + } + + var validationErrors []error + var buf bytes.Buffer + _, err := serializeMetrics(resourceMetrics.Resource(), "", scopeMetrics.Scope(), "", dataPoints, &validationErrors, &buf) + if err != nil { + t.Errorf("serializeMetrics() error = %v", err) + } + b := buf.Bytes() + eventAsJSON := string(b) + var result any + decoder := json.NewDecoder(bytes.NewBuffer(b)) + decoder.UseNumber() + if err := decoder.Decode(&result); err != nil { + t.Error(err) + } + + assert.Len(t, validationErrors, 1) + assert.Equal(t, fmt.Errorf("metric with name 'foo' has already been serialized in document with timestamp 1970-01-01T00:00:00.000000000Z"), validationErrors[0]) + + assert.Equal(t, map[string]any{ + "@timestamp": "1970-01-01T00:00:00.000000000Z", + "data_stream": map[string]any{}, + "resource": map[string]any{}, + "scope": map[string]any{}, + "metrics": map[string]any{ + "foo": json.Number("42"), + }, + }, result, eventAsJSON) +} + func TestMergeGeolocation(t *testing.T) { attributes := map[string]any{ "geo.location.lon": 1.1, From a99c3fc1fc4a18985606968c819137cee4528dbf Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sat, 11 Jan 2025 11:44:43 +0100 Subject: [PATCH 26/30] make goporto --- exporter/elasticsearchexporter/internal/pool/bufferpool.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporter/elasticsearchexporter/internal/pool/bufferpool.go b/exporter/elasticsearchexporter/internal/pool/bufferpool.go index 88f896ae384f..c4277f044daf 100644 --- a/exporter/elasticsearchexporter/internal/pool/bufferpool.go +++ b/exporter/elasticsearchexporter/internal/pool/bufferpool.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 
-package pool // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter"
+package pool // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/elasticsearchexporter/internal/pool"
 
 import (
 	"bytes"

From 4277f7677e169d27b2be264a6018b336bd948dfe Mon Sep 17 00:00:00 2001
From: Felix Barnsteiner
Date: Sun, 12 Jan 2025 10:09:11 +0100
Subject: [PATCH 27/30] Optimize and fix geo attribute serialization

---
 .../elasticsearchexporter/pdata_serializer.go | 45 ++++++++--------
 .../pdata_serializer_test.go                  | 54 +++++++------------
 2 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go
index b7f74beefa36..72d22ee9dfc8 100644
--- a/exporter/elasticsearchexporter/pdata_serializer.go
+++ b/exporter/elasticsearchexporter/pdata_serializer.go
@@ -289,7 +289,7 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues
 	if attributes.Len() == 0 {
 		return
 	}
-	geoAttributes := mergeGeolocation(attributes)
+
 	_ = v.OnKey("attributes")
 	_ = v.OnObjectStart(-1, structform.AnyType)
 	attributes.Range(func(k string, val pcommon.Value) bool {
@@ -297,25 +297,29 @@ func writeAttributes(v *json.Visitor, attributes pcommon.Map, stringifyMapValues
 		case dataStreamType, dataStreamDataset, dataStreamNamespace, mappingHintsAttrKey:
 			return true
 		}
-		if strings.HasSuffix(k, ".geo.location.lat") || strings.HasSuffix(k, ".geo.location.lon") {
+		if isGeoAttribute(k, val) {
 			return true
 		}
 		_ = v.OnKey(k)
 		writeValue(v, val, stringifyMapValues)
 		return true
 	})
-	geoAttributes.Range(func(k string, val pcommon.Value) bool {
-		_ = v.OnKey(k)
-		writeValue(v, val, stringifyMapValues)
-		return true
-	})
+	writeGeolocationAttributes(v, attributes)
 	_ = v.OnObjectFinished()
 }
 
-// mergeGeolocation returns a new map that merges all `geo.location.{lon,lat}`,
-// and namespaced `*.geo.location.{lon,lat}` attributes from the provided map to unnamespaced and namespaced `geo.location`.
-// This is to match the geo_point type in Elasticsearch.
-func mergeGeolocation(attributes pcommon.Map) pcommon.Map { +func isGeoAttribute(k string, val pcommon.Value) bool { + if val.Type() != pcommon.ValueTypeDouble { + return false + } + switch k { + case "geo.location.lat", "geo.location.lon": + return true + } + return strings.HasSuffix(k, ".geo.location.lat") || strings.HasSuffix(k, ".geo.location.lon") +} + +func writeGeolocationAttributes(v *json.Visitor, attributes pcommon.Map) { const ( lonKey = "geo.location.lon" latKey = "geo.location.lat" @@ -362,28 +366,27 @@ func mergeGeolocation(attributes pcommon.Map) pcommon.Map { return true }) - geoAttributes := pcommon.NewMap() for prefix, geo := range prefixToGeo { if geo.lonSet && geo.latSet { key := prefix + mergedKey // Geopoint expressed as an array with the format: [lon, lat] - s := geoAttributes.PutEmptySlice(key) - s.EnsureCapacity(2) - s.AppendEmpty().SetDouble(geo.lon) - s.AppendEmpty().SetDouble(geo.lat) + _ = v.OnKey(key) + _ = v.OnArrayStart(-1, structform.AnyType) + _ = v.OnFloat64(geo.lon) + _ = v.OnFloat64(geo.lat) + _ = v.OnArrayFinished() continue } // Place the attributes back if lon and lat are not present together if geo.lonSet { - key := prefix + lonKey - geoAttributes.PutDouble(key, geo.lon) + _ = v.OnKey(prefix + lonKey) + _ = v.OnFloat64(geo.lon) } if geo.latSet { - key := prefix + latKey - geoAttributes.PutDouble(key, geo.lat) + _ = v.OnKey(prefix + latKey) + _ = v.OnFloat64(geo.lat) } } - return geoAttributes } func writeMap(v *json.Visitor, m pcommon.Map, stringifyMapValues bool) { diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go index f3f595cdabff..5a37f2abb4b6 100644 --- a/exporter/elasticsearchexporter/pdata_serializer_test.go +++ b/exporter/elasticsearchexporter/pdata_serializer_test.go @@ -10,7 +10,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" @@ -117,9 +116,17 @@ func TestSerializeLog(t *testing.T) { { name: "geo attributes", logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) { - record.Attributes().PutDouble("foo.geo.location.lon", 1) - record.Attributes().PutDouble("foo.geo.location.lat", 2) - record.Attributes().PutDouble("bar.geo.location.lat", 3) + record.Attributes().PutDouble("geo.location.lon", 1.1) + record.Attributes().PutDouble("geo.location.lat", 2.2) + record.Attributes().PutDouble("foo.bar.geo.location.lon", 3.3) + record.Attributes().PutDouble("foo.bar.geo.location.lat", 4.4) + record.Attributes().PutDouble("a.geo.location.lon", 5.5) + record.Attributes().PutDouble("b.geo.location.lat", 6.6) + record.Attributes().PutDouble("unrelatedgeo.location.lon", 7.7) + record.Attributes().PutDouble("unrelatedgeo.location.lat", 8.8) + record.Attributes().PutDouble("d", 9.9) + record.Attributes().PutStr("e.geo.location.lon", "foo") + record.Attributes().PutStr("e.geo.location.lat", "bar") }, wantErr: false, expected: map[string]any{ @@ -129,8 +136,15 @@ func TestSerializeLog(t *testing.T) { "resource": map[string]any{}, "scope": map[string]any{}, "attributes": map[string]any{ - "foo.geo.location": []any{json.Number("1.0"), json.Number("2.0")}, - "bar.geo.location.lat": json.Number("3.0"), + "geo.location": []any{json.Number("1.1"), json.Number("2.2")}, + "foo.bar.geo.location": []any{json.Number("3.3"), json.Number("4.4")}, + "a.geo.location.lon": 
json.Number("5.5"), + "b.geo.location.lat": json.Number("6.6"), + "unrelatedgeo.location.lon": json.Number("7.7"), + "unrelatedgeo.location.lat": json.Number("8.8"), + "d": json.Number("9.9"), + "e.geo.location.lon": "foo", + "e.geo.location.lat": "bar", }, }, }, @@ -202,31 +216,3 @@ func TestSerializeMetricsConflict(t *testing.T) { }, }, result, eventAsJSON) } - -func TestMergeGeolocation(t *testing.T) { - attributes := map[string]any{ - "geo.location.lon": 1.1, - "geo.location.lat": 2.2, - "foo.bar.geo.location.lon": 3.3, - "foo.bar.geo.location.lat": 4.4, - "a.geo.location.lon": 5.5, - "b.geo.location.lat": 6.6, - "unrelatedgeo.location.lon": 7.7, - "unrelatedgeo.location.lat": 8.8, - "d": 9.9, - "e.geo.location.lon": "foo", - "e.geo.location.lat": "bar", - } - wantAttributes := map[string]any{ - "a.geo.location.lon": 5.5, - "b.geo.location.lat": 6.6, - "geo.location": []any{1.1, 2.2}, - "foo.bar.geo.location": []any{3.3, 4.4}, - } - input := pcommon.NewMap() - err := input.FromRaw(attributes) - require.NoError(t, err) - output := mergeGeolocation(input) - after := output.AsRaw() - assert.Equal(t, wantAttributes, after) -} From 2328a7a5fbb5ed0e203cacaf4b333ba1fb68801a Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Sun, 12 Jan 2025 11:47:40 +0100 Subject: [PATCH 28/30] Optimize timestamp serialization --- .../elasticsearchexporter/exporter_test.go | 30 +++++------ exporter/elasticsearchexporter/model_test.go | 4 -- .../elasticsearchexporter/pdata_serializer.go | 6 ++- .../pdata_serializer_test.go | 51 +++++++++++++++---- 4 files changed, 62 insertions(+), 29 deletions(-) diff --git a/exporter/elasticsearchexporter/exporter_test.go b/exporter/elasticsearchexporter/exporter_test.go index da0e2accb382..6125988ea463 100644 --- a/exporter/elasticsearchexporter/exporter_test.go +++ b/exporter/elasticsearchexporter/exporter_test.go @@ -427,7 +427,7 @@ func TestExporterLogs(t *testing.T) { body: func() pcommon.Value { return pcommon.NewValueStr("foo") }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"text":"foo"}}`), + wantDocument: []byte(`{"@timestamp":"0.0","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"0.0","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"text":"foo"}}`), }, { body: func() pcommon.Value { @@ -438,7 +438,7 @@ func TestExporterLogs(t *testing.T) { m.PutEmptyMap("inner").PutStr("foo", "bar") return vm }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), + wantDocument: 
[]byte(`{"@timestamp":"0.0","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"0.0","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), }, { body: func() pcommon.Value { @@ -450,7 +450,7 @@ func TestExporterLogs(t *testing.T) { return vm }(), isEvent: true, - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"event_name":"foo","data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), + wantDocument: []byte(`{"@timestamp":"0.0","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"event_name":"foo","data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"0.0","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"true":true,"false":false,"inner":{"foo":"bar"}}}}`), }, { body: func() pcommon.Value { @@ -461,7 +461,7 @@ func TestExporterLogs(t *testing.T) { s.AppendEmpty().SetEmptyMap().PutStr("foo", "bar") return vs }(), - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"value":["foo",false,{"foo":"bar"}]}}}`), + wantDocument: []byte(`{"@timestamp":"0.0","attributes":{"attr.foo":"attr.foo.value"},"data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"0.0","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"flattened":{"value":["foo",false,{"foo":"bar"}]}}}`), }, { body: func() pcommon.Value { @@ -473,7 +473,7 @@ func TestExporterLogs(t *testing.T) { return vs }(), isEvent: true, - wantDocument: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"event_name":"foo","data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"1970-01-01T00:00:00.000000000Z","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"value":["foo",false,{"foo":"bar"}]}}}`), + wantDocument: []byte(`{"@timestamp":"0.0","attributes":{"attr.foo":"attr.foo.value","event.name":"foo"},"event_name":"foo","data_stream":{"dataset":"attr.dataset.otel","namespace":"resource.attribute.namespace","type":"logs"},"observed_timestamp":"0.0","resource":{"attributes":{"resource.attr.foo":"resource.attr.foo.value"}},"scope":{},"body":{"structured":{"value":["foo",false,{"foo":"bar"}]}}}`), }, } { rec := newBulkRecorder() @@ -1196,19 +1196,19 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - 
Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"0.0","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[1,2,3,4],"values":[0.5,1.5,2.5,3.0]}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.foo":"histogram"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"3600000.0","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.foo":{"counts":[4,5,6,7],"values":[2.0,4.5,5.5,6.0]}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.sum":"gauge_double"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.sum":1.5},"resource":{},"scope":{},"start_timestamp":"1970-01-01T02:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"3600000.0","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.sum":1.5},"resource":{},"scope":{},"start_timestamp":"7200000.0"}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.metric.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T03:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{},"scope":{},"start_timestamp":"1970-01-01T03:00:00.000000000Z"}`), + Document: []byte(`{"@timestamp":"10800000.0","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"metric.summary":{"sum":1.5,"value_count":1}},"resource":{},"scope":{},"start_timestamp":"10800000.0"}`), }, } @@ -1277,7 +1277,7 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.sum":"gauge_long","metrics.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"0.0","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"sum":0,"summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, } @@ -1327,11 +1327,11 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.histogram.summary":"summary"}}}`), - Document: 
[]byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"0.0","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"histogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.exphistogram.summary":"summary"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"3600000.0","_doc_count":10,"data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"exphistogram.summary":{"sum":1.0,"value_count":10}},"resource":{},"scope":{}}`), }, } @@ -1370,7 +1370,7 @@ func TestExporterMetrics(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"metrics-generic.otel-default","dynamic_templates":{"metrics.foo.bar":"gauge_long","metrics.foo":"gauge_long","metrics.foo.bar.baz":"gauge_long"}}}`), - Document: []byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{},"scope":{}}`), + Document: []byte(`{"@timestamp":"0.0","data_stream":{"dataset":"generic.otel","namespace":"default","type":"metrics"},"attributes":{},"metrics":{"foo":0,"foo.bar":0,"foo.bar.baz":0},"resource":{},"scope":{}}`), }, } @@ -1655,11 +1655,11 @@ func TestExporterTraces(t *testing.T) { expected := []itemRequest{ { Action: []byte(`{"create":{"_index":"traces-generic.otel-default"}}`), - Document: []byte(`{"@timestamp":"1970-01-01T01:00:00.000000000Z","attributes":{"attr.foo":"attr.bar"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"traces"},"dropped_attributes_count":2,"dropped_events_count":3,"dropped_links_count":4,"duration":3600000000000,"kind":"Unspecified","links":[{"attributes":{"link.attr.foo":"link.attr.bar"},"dropped_attributes_count":11,"span_id":"0100000000000000","trace_id":"01000000000000000000000000000000","trace_state":"bar"}],"name":"name","resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{},"status":{"code":"Unset"},"trace_state":"foo"}`), + Document: []byte(`{"@timestamp":"3600000.0","attributes":{"attr.foo":"attr.bar"},"data_stream":{"dataset":"generic.otel","namespace":"default","type":"traces"},"dropped_attributes_count":2,"dropped_events_count":3,"dropped_links_count":4,"duration":3600000000000,"kind":"Unspecified","links":[{"attributes":{"link.attr.foo":"link.attr.bar"},"dropped_attributes_count":11,"span_id":"0100000000000000","trace_id":"01000000000000000000000000000000","trace_state":"bar"}],"name":"name","resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{},"status":{"code":"Unset"},"trace_state":"foo"}`), }, { Action: []byte(`{"create":{"_index":"logs-generic.otel-default"}}`), - Document: 
[]byte(`{"@timestamp":"1970-01-01T00:00:00.000000000Z","event_name":"exception","attributes":{"event.attr.foo":"event.attr.bar","event.name":"exception"},"event_name":"exception","data_stream":{"dataset":"generic.otel","namespace":"default","type":"logs"},"dropped_attributes_count":1,"resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{}}`), + Document: []byte(`{"@timestamp":"0.0","event_name":"exception","attributes":{"event.attr.foo":"event.attr.bar","event.name":"exception"},"event_name":"exception","data_stream":{"dataset":"generic.otel","namespace":"default","type":"logs"},"dropped_attributes_count":1,"resource":{"attributes":{"resource.foo":"resource.bar"}},"scope":{}}`), }, } diff --git a/exporter/elasticsearchexporter/model_test.go b/exporter/elasticsearchexporter/model_test.go index f2bda530ce5d..772674b9af86 100644 --- a/exporter/elasticsearchexporter/model_test.go +++ b/exporter/elasticsearchexporter/model_test.go @@ -914,8 +914,6 @@ func TestMapLogAttributesToECS(t *testing.T) { type OTelRecord struct { TraceID OTelTraceID `json:"trace_id"` SpanID OTelSpanID `json:"span_id"` - Timestamp time.Time `json:"@timestamp"` - ObservedTimestamp time.Time `json:"observed_timestamp"` SeverityNumber int32 `json:"severity_number"` SeverityText string `json:"severity_text"` EventName string `json:"event_name"` @@ -1145,8 +1143,6 @@ func TestEncodeLogOtelMode(t *testing.T) { // helper function that creates the OTel LogRecord from the test structure func createTestOTelLogRecord(t *testing.T, rec OTelRecord) (plog.LogRecord, pcommon.InstrumentationScope, pcommon.Resource) { record := plog.NewLogRecord() - record.SetTimestamp(pcommon.Timestamp(uint64(rec.Timestamp.UnixNano()))) //nolint:gosec // this input is controlled by tests - record.SetObservedTimestamp(pcommon.Timestamp(uint64(rec.ObservedTimestamp.UnixNano()))) //nolint:gosec // this input is controlled by tests record.SetTraceID(pcommon.TraceID(rec.TraceID)) record.SetSpanID(pcommon.SpanID(rec.SpanID)) diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go index 72d22ee9dfc8..20a7ba95a70e 100644 --- a/exporter/elasticsearchexporter/pdata_serializer.go +++ b/exporter/elasticsearchexporter/pdata_serializer.go @@ -7,6 +7,7 @@ import ( "bytes" "encoding/hex" "fmt" + "strconv" "strings" "github.com/elastic/go-structform" @@ -431,7 +432,10 @@ func writeValue(v *json.Visitor, val pcommon.Value, stringifyMaps bool) { func writeTimestampField(v *json.Visitor, key string, timestamp pcommon.Timestamp) { _ = v.OnKey(key) - _ = v.OnString(timestamp.AsTime().UTC().Format(tsLayout)) + nsec := uint64(timestamp) + msec := nsec / 1e6 + nsec -= msec * 1e6 + _ = v.OnString(strconv.FormatUint(msec, 10) + "." 
+ strconv.FormatUint(nsec, 10))
 }
 
 func writeUIntField(v *json.Visitor, key string, i uint64) {
diff --git a/exporter/elasticsearchexporter/pdata_serializer_test.go b/exporter/elasticsearchexporter/pdata_serializer_test.go
index 5a37f2abb4b6..85ba952d140f 100644
--- a/exporter/elasticsearchexporter/pdata_serializer_test.go
+++ b/exporter/elasticsearchexporter/pdata_serializer_test.go
@@ -40,8 +40,8 @@ func TestSerializeLog(t *testing.T) {
 				resource.Attributes().PutEmptyMap("resource_map").PutStr("foo", "bar")
 				scope.Attributes().PutEmptyMap("scope_map").PutStr("foo", "bar")
 			},
 			wantErr: false,
 			expected: map[string]any{
-				"@timestamp":         "1970-01-01T00:00:00.000000000Z",
-				"observed_timestamp": "1970-01-01T00:00:00.000000000Z",
+				"@timestamp":         "0.0",
+				"observed_timestamp": "0.0",
 				"data_stream": map[string]any{
 					"type": "logs",
 				},
@@ -84,8 +84,8 @@ func TestSerializeLog(t *testing.T) {
 			},
 			wantErr: false,
 			expected: map[string]any{
-				"@timestamp":         "1970-01-01T00:00:00.000000000Z",
-				"observed_timestamp": "1970-01-01T00:00:00.000000000Z",
+				"@timestamp":         "0.0",
+				"observed_timestamp": "0.0",
 				"data_stream":        map[string]any{},
 				"resource":           map[string]any{},
 				"scope":              map[string]any{},
@@ -101,8 +101,8 @@ func TestSerializeLog(t *testing.T) {
 			},
 			wantErr: false,
 			expected: map[string]any{
-				"@timestamp":         "1970-01-01T00:00:00.000000000Z",
-				"observed_timestamp": "1970-01-01T00:00:00.000000000Z",
+				"@timestamp":         "0.0",
+				"observed_timestamp": "0.0",
 				"data_stream":        map[string]any{},
 				"resource":           map[string]any{},
 				"scope":              map[string]any{},
@@ -130,8 +130,8 @@ func TestSerializeLog(t *testing.T) {
 			},
 			wantErr: false,
 			expected: map[string]any{
-				"@timestamp":         "1970-01-01T00:00:00.000000000Z",
-				"observed_timestamp": "1970-01-01T00:00:00.000000000Z",
+				"@timestamp":         "0.0",
+				"observed_timestamp": "0.0",
 				"data_stream":        map[string]any{},
 				"resource":           map[string]any{},
 				"scope":              map[string]any{},
@@ -148,6 +148,39 @@ func TestSerializeLog(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "event_name takes precedence over attributes.event.name",
+			logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) {
+				record.Attributes().PutStr("event.name", "foo")
+				record.SetEventName("bar")
+			},
+			wantErr: false,
+			expected: map[string]any{
+				"@timestamp":         "0.0",
+				"observed_timestamp": "0.0",
+				"event_name":         "bar",
+				"data_stream":        map[string]any{},
+				"resource":           map[string]any{},
+				"scope":              map[string]any{},
+				"attributes": map[string]any{
+					"event.name": "foo",
+				},
+			},
+		},
+		{
+			name: "timestamp",
+			logCustomizer: func(_ pcommon.Resource, _ pcommon.InstrumentationScope, record plog.LogRecord) {
+				record.SetTimestamp(1721314113467654123)
+			},
+			wantErr: false,
+			expected: map[string]any{
+				"@timestamp":         "1721314113467.654123",
+				"observed_timestamp": "0.0",
+				"data_stream":        map[string]any{},
+				"resource":           map[string]any{},
+				"scope":              map[string]any{},
+			},
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -207,7 +240,7 @@ func TestSerializeMetricsConflict(t *testing.T) {
 	assert.Equal(t, fmt.Errorf("metric with name 'foo' has already been serialized in document with timestamp 1970-01-01T00:00:00.000000000Z"), validationErrors[0])
 
 	assert.Equal(t, map[string]any{
-		"@timestamp":  "1970-01-01T00:00:00.000000000Z",
+		"@timestamp":  "0.0",
 		"data_stream": map[string]any{},
 		"resource":    map[string]any{},
 		"scope":       map[string]any{},

From 1dc4635398c3d3128bd2bb0a2d75d83a20de9776 Mon Sep 17 00:00:00 2001
From: Felix Barnsteiner
Date: Sun, 12 Jan 2025 11:55:44 +0100
Subject: [PATCH 29/30] Add todo for more optimization for metrics
---
 exporter/elasticsearchexporter/pdata_serializer.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/exporter/elasticsearchexporter/pdata_serializer.go b/exporter/elasticsearchexporter/pdata_serializer.go
index 20a7ba95a70e..5fbcc5c91fa0 100644
--- a/exporter/elasticsearchexporter/pdata_serializer.go
+++ b/exporter/elasticsearchexporter/pdata_serializer.go
@@ -65,6 +65,8 @@ func serializeDataPoints(v *json.Visitor, dataPoints []dataPoint, validationErro
 			continue
 		}
 		metricNames[metric.Name()] = true
+		// TODO: there's potential for further optimization by serializing the value directly instead of allocating a pcommon.Value;
+		// the tradeoff is that this would mean duplicating the logic for the ECS mode.
 		value, err := dp.Value()
 		if dp.HasMappingHint(hintDocCount) {
 			docCount = dp.DocCount()

From 14f487d0471d66a12ec921f16d628c79cc26b01c Mon Sep 17 00:00:00 2001
From: Felix Barnsteiner
Date: Mon, 13 Jan 2025 08:43:21 +0100
Subject: [PATCH 30/30] Recycle buffer when encoding returns an error

---
 exporter/elasticsearchexporter/exporter.go | 15 +++++++++++----
 .../internal/pool/bufferpool.go            |  4 ++--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/exporter/elasticsearchexporter/exporter.go b/exporter/elasticsearchexporter/exporter.go
index 99fc5674cdd2..27db606c6963 100644
--- a/exporter/elasticsearchexporter/exporter.go
+++ b/exporter/elasticsearchexporter/exporter.go
@@ -175,12 +175,14 @@ func (e *elasticsearchExporter) pushLogRecord(
 		fIndex = formattedIndex
 	}
 
-	buffer := e.bufferPool.NewPooledBuffer()
-	err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL, buffer.Buffer)
+	buf := e.bufferPool.NewPooledBuffer()
+	err := e.model.encodeLog(resource, resourceSchemaURL, record, scope, scopeSchemaURL, buf.Buffer)
 	if err != nil {
+		buf.Recycle()
 		return fmt.Errorf("failed to encode log event: %w", err)
 	}
-	return bulkIndexerSession.Add(ctx, fIndex, buffer, nil)
+	// not recycling after Add returns an error, as we don't know whether it has already been recycled
+	return bulkIndexerSession.Add(ctx, fIndex, buf, nil)
 }
 
 func (e *elasticsearchExporter) pushMetricsData(
@@ -293,10 +295,12 @@ func (e *elasticsearchExporter) pushMetricsData(
 			buf := e.bufferPool.NewPooledBuffer()
 			dynamicTemplates, err := e.model.encodeMetrics(resource, resourceMetric.SchemaUrl(), scope, scopeMetrics.SchemaUrl(), dataPoints, &validationErrs, buf.Buffer)
 			if err != nil {
+				buf.Recycle()
 				errs = append(errs, err)
 				continue
 			}
 			if err := session.Add(ctx, fIndex, buf, dynamicTemplates); err != nil {
+				// not recycling after Add returns an error, as we don't know whether it has already been recycled
 				if cerr := ctx.Err(); cerr != nil {
 					return cerr
 				}
@@ -414,8 +418,10 @@ func (e *elasticsearchExporter) pushTraceRecord(
 	buf := e.bufferPool.NewPooledBuffer()
 	err := e.model.encodeSpan(resource, resourceSchemaURL, span, scope, scopeSchemaURL, buf.Buffer)
 	if err != nil {
+		buf.Recycle()
 		return fmt.Errorf("failed to encode trace record: %w", err)
 	}
+	// not recycling after Add returns an error, as we don't know whether it has already been recycled
 	return bulkIndexerSession.Add(ctx, fIndex, buf, nil)
 }
 
@@ -444,8 +450,9 @@ func (e *elasticsearchExporter) pushSpanEvent(
 	buf := e.bufferPool.NewPooledBuffer()
 	e.model.encodeSpanEvent(resource, resourceSchemaURL, span, spanEvent, scope, scopeSchemaURL, buf.Buffer)
 	if buf.Buffer.Len() == 0 {
+		buf.Recycle()
 		return nil
 	}
-
+	// not recycling after Add returns an error, as we don't know whether it has already been recycled
 	return bulkIndexerSession.Add(ctx, fIndex, buf, nil)
 }
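A note on the ownership convention in the exporter.go hunks above: the producer recycles the pooled buffer only when encoding fails before the hand-off. Once the buffer has been passed to Add, the consumer's WriteTo (see the bufferpool.go change below) is the single place that returns it to the pool, so recycling again could put the same buffer twice. A minimal standalone sketch of that convention, with encodeDoc, push, and pool as illustrative stand-ins rather than the exporter's actual API:

package main

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"sync"
)

var pool = &sync.Pool{New: func() any { return new(bytes.Buffer) }}

// pooledBuffer mirrors the shape of internal/pool.PooledBuffer:
// WriteTo drains the buffer and returns it to the pool in one step.
type pooledBuffer struct {
	buf *bytes.Buffer
}

func (p pooledBuffer) Recycle() {
	p.buf.Reset()
	pool.Put(p.buf)
}

func (p pooledBuffer) WriteTo(w io.Writer) (int64, error) {
	defer p.Recycle() // ownership ends here, whether or not the write succeeds
	return p.buf.WriteTo(w)
}

// encodeDoc stands in for model.encodeLog/encodeSpan/encodeMetrics.
func encodeDoc(buf *bytes.Buffer, doc string) error {
	if doc == "" {
		return errors.New("nothing to encode")
	}
	buf.WriteString(doc + "\n")
	return nil
}

// push follows the convention from the patch: recycle on encode error,
// never after the hand-off, because WriteTo already recycles exactly once.
func push(w io.Writer, doc string) error {
	pb := pooledBuffer{buf: pool.Get().(*bytes.Buffer)}
	if err := encodeDoc(pb.buf, doc); err != nil {
		pb.Recycle() // safe: the buffer was never handed off
		return fmt.Errorf("failed to encode document: %w", err)
	}
	_, err := pb.WriteTo(w) // hand-off; no recycling past this point
	return err
}

func main() {
	_ = push(os.Stdout, `{"@timestamp":"0.0"}`)
	_ = push(os.Stdout, "") // error path: recycled once, no double put
}

Routing the recycle through WriteTo means the buffer returns to the pool exactly once, on the same path for both successful and failed writes; only the encode-error path, where the buffer never reaches a consumer, has to recycle explicitly.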
diff --git a/exporter/elasticsearchexporter/internal/pool/bufferpool.go b/exporter/elasticsearchexporter/internal/pool/bufferpool.go index c4277f044daf..ee0b260efb37 100644 --- a/exporter/elasticsearchexporter/internal/pool/bufferpool.go +++ b/exporter/elasticsearchexporter/internal/pool/bufferpool.go @@ -29,12 +29,12 @@ type PooledBuffer struct { pool *sync.Pool } -func (p PooledBuffer) recycle() { +func (p PooledBuffer) Recycle() { p.Buffer.Reset() p.pool.Put(p.Buffer) } func (p PooledBuffer) WriteTo(w io.Writer) (n int64, err error) { - defer p.recycle() + defer p.Recycle() return p.Buffer.WriteTo(w) }
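For reference, the @timestamp/observed_timestamp wire format introduced earlier in the series is epoch milliseconds followed by the leftover intra-millisecond nanoseconds after the dot, which is why the zero timestamp renders as "0.0" in the updated tests. A standalone sketch of the same split, mirroring writeTimestampField but reimplemented here purely for illustration:

package main

import (
	"fmt"
	"strconv"
)

// formatEpochMillis reproduces the "<millis>.<leftover nanos>" layout:
// the part before the dot is epoch milliseconds, the part after is the
// remaining sub-millisecond nanoseconds.
func formatEpochMillis(nanos uint64) string {
	msec := nanos / 1e6
	nsec := nanos - msec*1e6
	return strconv.FormatUint(msec, 10) + "." + strconv.FormatUint(nsec, 10)
}

func main() {
	fmt.Println(formatEpochMillis(1721314113467654123)) // 1721314113467.654123
	fmt.Println(formatEpochMillis(0))                   // 0.0
}

Note that the remainder is written without zero padding, so the digits after the dot are a nanosecond count rather than a decimal fraction of a millisecond; the test value 1721314113467654123 happens to have a six-digit remainder (654123), so it reads correctly either way.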