From 15ddc2998deabb640f904d6ed812bf89f43ab957 Mon Sep 17 00:00:00 2001 From: kaiyan-sheng Date: Wed, 20 Nov 2024 13:17:06 -0700 Subject: [PATCH] [receiver/awsfirehosereceiver] Fix timestamp in cwlogs (#36122) #### Description When testing ingesting json format logs from CloudWatch log groups using Firehose, I see the timestamp field has been populated wrong. For example the timestamp for the log message I ingested should be `2024-10-23T21:28:49.707` but I'm getting a timestamp from `1970-01-01T00:28:49.707Z`. ``` { "cloud": { "account": { "id": "123456789012" } }, "agent": { "name": "otlp", "version": "unknown" }, "@timestamp": "1970-01-01T00:28:49.707Z", "log": {}, "data_stream": { "namespace": "default", "type": "logs", "dataset": "apm.app.unknown" }, "service": { "environment": "unset", "name": "unknown", "language": { "name": "unknown" } }, "event": {}, "message": "test-1", "labels": { "aws_cloudwatch_log_stream_name": "test-log-stream", "aws_cloudwatch_log_group_name": "test-cloudwatch-logs-ks" } } } ``` This issue is caused by `pcommon.Timestamp` is a time specified as UNIX Epoch time in nanoseconds but timestamp in cloudwatch logs are in milliseconds. So converting the timestamp from milliseconds to nanoseconds is needed. #### Testing Added unit test. --- .chloggen/fix_timestamp.yaml | 25 +++++++++++++++++++ .../internal/unmarshaler/cwlog/logsbuilder.go | 6 ++++- .../unmarshaler/cwlog/unmarshaler_test.go | 22 ++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 .chloggen/fix_timestamp.yaml diff --git a/.chloggen/fix_timestamp.yaml b/.chloggen/fix_timestamp.yaml new file mode 100644 index 000000000000..b7e96e3d663e --- /dev/null +++ b/.chloggen/fix_timestamp.yaml @@ -0,0 +1,25 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: awsfirehosereceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: fix timestamp when ingesting logs from CloudWatch through firehose + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [36122] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] \ No newline at end of file diff --git a/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/logsbuilder.go b/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/logsbuilder.go index eae5902b5ea1..5dc7a3db59f8 100644 --- a/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/logsbuilder.go +++ b/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/logsbuilder.go @@ -4,6 +4,8 @@ package cwlog // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog" import ( + "time" + "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/plog" conventions "go.opentelemetry.io/collector/semconv/v1.6.1" @@ -44,7 +46,9 @@ func newResourceLogsBuilder(logs plog.Logs, attrs resourceAttributes) *resourceL func (rlb *resourceLogsBuilder) AddLog(log cWLog) { for _, event := range log.LogEvents { logLine := rlb.rls.AppendEmpty() - logLine.SetTimestamp(pcommon.Timestamp(event.Timestamp)) + // pcommon.Timestamp is a time specified as UNIX Epoch time in nanoseconds + // but timestamp in cloudwatch logs are in milliseconds. + logLine.SetTimestamp(pcommon.Timestamp(event.Timestamp * int64(time.Millisecond))) logLine.Body().SetStr(event.Message) } } diff --git a/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/unmarshaler_test.go b/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/unmarshaler_test.go index fbd683939aa5..71b49295df60 100644 --- a/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/unmarshaler_test.go +++ b/receiver/awsfirehosereceiver/internal/unmarshaler/cwlog/unmarshaler_test.go @@ -81,3 +81,25 @@ func TestUnmarshal(t *testing.T) { }) } } + +func TestLogTimestamp(t *testing.T) { + unmarshaler := NewUnmarshaler(zap.NewNop()) + record, err := os.ReadFile(filepath.Join(".", "testdata", "single_record")) + require.NoError(t, err) + + compressedRecord, err := compression.Zip(record) + require.NoError(t, err) + records := [][]byte{compressedRecord} + + got, err := unmarshaler.Unmarshal(records) + require.NoError(t, err) + require.NotNil(t, got) + require.Equal(t, 1, got.ResourceLogs().Len()) + + rm := got.ResourceLogs().At(0) + require.Equal(t, 1, rm.ScopeLogs().Len()) + ilm := rm.ScopeLogs().At(0) + ilm.LogRecords().At(0).Timestamp() + expectedTimestamp := "2024-09-05 13:47:15.523 +0000 UTC" + require.Equal(t, expectedTimestamp, ilm.LogRecords().At(0).Timestamp().String()) +}