Skip to content

Commit

Permalink
[filelog] clusterrole settings and parsing k8s, prom metrics (#191)
Browse files Browse the repository at this point in the history
* [filelog] clusterrole settings and parsing k8s, prom metrics

* make prom configurable

* Automatic application of license header

* reduce batch size, because of circuitbreaker problem

* switch to variables for index, adding k8sevents

* add switch for podMonitor

* bump chart

* bump in plugindefinition

---------

Co-authored-by: License Bot <[email protected]>
  • Loading branch information
2 people authored and hodanoori committed Jul 25, 2024
1 parent 1782707 commit 3b27bf6
Show file tree
Hide file tree
Showing 7 changed files with 300 additions and 38 deletions.
2 changes: 1 addition & 1 deletion opentelemetry-operator/chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

apiVersion: v2
name: opentelemetry-operator
version: 0.0.15
version: 0.0.16
description: OpenTelemetry Operator Helm chart for Kubernetes
icon: https://raw.githubusercontent.com/cncf/artwork/a718fa97fffec1b9fd14147682e9e3ac0c8817cb/projects/opentelemetry/icon/color/opentelemetry-icon-color.png
type: application
Expand Down
19 changes: 19 additions & 0 deletions opentelemetry-operator/chart/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# Cluster-wide read-only (get/list/watch) permissions named logs-collector.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: logs-collector
  labels:
    app: logs-collector
rules:
  # Core API group: pods, namespaces and nodes.
  - apiGroups:
      - ""
    resources:
      - pods
      - namespaces
      - nodes
    verbs:
      - get
      - watch
      - list
  # ReplicaSets, granted under both the "apps" and "extensions" API groups.
  - apiGroups:
      - apps
    resources:
      - replicasets
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - extensions
    resources:
      - replicasets
    verbs:
      - get
      - list
      - watch
17 changes: 17 additions & 0 deletions opentelemetry-operator/chart/templates/clusterrolebindings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# Binds the logs-collector ClusterRole to the logs-collector ServiceAccount
# in the otel namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: logs-collector
  labels:
    app: logs-collector
subjects:
  - kind: ServiceAccount
    name: logs-collector
    namespace: otel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: logs-collector
260 changes: 225 additions & 35 deletions opentelemetry-operator/chart/templates/logs-collector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@ kind: OpenTelemetryCollector
metadata:
name: logs
spec:
mode: deployment
image: {{ index .Values "opentelemetry-operator" "manager" "collectorImage" "repository" }}:{{ index .Values "opentelemetry-operator" "manager" "collectorImage" "tag" }}
volumeMounts:
- mountPath: /var/log
name: varlog
readOnly: true
volumes:
- name: varlog
hostPath:
path: /var/log
mode: daemonset
tolerations:
- operator: Exists
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: KUBE_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: MY_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: username
valueFrom:
secretKeyRef:
Expand All @@ -28,7 +34,25 @@ spec:
valueFrom:
secretKeyRef:
name: otel-basic-auth
key: password
key: password
- name: cluster
value: "{{ .Values.open_telemetry.cluster }}"
- name: region
value: "{{ .Values.open_telemetry.region }}"
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# Named container port on 9999, declared only when the podMonitor switch is on.
ports:
  - name: prometheus
    port: 9999
{{- end }}
image: {{ index .Values "opentelemetry-operator" "manager" "collectorImage" "repository" }}:{{ index .Values "opentelemetry-operator" "manager" "collectorImage" "tag" }}
volumeMounts:
- mountPath: /var/log
name: varlog
readOnly: true
volumes:
- name: varlog
hostPath:
path: /var/log
config: |
receivers:
filelog:
Expand All @@ -39,41 +63,183 @@ spec:
start_at: end
retry_on_failure:
enabled: true
operators:
- type: router
id: get-format
- id: get-format
type: router
routes:
- output: parser-containerd
- output: set-containerd
expr: 'body matches "^[^ Z]+Z"'
default: parse-time
# Parse CRI-Containerd format
- type: regex_parser
id: parser-containerd
regex: '^(?P<time>[^ Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<logs>.*)$'
- id: set-containerd
type: add
field: resource["container.runtime"]
value: "containerd"
# Parse CRI-Containerd format
- id: parser-containerd
type: regex_parser
regex: '^(?P<time>[^ Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*)'
timestamp:
parse_from: attributes.time
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
- type: regex_parser
id: parse-time
- id: parse-time
type: regex_parser
regex: '^(?P<time>[^ Z]+)'
timestamp:
parse_from: attributes.time
layout_type: gotime
layout: '2006-01-02T15:04:05.999999999'
- type: move
from: attributes.logs
to: body
# Extract metadata from file path
- type: regex_parser
id: extract_metadata_from_filepath
regex: '^.*\/(?P<pod_name>[^_]+)_(?P<namespace>[^_]+)_(?P<container>[a-z].*)\-(?P<uid>[a-f0-9\-]{64})'
- id: extract_metadata_from_filepath
type: regex_parser
regex: "^.*/(?P<pod_name>[^_]+)_(?P<namespace>[^_]+)_(?P<container_name>[^_]+)-(?P<uid>[a-f0-9\\-]{64})\\.log$"
parse_from: attributes["log.file.path"]
cache:
size: 128
# workaround, transform cache cannot be used, as this makes entries with wrong metadata
- id: move-pod-name
type: move
from: attributes.pod_name
to: resource["k8s.pod.name"]
- id: move-uid
type: move
from: attributes.uid
to: resource["k8s.pod.id"]
- id: move-namespace
type: move
from: attributes.namespace
to: resource["k8s.namespace"]
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# Prometheus receiver with a single static scrape target, 127.0.0.1:8888,
# polled every 5s; rendered only when the podMonitor switch is on.
prometheus/internal:
  config:
    scrape_configs:
      - job_name: 'internal'
        scrape_interval: 5s
        static_configs:
          - targets: ['127.0.0.1:8888']
{{- end }}

processors:
# Batching limits for the pipelines that list the batch processor.
batch:
  send_batch_size: 400
  send_batch_max_size: 10000
  timeout: 10s

{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# Inserts nodename, cluster and region attributes from the container's
# environment variables and deletes service_instance_id and pod.
attributes/prometheus:
  actions:
    - action: insert
      key: nodename
      value: ${KUBE_NODE_NAME}
    - action: insert
      key: cluster
      value: ${cluster}
    - action: insert
      key: region
      value: ${region}
    - action: delete
      key: service_instance_id
    - action: delete
      key: pod
{{- end }}

# Inserts cluster, region and nodename attributes taken from the container's
# environment variables.
attributes/cluster:
  actions:
    - key: cluster
      value: ${cluster}
      action: insert
    - key: region
      value: ${region}
      action: insert
    - key: nodename
      value: ${KUBE_NODE_NAME}
      action: insert

# Inserts node name, cluster and region as resource attributes; used by the
# logs/k8sevents pipeline.
resource/k8sevents:
  attributes:
    - action: insert
      key: k8s.node.name
      value: ${KUBE_NODE_NAME}
    - action: insert
      key: cluster
      value: ${cluster}
    - action: insert
      key: region
      value: ${region}

# Splits the "<time> <stream> <logtag> <log>" prefix off each record, then
# copies message and severity fields from the parsed content onto the record.
transform/containerd:
  log_statements:
    # Written as a list entry, the form the transform processor documents.
    - context: log
      statements:
        # Extract time/stream/logtag/log from the raw line into the cache.
        - merge_maps(cache,ExtractPatterns(body,"^(?P<time>[^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^\\s]*) ?(?P<log>.*)$"), "upsert") where body != nil
        # Second pass for payloads that carry their own leading date/time.
        - merge_maps(cache,ExtractPatterns(body,"^(?P<time>\\d+/\\d+/\\d+\\s+\\d+:\\d+\\d+) (?P<log>.*)$"), "upsert") where attributes["log_name"]!= "MeshAccessLog" and cache["log"]!= nil and not IsMap(cache["log"])
        - set(body,cache["log"]) where cache["log"] != nil
        - merge_maps(cache,ParseJSON(body), "upsert") where IsMap(body)
        # Prefer an explicit message field as the body when one was parsed.
        - set(body,cache["message"]) where cache["message"] != nil
        - set(body,cache["msg"]) where cache["msg"] != nil
        # Severity text comes from either "level" or "severity".
        - set(severity_text,cache["level"]) where cache["level"] != nil
        - set(severity_text,cache["severity"]) where cache["severity"] != nil
        - set(severity_number,SEVERITY_NUMBER_INFO) where cache["level"] == "INFO"
        - set(severity_number,SEVERITY_NUMBER_INFO) where cache["severity"] == "info"
        - set(attributes["loggerName"],cache["loggerName"]) where cache["loggerName"] != nil

# Kubernetes metadata enrichment, restricted to pods on the node named by
# KUBE_NODE_NAME and authenticated with the pod's service account.
k8sattributes:
  auth_type: "serviceAccount"
  passthrough: false
  filter:
    node_from_env_var: KUBE_NODE_NAME
  extract:
    metadata:
      - k8s.pod.name
      - k8s.pod.uid
      - k8s.deployment.name
      - k8s.node.name
      - k8s.pod.start_time
    # Pod labels copied onto records under app.label.* tag names.
    labels:
      - key: ingress-nginx
        from: pod
        tag_name: app.label.ingress-nginx
      - key: ccloud/support-group
        from: pod
        tag_name: app.label.support-group
      - key: app.kubernetes.io/app_name
        from: pod
        tag_name: app.label.app_name
      - key: app.kubernetes.io/component
        from: pod
        tag_name: app.label.component
      - key: app.kubernetes.io/name
        from: pod
        tag_name: app.label.name
      - key: app.kubernetes.io/instance
        from: pod
        tag_name: app.label.instance
      - key: app.kubernetes.io/pod-template-hash
        from: pod
        tag_name: app.label.pod-template-hash
  # Association sources, in the order listed: pod IP, pod UID, pod name,
  # then the connection.
  pod_association:
    - sources:
        - from: resource_attribute
          name: k8s.pod.ip
    - sources:
        - from: resource_attribute
          name: k8s.pod.uid
    - sources:
        - from: resource_attribute
          name: k8s.pod.name
    - sources:
        - from: connection
# NOTE(review): none of the pipelines visible in this view list
# memory_limiter in their processors - confirm whether it should be added.
memory_limiter:
  limit_percentage: 70
  spike_limit_percentage: 30
  check_interval: 5s

exporters:
debug:
Expand All @@ -84,21 +250,45 @@ spec:
authenticator: basicauth
endpoint: {{ index .Values "open_telemetry" "opensearch_logs" "endpoint" }}
logs_index: ${username}-datastream

prometheus:
endpoint: 0.0.0.0:9999

extensions:
basicauth:
client_auth:
password: ${password}
username: ${username}
password: ${password}

# Routing connector: records whose container.runtime is "containerd" go to
# logs/containerd, which is also the default pipeline.
# NOTE(review): no pipeline visible in this view uses "routing" as an
# exporter or receiver - confirm it is wired in.
connectors:
  routing:
    error_mode: ignore
    default_pipelines:
      - logs/containerd
    table:
      - pipelines:
          - logs/containerd
        statement: route() where attributes["container.runtime"] =="containerd"

service:
extensions:
- basicauth
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# Collector self-telemetry on 127.0.0.1:8888, the same address the
# prometheus/internal receiver scrapes. A single address key is kept.
telemetry:
  metrics:
    address: 127.0.0.1:8888
    level: detailed
{{- end }}
pipelines:
logs:
receivers:
- filelog
exporters:
- opensearch/logs
# Container logs: filelog -> transform -> Kubernetes metadata -> cluster
# attributes -> batch -> OpenSearch.
logs/containerd:
  receivers:
    - filelog
  processors:
    - transform/containerd
    - k8sattributes
    - attributes/cluster
    - batch
  exporters:
    - opensearch/logs
# NOTE(review): no k8s_events receiver definition is visible in this view -
# confirm it is declared under receivers.
logs/k8sevents:
  receivers:
    - k8s_events
  processors:
    - resource/k8sevents
    - batch
  exporters:
    - opensearch/logs
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# Metrics pipeline: prometheus/internal receiver -> attributes/prometheus
# processor -> prometheus exporter. Rendered only when podMonitor is on.
metrics/prometheus:
  receivers:
    - prometheus/internal
  processors:
    - attributes/prometheus
  exporters:
    - prometheus
{{- end }}
10 changes: 10 additions & 0 deletions opentelemetry-operator/chart/templates/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# ServiceAccount named logs-collector in the otel namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: logs-collector
  namespace: otel
  labels:
    app: logs-collector
26 changes: 26 additions & 0 deletions opentelemetry-operator/chart/templates/smon-filelog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{/*
SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
SPDX-License-Identifier: Apache-2.0
*/}}
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# PodMonitor that scrapes /metrics on the port named "prometheus" of the pods
# matched by the selector below; rendered only when podMonitor is enabled.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: opentelemetry-collector-logs
  labels:
    prometheus: "{{ .Values.open_telemetry.prometheus }}"
spec:
  selector:
    matchLabels:
      app.kubernetes.io/component: opentelemetry-collector
      app.kubernetes.io/instance: otel.logs
      app.kubernetes.io/managed-by: opentelemetry-operator
      app.kubernetes.io/part-of: opentelemetry
  podMetricsEndpoints:
    - port: prometheus
      scheme: http
      path: /metrics
      interval: 60s
      scrapeTimeout: 50s
{{- end }}
Loading

0 comments on commit 3b27bf6

Please sign in to comment.