Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[filelog] clusterrole settings and parsing k8s, prom metrics #191

Merged
merged 10 commits into from
Jul 17, 2024
2 changes: 1 addition & 1 deletion opentelemetry-operator/chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

apiVersion: v2
name: opentelemetry-operator
version: 0.0.15
version: 0.0.16
description: OpenTelemetry Operator Helm chart for Kubernetes
icon: https://raw.githubusercontent.com/cncf/artwork/a718fa97fffec1b9fd14147682e9e3ac0c8817cb/projects/opentelemetry/icon/color/opentelemetry-icon-color.png
type: application
Expand Down
19 changes: 19 additions & 0 deletions opentelemetry-operator/chart/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# Cluster-wide, read-only access for the logs collector: pods, namespaces and
# nodes from the core API group, plus replicasets from "apps" and "extensions".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: logs-collector
  labels:
    app: logs-collector
rules:
  # Core API group ("").
  - apiGroups: [""]
    resources: ["pods", "namespaces", "nodes"]
    verbs: ["get", "watch", "list"]
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["get", "list", "watch"]
  # NOTE(review): presumably kept for clusters that still serve replicasets
  # under the legacy "extensions" group - confirm whether it is still needed.
  - apiGroups: ["extensions"]
    resources: ["replicasets"]
    verbs: ["get", "list", "watch"]
17 changes: 17 additions & 0 deletions opentelemetry-operator/chart/templates/clusterrolebindings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# Grants the logs-collector ClusterRole to the logs-collector ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: logs-collector
  labels:
    app: logs-collector
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: logs-collector
subjects:
  - kind: ServiceAccount
    name: logs-collector
    # NOTE(review): the namespace is hard-coded; confirm the chart is always
    # installed into "otel", otherwise this subject will not match.
    namespace: otel
260 changes: 225 additions & 35 deletions opentelemetry-operator/chart/templates/logs-collector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,23 @@ kind: OpenTelemetryCollector
metadata:
name: logs
spec:
mode: deployment
image: {{ index .Values "opentelemetry-operator" "manager" "collectorImage" "repository" }}:{{ index .Values "opentelemetry-operator" "manager" "collectorImage" "tag" }}
volumeMounts:
- mountPath: /var/log
name: varlog
readOnly: true
volumes:
- name: varlog
hostPath:
path: /var/log
mode: daemonset
tolerations:
- operator: Exists
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: KUBE_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: MY_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: username
valueFrom:
secretKeyRef:
Expand All @@ -28,7 +34,25 @@ spec:
valueFrom:
secretKeyRef:
name: otel-basic-auth
key: password
key: password
- name: cluster
value: "{{ .Values.open_telemetry.cluster }}"
- name: region
value: "{{ .Values.open_telemetry.region }}"
# Expose port 9999 under the name "prometheus" only when the PodMonitor is
# enabled in the chart values.
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
ports:
  - name: prometheus
    port: 9999
{{- end }}
image: {{ index .Values "opentelemetry-operator" "manager" "collectorImage" "repository" }}:{{ index .Values "opentelemetry-operator" "manager" "collectorImage" "tag" }}
volumeMounts:
- mountPath: /var/log
name: varlog
readOnly: true
volumes:
- name: varlog
hostPath:
path: /var/log
config: |
receivers:
filelog:
Expand All @@ -39,41 +63,183 @@ spec:
start_at: end
retry_on_failure:
enabled: true
operators:
- type: router
id: get-format
- id: get-format
type: router
routes:
- output: parser-containerd
- output: set-containerd
expr: 'body matches "^[^ Z]+Z"'
default: parse-time
# Parse CRI-Containerd format
- type: regex_parser
id: parser-containerd
regex: '^(?P<time>[^ Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<logs>.*)$'
- id: set-containerd
type: add
field: resource["container.runtime"]
value: "containerd"
# Parse CRI-Containerd format
- id: parser-containerd
type: regex_parser
regex: '^(?P<time>[^ Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*)'
timestamp:
parse_from: attributes.time
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
- type: regex_parser
id: parse-time
- id: parse-time
type: regex_parser
regex: '^(?P<time>[^ Z]+)'
timestamp:
parse_from: attributes.time
layout_type: gotime
layout: '2006-01-02T15:04:05.999999999'
- type: move
from: attributes.logs
to: body
# Extract metadata from file path
- type: regex_parser
id: extract_metadata_from_filepath
regex: '^.*\/(?P<pod_name>[^_]+)_(?P<namespace>[^_]+)_(?P<container>[a-z].*)\-(?P<uid>[a-f0-9\-]{64})'
- id: extract_metadata_from_filepath
type: regex_parser
regex: "^.*/(?P<pod_name>[^_]+)_(?P<namespace>[^_]+)_(?P<container_name>[^_]+)-(?P<uid>[a-f0-9\\-]{64})\\.log$"
parse_from: attributes["log.file.path"]
cache:
size: 128
# workaround, transform cache cannot be used, as this makes entries with wrong metadata
- id: move-pod-name
type: move
from: attributes.pod_name
to: resource["k8s.pod.name"]
- id: move-uid
type: move
from: attributes.uid
to: resource["k8s.pod.id"]
- id: move-namespace
type: move
from: attributes.namespace
to: resource["k8s.namespace"]
# Scrape job named "internal" against 127.0.0.1:8888, rendered only when the
# PodMonitor is enabled in the chart values.
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
prometheus/internal:
  config:
    scrape_configs:
      - job_name: 'internal'
        scrape_interval: 5s
        static_configs:
          - targets: ['127.0.0.1:8888']
{{- end }}

processors:
batch:
send_batch_max_size: 10000
timeout: 10s
send_batch_size : 400

# Attribute processor for the metrics pipeline: inserts nodename, cluster and
# region from environment variables and deletes the service_instance_id and
# pod attributes. Rendered only when the PodMonitor is enabled.
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
attributes/prometheus:
  actions:
    - action: insert
      key: nodename
      value: ${KUBE_NODE_NAME}
    - action: insert
      key: cluster
      value: ${cluster}
    - action: insert
      key: region
      value: ${region}
    - action: delete
      key: service_instance_id
    - action: delete
      key: pod
{{- end }}

attributes/cluster:
actions:
- action: insert
key: cluster
value: ${cluster}
- action: insert
key: region
value: ${region}
- action: insert
key: nodename
value: ${KUBE_NODE_NAME}

resource/k8sevents:
attributes:
- key: k8s.node.name
value: ${KUBE_NODE_NAME}
action: insert
- key: cluster
value: ${cluster}
action: insert
- key: region
value: ${region}
action: insert

# OTTL log statements, evaluated in order: extract the CRI line fields into
# `cache`, promote the `log` payload to the body, merge JSON bodies into
# `cache`, then copy message/level/logger fields onto the record.
transform/containerd:
# NOTE(review): the transform processor documents `log_statements` as a list of
# `context`/`statements` entries; no list marker is visible here - confirm.
log_statements:
context: log
statements:
# Split "<time> <stream> <logtag> <log>" into cache entries.
- merge_maps(cache,ExtractPatterns(body,"^(?P<time>[^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^\\s]*) ?(?P<log>.*)$"), "upsert") where body != nil
# NOTE(review): this pattern is applied to `body`, but its guard tests
# cache["log"]. Also, `\\d+:\\d+\\d+` has no `:` between the last two digit
# groups, so an "HH:MM:SS" time would not match - confirm the intended format.
- merge_maps(cache,ExtractPatterns(body,"^(?P<time>\\d+/\\d+/\\d+\\s+\\d+:\\d+\\d+) (?P<log>.*)$"), "upsert") where attributes["log_name"]!= "MeshAccessLog" and cache["log"]!= nil and not IsMap(cache["log"])
# Replace the body with the extracted log payload.
- set(body,cache["log"]) where cache["log"] != nil
# NOTE(review): ParseJSON is documented to take a JSON string, while this guard
# fires only when body is already a map - confirm the condition.
- merge_maps(cache,ParseJSON(body), "upsert") where IsMap(body)
# When both keys exist, "msg" overrides "message" because it is set later.
- set(body,cache["message"]) where cache["message"] != nil
- set(body,cache["msg"]) where cache["msg"] != nil
# Likewise "severity" overrides "level" for severity_text.
- set(severity_text,cache["level"]) where cache["level"] != nil
- set(severity_text,cache["severity"]) where cache["severity"] != nil
# severity_number is set only for the exact values "INFO" (level) and "info"
# (severity); no other levels are mapped here.
- set(severity_number,SEVERITY_NUMBER_INFO) where cache["level"] == "INFO"
- set(severity_number,SEVERITY_NUMBER_INFO) where cache["severity"] == "info"
- set(attributes["loggerName"],cache["loggerName"]) where cache["loggerName"] != nil

k8sattributes:
auth_type: "serviceAccount"
passthrough: false
filter:
node_from_env_var: KUBE_NODE_NAME
extract:
metadata:
- k8s.pod.name
- k8s.pod.uid
- k8s.deployment.name
- k8s.node.name
- k8s.pod.start_time
labels:
- tag_name: app.label.ingress-nginx
key: ingress-nginx
from: pod
- tag_name: app.label.support-group
key: ccloud/support-group
from: pod
- tag_name: app.label.app_name
key: app.kubernetes.io/app_name
from: pod
- tag_name: app.label.component
key: app.kubernetes.io/component
from: pod
- tag_name: app.label.name
key: app.kubernetes.io/name
from: pod
- tag_name: app.label.instance
key: app.kubernetes.io/instance
from: pod
- tag_name: app.label.pod-template-hash
key: app.kubernetes.io/pod-template-hash
from: pod
pod_association:
- sources:
- from: resource_attribute
name: k8s.pod.ip
- sources:
- from: resource_attribute
name: k8s.pod.uid
- sources:
- from: resource_attribute
name: k8s.pod.name
- sources:
- from: connection
memory_limiter:
check_interval: 5s
limit_percentage: 70
spike_limit_percentage: 30

exporters:
debug:
Expand All @@ -84,21 +250,45 @@ spec:
authenticator: basicauth
endpoint: {{ index .Values "open_telemetry" "opensearch_logs" "endpoint" }}
logs_index: ${username}-datastream

prometheus:
endpoint: 0.0.0.0:9999

extensions:
basicauth:
client_auth:
password: ${password}
username: ${username}
password: ${password}

# Routing connector: records whose "container.runtime" attribute equals
# "containerd" go to logs/containerd, which is also the default pipeline.
# NOTE(review): no pipeline visible in this chunk lists `routing` as a receiver
# or exporter - confirm the connector is actually wired into the service.
connectors:
routing:
default_pipelines: [logs/containerd]
error_mode: ignore
table:
- statement: route() where attributes["container.runtime"] =="containerd"
pipelines: [logs/containerd]

service:
extensions:
- basicauth
# Collector self-telemetry, rendered only when the PodMonitor is enabled.
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
telemetry:
  metrics:
    # A single address key: the earlier 0.0.0.0:8888 entry duplicated this key
    # and is dropped in favour of the later loopback value, which matches the
    # 127.0.0.1:8888 target of the prometheus/internal scrape job.
    address: 127.0.0.1:8888
    level: detailed
{{- end }}
pipelines:
logs:
receivers:
- filelog
exporters:
- opensearch/logs
logs/containerd:
receivers: [filelog]
processors: [transform/containerd,k8sattributes,attributes/cluster,batch]
exporters: [opensearch/logs]
logs/k8sevents:
receivers: [k8s_events]
processors: [resource/k8sevents,batch]
exporters: [opensearch/logs]
# Metrics pipeline: prometheus/internal receiver -> attributes/prometheus
# processor -> prometheus exporter. Rendered only when the PodMonitor is
# enabled in the chart values.
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
metrics/prometheus:
  receivers: [prometheus/internal]
  processors: [attributes/prometheus]
  exporters: [prometheus]
{{- end }}
10 changes: 10 additions & 0 deletions opentelemetry-operator/chart/templates/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
# SPDX-License-Identifier: Apache-2.0

# ServiceAccount named as the subject of the logs-collector
# ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: logs-collector
  name: logs-collector
  # NOTE(review): the namespace is hard-coded; confirm the chart is always
  # installed into "otel".
  namespace: otel
26 changes: 26 additions & 0 deletions opentelemetry-operator/chart/templates/smon-filelog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{/*
SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
SPDX-License-Identifier: Apache-2.0
*/}}
{{- if index .Values "opentelemetry-operator" "manager" "podMonitor" "enabled" }}
# PodMonitor that scrapes the "prometheus" port of the logs collector pods.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  # Explicit empty map instead of a bare key, which parses as null.
  annotations: {}
  name: opentelemetry-collector-logs
  labels:
    prometheus: "{{ .Values.open_telemetry.prometheus }}"
spec:
  podMetricsEndpoints:
    - interval: 60s
      scrapeTimeout: 50s
      path: /metrics
      scheme: http
      # Port is referenced by name.
      port: prometheus
  selector:
    matchLabels:
      app.kubernetes.io/component: opentelemetry-collector
      # NOTE(review): presumably "<namespace>.<collector name>"; this
      # hard-codes the "otel" namespace - confirm.
      app.kubernetes.io/instance: otel.logs
      app.kubernetes.io/managed-by: opentelemetry-operator
      app.kubernetes.io/part-of: opentelemetry
{{- end }}
Loading
Loading