From 7b4be25274b2ac7129f64f1072c44b628a6e7f63 Mon Sep 17 00:00:00 2001
From: Bruno Koeferli
Date: Tue, 19 Nov 2024 12:35:35 +0100
Subject: [PATCH] Add support for distributed tracing

387870
---
Review note (ignored by git am): grafana/tempo is pinned to a fixed tag
instead of "latest", matching the other images this patch pins
(otel collector 0.115.1, prometheus v3.0.1, grafana 11.4.0).

 .../docker/observability/docker-compose.yml   | 31 ++++++++--
 .../grafana-datasources/ds-prometheus.yml     |  5 +-
 .../grafana-datasources/ds-tempo.yml          | 15 +++++
 .../observability/otel-collector-config.yml   | 19 +++++-
 .../src/main/docker/observability/tempo.yml   | 58 +++++++++++++++++++
 5 files changed, 120 insertions(+), 8 deletions(-)
 create mode 100644 code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-tempo.yml
 create mode 100644 code/org.eclipse.scout.docs.snippets/src/main/docker/observability/tempo.yml

diff --git a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/docker-compose.yml b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/docker-compose.yml
index 6351c66f49..ebcfa0b0a9 100644
--- a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/docker-compose.yml
+++ b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/docker-compose.yml
@@ -1,23 +1,42 @@
+name: scout-observability-infrastructure
 services:
   otel_collector:
     networks:
       - backend
-    image: otel/opentelemetry-collector-contrib:latest
+    image: otel/opentelemetry-collector-contrib:0.115.1
     volumes:
       - "./otel-collector-config.yml:/etc/otelcol/otel-collector-config.yml"
-    command: --config /etc/otelcol/otel-collector-config.yml
+    command:
+      - "--config=/etc/otelcol/otel-collector-config.yml"
     ports:
       - "4318:4318"
     depends_on:
       - prometheus
+      - tempo
+  tempo:
+    networks:
+      - backend
+    image: grafana/tempo:2.6.1 # fixed tag like the other images; bump deliberately
+    volumes:
+      - "./tempo.yml:/etc/tempo.yaml"
+      - "tempo-data:/var/tempo"
+    command:
+      - "-config.file=/etc/tempo.yaml"
+    ports:
+      - "3200:3200" # tempo rest api
+      - "9095:9095" # tempo grpc
+      - "4327:4327" # otlp grpc
+      - "4328:4328" # otlp http
   prometheus:
     networks:
       - backend
-    image: prom/prometheus:latest
+    image: prom/prometheus:v3.0.1
     command:
       - "--storage.tsdb.retention.time=15d"
       - "--config.file=/etc/prometheus/prometheus.yml"
       - "--web.enable-remote-write-receiver"
+      - "--enable-feature=exemplar-storage"
+      - "--enable-feature=native-histograms"
     volumes:
       - "./prometheus.yml:/etc/prometheus/prometheus.yml"
     ports:
@@ -25,15 +44,17 @@ services:
   grafana:
     networks:
       - backend
-    image: grafana/grafana-oss:10.0.3
+    image: grafana/grafana-oss:11.4.0
     volumes:
       - "./grafana-datasources:/etc/grafana/provisioning/datasources"
-      - grafana-data:/var/lib/grafana
+      - "grafana-data:/var/lib/grafana"
     ports:
       - "3000:3000"
     depends_on:
       - prometheus
+      - tempo
 volumes:
   grafana-data: { }
+  tempo-data: { }
 networks:
   backend: { }
diff --git a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-prometheus.yml b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-prometheus.yml
index 1b7e1da95f..b5aca80e79 100644
--- a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-prometheus.yml
+++ b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-prometheus.yml
@@ -1,6 +1,9 @@
+apiVersion: 1
+
 datasources:
   - name: Prometheus
-    access: proxy
     type: prometheus
+    access: proxy
+    uid: prometheus
     url: http://prometheus:9090
     isDefault: true
diff --git a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-tempo.yml b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-tempo.yml
new file mode 100644
index 0000000000..7a5bf7adb7
--- /dev/null
+++ b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/grafana-datasources/ds-tempo.yml
@@ -0,0 +1,15 @@
+apiVersion: 1
+
+datasources:
+  - name: Tempo
+    type: tempo
+    access: proxy
+    uid: tempo
+    url: http://tempo:3200
+    jsonData:
+      serviceMap:
+        datasourceUid: 'prometheus'
+      nodeGraph:
+        enabled: true
+      tracesToMetrics:
+        datasourceUid: 'prometheus'
diff --git a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/otel-collector-config.yml b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/otel-collector-config.yml
index 000c961eda..73f61778c0 100644
--- a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/otel-collector-config.yml
+++ b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/otel-collector-config.yml
@@ -3,13 +3,28 @@ receivers:
     protocols:
       http:
         endpoint: "0.0.0.0:4318"
+processors:
+  batch:
+  memory_limiter:
+    # 75% of maximum memory up to 2G
+    limit_mib: 1536
+    # 25% of limit up to 2G
+    spike_limit_mib: 512
+    check_interval: 5s
 exporters:
-  logging:
+  debug:
+    # verbosity: detailed
   prometheusremotewrite:
     endpoint: "http://prometheus:9090/api/v1/write"
+  otlphttp:
+    endpoint: "http://tempo:4328"
 service:
   pipelines:
+    traces:
+      receivers: [ otlp ]
+      processors: [ memory_limiter, batch ]
+      exporters: [ debug, otlphttp ]
     metrics:
       receivers: [ otlp ]
       processors: [ ]
-      exporters: [ logging, prometheusremotewrite ]
+      exporters: [ debug, prometheusremotewrite ]
diff --git a/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/tempo.yml b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/tempo.yml
new file mode 100644
index 0000000000..44c2c62836
--- /dev/null
+++ b/code/org.eclipse.scout.docs.snippets/src/main/docker/observability/tempo.yml
@@ -0,0 +1,58 @@
+# inspired by Grafana Tempo's docker compose setup: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/tempo.yaml
+stream_over_http_enabled: true
+server:
+  http_listen_port: 3200
+  log_level: info
+
+query_frontend:
+  search:
+    duration_slo: 5s
+    throughput_bytes_slo: 1.073741824e+09
+  metadata_slo:
+    duration_slo: 5s
+    throughput_bytes_slo: 1.073741824e+09
+  trace_by_id:
+    duration_slo: 5s
+
+distributor:
+  receivers:
+    otlp:
+      protocols:
+        http:
+          endpoint: 0.0.0.0:4328
+        grpc:
+          endpoint: 0.0.0.0:4327
+
+ingester:
+  max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
+
+compactor:
+  compaction:
+    block_retention: 192h # overall Tempo trace retention. set for demo purposes to 8 days
+
+metrics_generator:
+  registry:
+    external_labels:
+      source: tempo
+      cluster: docker-compose
+  storage:
+    path: /var/tempo/generator/wal
+    remote_write:
+      - url: http://prometheus:9090/api/v1/write
+        send_exemplars: true
+  traces_storage:
+    path: /var/tempo/generator/traces
+
+storage:
+  trace:
+    backend: local # backend configuration to use
+    wal:
+      path: /var/tempo/wal # where to store the wal locally
+    local:
+      path: /var/tempo/blocks
+
+overrides:
+  defaults:
+    metrics_generator:
+      processors: [ service-graphs, span-metrics, local-blocks ] # enables metrics generator
+      generate_native_histograms: both