Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add in Monitoring #40

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,20 @@ pnpm env:development build --filter=chain
pnpm env:development sequencer:start --filter=chain
```

### Observability

Protokit has the ability to report metrics, logs and traces to a Grafana instance for visualisation.
These can be configured with the following environment variables:
```zsh
OPEN_TELEMETRY_TRACING_URL=
OPEN_TELEMETRY_TRACING_ENABLED=

OPEN_TELEMETRY_METRICS_ENDPOINT=
OPEN_TELEMETRY_METRICS_PORT=
OPEN_TELEMETRY_METRICS_ENABLED=
OPEN_TELEMETRY_METRICS_SCRAPING_FREQUENCY=
```
Note that the functionality is not configured for the `in-memory` mode.

### Running the UI

```zsh
Expand Down Expand Up @@ -237,4 +251,4 @@ The caddy reverse-proxy automatically uses https for all connections, use this g
2. Adapt your starter-kit's package.json to use the file:// references to framework
3. Go into the framework folder, and build a docker image containing the sources with `docker build -f ./packages/deployment/docker/development-base/Dockerfile -t protokit-base .`

4. Comment out the first line of docker/base/Dockerfile to use protokit-base
4. Replace the first line of docker/base/Dockerfile to use `FROM protokit-base`
4 changes: 2 additions & 2 deletions docker/base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# For regular deployment
FROM node:18
FROM protokit-base

# For locally built and imported framework references
# Make sure you build the base-image first
Expand All @@ -17,7 +17,7 @@ COPY ./packages/chain/package.json ./packages/chain/package.json
COPY ./packages/eslint-config-custom/package.json ./packages/eslint-config-custom/package.json
COPY ./apps/web/package.json ./apps/web/package.json

RUN corepack enable
RUN npm i -g pnpm@9.8.0

COPY ./tsconfig.json .
COPY ./packages/chain/tsconfig.json ./packages/chain/tsconfig.json
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ include:
- ./indexer/docker-compose.yml
- ./processor-persistence/docker-compose.yml
- ./processor/docker-compose.yml
- ./monitoring/docker-compose.yml
82 changes: 82 additions & 0 deletions docker/monitoring/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Observability stack. Every service is gated behind the `monitoring` compose
# profile, so nothing here starts unless that profile is enabled.
services:
  # Metrics store; also the remote-write target of Tempo's metrics generator.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    profiles:
      - monitoring
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # tempo/tempo.yaml remote-writes generated metrics to /api/v1/write with
      # exemplars and native histograms. The receiver endpoint is disabled by
      # default, so it (and the matching features) must be switched on here.
      - '--web.enable-remote-write-receiver'
      - '--enable-feature=exemplar-storage'
      - '--enable-feature=native-histograms'
    ports:
      - "9090:9090"
    restart: unless-stopped
    volumes:
      - ./prometheus:/etc/prometheus
      - ../data/prometheus:/prometheus

  # Log store that promtail pushes to.
  loki:
    image: grafana/loki:latest
    container_name: loki
    profiles:
      - monitoring
    ports:
      - "3100:3100"
    command: -config.file=/etc/loki/local-config.yaml

  # Log shipper: reads container logs through the Docker socket and forwards
  # them to loki.
  promtail:
    image: grafana/promtail:latest
    container_name: promtail
    profiles:
      - monitoring
    volumes:
      - ./promtail/promtail.yaml:/etc/promtail/docker-config.yaml
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock
    command: -config.file=/etc/promtail/docker-config.yaml
    depends_on:
      - loki

  # Receives OTLP traces and forwards them to tempo.
  otel-collector:
    image: otel/opentelemetry-collector:0.86.0
    container_name: otel-collector
    profiles:
      - monitoring
    command: [ "--config=/etc/otel-collector.yaml" ]
    volumes:
      - ./otel-collector/otel-collector.yaml:/etc/otel-collector.yaml
    ports:
      # Host port 4318 forwards to the collector's container port 4317;
      # host port 4317 is already published by tempo below.
      - "4318:4317"

  # Trace store.
  tempo:
    image: grafana/tempo:latest
    container_name: tempo
    profiles:
      - monitoring
    command: [ "-config.file=/etc/tempo.yaml" ]
    volumes:
      - ./tempo/tempo.yaml:/etc/tempo.yaml
      - ../data/tempo:/var/tempo
    ports:
      - "3200:3200" # tempo
      - "4317:4317" # otlp grpc

  # Dashboards; data sources are provisioned from ./grafana.
  grafana:
    image: grafana/grafana:11.0.1
    container_name: grafana
    profiles:
      - monitoring
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      # Admin credentials are taken from the environment (.env) file.
      - GF_SECURITY_ADMIN_USER=${OPEN_TELEMETRY_GF_SECURITY_ADMIN_USER}
      - GF_SECURITY_ADMIN_PASSWORD=${OPEN_TELEMETRY_GF_SECURITY_ADMIN_PASSWORD}
      - GF_SECURITY_ALLOW_EMBEDDING=true
    volumes:
      - ./grafana:/etc/grafana/provisioning/datasources
    depends_on:
      - loki
      - tempo
      - prometheus
    labels:
      # promtail only scrapes containers labelled logging=promtail and uses
      # logging_jobname as the `job` label.
      logging: "promtail"
      logging_jobname: "grafana"
33 changes: 33 additions & 0 deletions docker/monitoring/grafana/grafana-datasources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Grafana data source provisioning for the monitoring stack.
apiVersion: 1

datasources:
  # Metrics. The explicit uid is required because the Tempo data source below
  # refers to this one by uid (serviceMap.datasourceUid).
  - name: Prometheus
    type: prometheus
    uid: prometheus
    url: http://prometheus:9090
    isDefault: true
    access: proxy
    editable: true

  # Logs.
  - name: Loki
    type: loki
    access: proxy
    orgId: 1
    url: http://loki:3100
    basicAuth: false
    version: 1
    editable: false

  # Traces.
  - name: Tempo
    type: tempo
    access: proxy
    orgId: 1
    url: http://tempo:3200
    basicAuth: false
    version: 1
    editable: false
    apiVersion: 1
    uid: tempo
    jsonData:
      httpMethod: GET
      serviceMap:
        # Must match the uid of the Prometheus data source above.
        datasourceUid: prometheus
      streamingEnabled:
        search: true
16 changes: 16 additions & 0 deletions docker/monitoring/otel-collector/otel-collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# OpenTelemetry Collector: accept OTLP over gRPC and forward traces to Tempo.
receivers:
  otlp:
    protocols:
      # No overrides given, so the receiver's built-in gRPC settings apply.
      grpc:

exporters:
  # Forwards to the tempo container without TLS.
  otlp:
    endpoint: tempo:4317
    tls:
      insecure: true
  # Declared for troubleshooting; not referenced by any pipeline below.
  debug:
    verbosity: detailed

service:
  pipelines:
    traces:
      receivers:
        - otlp
      exporters:
        - otlp
21 changes: 21 additions & 0 deletions docker/monitoring/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Prometheus server configuration for the monitoring stack.
global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s

# An Alertmanager entry is declared, but its target list is empty.
alerting:
  alertmanagers:
    - scheme: http
      timeout: 10s
      api_version: v2
      static_configs:
        - targets: []

scrape_configs:
  # Prometheus scraping its own /metrics endpoint.
  - job_name: prometheus
    scheme: http
    metrics_path: /metrics
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    static_configs:
      - targets:
          - localhost:9090
26 changes: 26 additions & 0 deletions docker/monitoring/promtail/promtail.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Promtail: discover Docker containers and ship their logs to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File in which promtail records its read positions.
positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: flog_scrape
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
        # Only containers carrying the label logging=promtail are picked up.
        filters:
          - name: label
            values:
              - "logging=promtail"
    relabel_configs:
      # Container name with the leading "/" matched outside the capture group.
      - source_labels:
          - "__meta_docker_container_name"
        regex: "/(.*)"
        target_label: "container"
      - source_labels:
          - "__meta_docker_container_log_stream"
        target_label: "logstream"
      # The container's logging_jobname label becomes the `job` label.
      - source_labels:
          - "__meta_docker_container_label_logging_jobname"
        target_label: "job"
56 changes: 56 additions & 0 deletions docker/monitoring/tempo/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

# Grafana Tempo configuration: single instance with local storage.
stream_over_http_enabled: true

server:
  http_listen_port: 3200
  log_level: info

query_frontend:
  search:
    duration_slo: 5s
    throughput_bytes_slo: 1.073741824e+09
    metadata_slo:
      duration_slo: 5s
      throughput_bytes_slo: 1.073741824e+09
  trace_by_id:
    duration_slo: 5s

# Trace ingestion: OTLP over gRPC only.
distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: "tempo:4317"

ingester:
  # Cut the head block after this much time. Set for demo purposes; should
  # probably be left alone normally.
  max_block_duration: 5m

compactor:
  compaction:
    # Overall Tempo trace retention. Set for demo purposes.
    block_retention: 1h

# Metrics derived from traces are pushed to Prometheus via remote write.
metrics_generator:
  registry:
    external_labels:
      source: tempo
      cluster: docker-compose
  storage:
    path: /var/tempo/generator/wal
    remote_write:
      - url: http://prometheus:9090/api/v1/write
        send_exemplars: true
  traces_storage:
    path: /var/tempo/generator/traces

storage:
  trace:
    # Backend configuration to use.
    backend: local
    wal:
      # Where to store the WAL locally.
      path: /var/tempo/wal
    local:
      path: /var/tempo/blocks

overrides:
  defaults:
    metrics_generator:
      # Enables the metrics generator.
      processors:
        - service-graphs
        - span-metrics
        - local-blocks
      generate_native_histograms: both
2 changes: 1 addition & 1 deletion docker/web/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ COPY package.json pnpm-workspace.yaml pnpm-lock.yaml turbo.json ./
COPY ./packages/chain/package.json ./packages/chain/package.json
COPY ./apps/web/package.json ./apps/web/package.json

RUN corepack enable pnpm
RUN npm i -g pnpm@9.8.0

COPY ./tsconfig.json .
COPY ./packages/chain/tsconfig.json ./packages/chain/tsconfig.json
Expand Down
10 changes: 9 additions & 1 deletion packages/chain/src/environments/development/.env
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,12 @@ PROTOKIT_PROCESSOR_INDEXER_GRAPHQL_HOST=0.0.0.0
# expose graphql configuration to the client app-chain
NEXT_PUBLIC_PROTOKIT_GRAPHQL_URL=http://localhost:8080/graphql
NEXT_PUBLIC_PROTOKIT_INDEXER_GRAPHQL_URL=http://localhost:8081/graphql
NEXT_PUBLIC_PROTOKIT_PROCESSOR_GRAPHQL_URL=http://localhost:8082/graphql
NEXT_PUBLIC_PROTOKIT_PROCESSOR_GRAPHQL_URL=http://localhost:8082/graphql

OPEN_TELEMETRY_TRACING_URL=http://localhost:4318
OPEN_TELEMETRY_TRACING_ENABLED=true

OPEN_TELEMETRY_METRICS_PORT=4318
OPEN_TELEMETRY_METRICS_ENDPOINT=http://localhost
OPEN_TELEMETRY_METRICS_ENABLED=true
OPEN_TELEMETRY_METRICS_SCRAPING_FREQUENCY=10
20 changes: 20 additions & 0 deletions packages/chain/src/environments/development/chain.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
baseAppChainModules,
baseAppChainModulesConfig,
} from "../../app-chain";
import {OpenTelemetryServer} from "../../../../../../framework/packages/api";

export const appChain = AppChain.from({
Runtime: Runtime.from({
Expand All @@ -33,6 +34,7 @@ export const appChain = AppChain.from({
...indexerSequencerModules,
TaskQueue: BullQueue,
DatabasePruneModule,
OpenTelemetryServer: OpenTelemetryServer
},
}),
modules: baseAppChainModules,
Expand Down Expand Up @@ -65,6 +67,24 @@ export default async (args: Arguments): Promise<Startable> => {
connection: process.env.DATABASE_URL!,
},
},
// Telemetry settings, read from the OPEN_TELEMETRY_* environment variables.
OpenTelemetryServer: {
  metrics: {
    // Environment values are strings and Boolean("false") is true, so the
    // flag is compared explicitly; unset or any value other than "true"
    // leaves metrics disabled.
    enabled:
      process.env.OPEN_TELEMETRY_METRICS_ENABLED?.toLowerCase() === "true",
    prometheus: {
      host: undefined,
      port: Number(process.env.OPEN_TELEMETRY_METRICS_PORT),
      endpoint: process.env.OPEN_TELEMETRY_METRICS_ENDPOINT,
      prefix: "",
      appendTimestamp: true,
    },
    // Falls back to 10 when the variable is not set.
    nodeScrapeInterval: Number(
      process.env.OPEN_TELEMETRY_METRICS_SCRAPING_FREQUENCY ?? 10
    ),
  },
  tracing: {
    // Same explicit string comparison as for metrics above.
    enabled:
      process.env.OPEN_TELEMETRY_TRACING_ENABLED?.toLowerCase() === "true",
    otlp: {
      url: process.env.OPEN_TELEMETRY_TRACING_URL,
    },
  },
},

},
...baseAppChainModulesConfig,
});
Expand Down
13 changes: 12 additions & 1 deletion packages/chain/src/environments/sovereign/.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
COMPOSE_PROFILES=db,indexer-db,monolithic-sequencer,monolithic-indexer,proxy,web,processor-db,monolithic-processor
COMPOSE_PROFILES=db,indexer-db,monolithic-sequencer,monolithic-indexer,proxy,web,processor-db,monolithic-processor,monitoring

COMPOSE_PROJECT_NAME=starter-kit

Expand Down Expand Up @@ -58,3 +58,14 @@ NEXT_PUBLIC_PROTOKIT_GRAPHQL_URL=https://localhost/graphql
# NEXT_PUBLIC_PROTOKIT_GRAPHQL_URL=http://yourdomain.com/graphql
NEXT_PUBLIC_PROTOKIT_INDEXER_GRAPHQL_URL=https://localhost/indexer/graphql
NEXT_PUBLIC_PROTOKIT_PROCESSOR_GRAPHQL_URL=https://localhost/processor/graphql

OPEN_TELEMETRY_TRACING_URL=http://localhost:4318
OPEN_TELEMETRY_TRACING_ENABLED=true

OPEN_TELEMETRY_METRICS_PORT=4318
OPEN_TELEMETRY_METRICS_ENDPOINT=http://localhost
OPEN_TELEMETRY_METRICS_ENABLED=true
OPEN_TELEMETRY_METRICS_SCRAPING_FREQUENCY=10

OPEN_TELEMETRY_GF_SECURITY_ADMIN_USER=admin
OPEN_TELEMETRY_GF_SECURITY_ADMIN_PASSWORD=grafana
Loading