Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(outputs/otel_metrics): add OTEL metrics output support #1012

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,10 @@ func getConfig() *types.Configuration {
Alertmanager: types.AlertmanagerOutputConfig{ExtraLabels: make(map[string]string), ExtraAnnotations: make(map[string]string), CustomSeverityMap: make(map[types.PriorityType]string), CustomHeaders: make(map[string]string)},
CloudEvents: types.CloudEventsOutputConfig{Extensions: make(map[string]string)},
GCP: types.GcpOutputConfig{PubSub: types.GcpPubSub{CustomAttributes: make(map[string]string)}},
OTLP: types.OTLPOutputConfig{Traces: types.OTLPTraces{ExtraEnvVars: make(map[string]string)}},
OTLP: types.OTLPOutputConfig{
Traces: types.OTLPTraces{ExtraEnvVars: make(map[string]string)},
Metrics: types.OTLPMetrics{ExtraEnvVars: make(map[string]string)},
},
}

configFile := kingpin.Flag("config-file", "config file").Short('c').ExistingFile()
Expand Down Expand Up @@ -557,7 +560,7 @@ func getConfig() *types.Configuration {
v.SetDefault("OTLP.Traces.Endpoint", "")
v.SetDefault("OTLP.Traces.Protocol", "http/json")
// NOTE: we don't need to parse the OTLP.Traces.Headers field, as we use it to
// set OTEL_EXPORTER_OTLP_TRACES_HEADERS (at otlp_init.go), which is then
// set OTEL_EXPORTER_OTLP_TRACES_HEADERS (at otlp_traces_init.go), which is then
// parsed by the OTLP SDK libs, see
// https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/#otel_exporter_otlp_traces_headers
v.SetDefault("OTLP.Traces.Headers", "")
Expand All @@ -569,6 +572,15 @@ func getConfig() *types.Configuration {
// it to 1000ms by default, overridable via the OTLP_TRACES_DURATION environment variable.
v.SetDefault("OTLP.Traces.Duration", 1000)

v.SetDefault("OTLP.Metrics.Endpoint", "")
v.SetDefault("OTLP.Metrics.Protocol", "grpc")
// NOTE: we don't need to parse the OTLP.Metrics.Headers field, as we use it to set OTEL_EXPORTER_OTLP_METRICS_HEADERS
// (at otlp_metrics.go), which is then parsed by the OTLP SDK libs.
v.SetDefault("OTLP.Metrics.Headers", "")
v.SetDefault("OTLP.Metrics.Timeout", 10000)
v.SetDefault("OTLP.Metrics.MinimumPriority", "")
v.SetDefault("OTLP.Metrics.CheckCert", true)

v.SetEnvKeyReplacer(strings.NewReplacer(".", "_"))
v.AutomaticEnv()
if *configFile != "" {
Expand Down Expand Up @@ -596,6 +608,7 @@ func getConfig() *types.Configuration {
v.GetStringMapString("AlertManager.CustomSeverityMap")
v.GetStringMapString("GCP.PubSub.CustomAttributes")
v.GetStringMapString("OTLP.Traces.ExtraEnvVars")
v.GetStringMapString("OTLP.Metrics.ExtraEnvVars")

c.Elasticsearch.CustomHeaders = v.GetStringMapString("Elasticsearch.CustomHeaders")

Expand Down Expand Up @@ -750,6 +763,21 @@ func getConfig() *types.Configuration {
}
}

if value, present := os.LookupEnv("OTLP_METRICS_EXTRAENVVARS"); present {
extraEnvVars := strings.Split(value, ",")
for _, extraEnvVarData := range extraEnvVars {
envName, envValue, found := strings.Cut(extraEnvVarData, ":")
envName, envValue = strings.TrimSpace(envName), strings.TrimSpace(envValue)
if !promKVNameRegex.MatchString(envName) {
log.Printf("[ERROR] : OTLPMetrics - Extra Env Var name '%v' is not valid", envName)
} else if found {
c.OTLP.Metrics.ExtraEnvVars[envName] = envValue
} else {
c.OTLP.Metrics.ExtraEnvVars[envName] = ""
}
}
}

if c.AWS.SecurityLake.Interval < 5 {
c.AWS.SecurityLake.Interval = 5
}
Expand Down Expand Up @@ -881,6 +909,7 @@ func getConfig() *types.Configuration {
c.OpenObserve.MinimumPriority = checkPriority(c.OpenObserve.MinimumPriority)
c.Dynatrace.MinimumPriority = checkPriority(c.Dynatrace.MinimumPriority)
c.SumoLogic.MinimumPriority = checkPriority(c.SumoLogic.MinimumPriority)
c.OTLP.Metrics.MinimumPriority = checkPriority(c.OTLP.Metrics.MinimumPriority)
c.Talon.MinimumPriority = checkPriority(c.Talon.MinimumPriority)

c.Slack.MessageFormatTemplate = getMessageFormatTemplate("Slack", c.Slack.MessageFormat)
Expand Down
11 changes: 11 additions & 0 deletions config_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,17 @@ otlp:
# minimumpriority: "" # minimum priority of event for using this output, order is emergency|alert|critical|error|warning|notice|informational|debug or "" (default)
# checkcert: true # Set to false if you want to skip TLS certificate validation (default: true)

metrics:
# endpoint: "" # OTLP endpoint, typically in the form http{s}://{domain or ip}:4318/v1/metrics
# protocol: "" # OTLP transport protocol to be used for metrics data; it can be "grpc" or "http/protobuf" (default: "grpc")
# timeout: "" # OTLP timeout for outgoing metrics in milliseconds (default: "" which uses SDK default: 10000)
# headers: "" # List of headers to apply to all outgoing metrics in the form of "some-key=some-value,other-key=other-value" (default: "")
# extraenvvars: # Extra env vars (override the other settings) (default: "")
# OTEL_EXPORTER_OTLP_METRICS_TIMEOUT: 10000
# OTEL_EXPORTER_OTLP_TIMEOUT: 10000
# minimumpriority: "" # Minimum priority of event for using this output, order is emergency|alert|critical|error|warning|notice|informational|debug or "" (default: "")
# checkcert: true # Set to false if you want to skip TLS certificate validation (only with https) (default: true)

talon:
# address: "" # Falco talon address, if not empty, Falco Talon output is enabled
# checkcert: false # check if ssl certificate of the output is valid (default: true)
Expand Down
Binary file added docs/outputs/images/otlp_metrics-prom_view.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
195 changes: 195 additions & 0 deletions docs/outputs/otlp_metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# OTEL Metrics

- **Category**: Metrics/Observability
- **Website**: <https://opentelemetry.io/docs/concepts/signals/metrics/>

## Table of content

- [OTEL Metrics](#otel-metrics)
- [Table of content](#table-of-content)
- [Configuration](#configuration)
- [Example of config.yaml](#example-of-configyaml)
- [Additional info](#additional-info)
- [Running a whole stack with docker-compose](#running-a-whole-stack-with-docker-compose)

## Configuration

| Setting | Env var | Default value | Description |
|--------------------------------|--------------------------------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------|
| `otlp.metrics.endpoint` | `OTLP_METRICS_ENDPOINT` | | OTLP endpoint, typically in the form http{s}://{domain or ip}:4318/v1/metrics |
| `otlp.metrics.protocol` | `OTLP_METRICS_PROTOCOL` | `grpc` | OTLP transport protocol to be used for metrics data; it can be `"grpc"` or `"http/protobuf"` |
| `otlp.metrics.timeout` | `OTLP_METRICS_TIMEOUT` | `10000` (from SDK) | OTLP timeout for outgoing metrics in milliseconds |
| `otlp.metrics.headers` | `OTLP_METRICS_HEADERS` | `""` | List of headers to apply to all outgoing metrics in the form of `some-key=some-value,other-key=other-value` |
| `otlp.metrics.extraenvvars` | `OTLP_METRICS_EXTRAENVVARS` | `""` | Extra env vars (override the other settings) |
| `otlp.metrics.minimumpriority` | `OTLP_METRICS_MINIMUMPRIORITY` | `""` (=`debug`) | Minimum priority of event for using this output, order is `emergency,alert,critical,error,warning,notice,informational,debug or ""` |
| `otlp.metrics.checkcert` | `OTLP_METRICS_CHECKCERT` | `true` | Set to false if you want to skip TLS certificate validation (only with https) |

> [!NOTE]
> For the extra Env Vars values see [standard `OTEL_*` environment variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/).

## Example of config.yaml

```yaml
otlp:
metrics:
# endpoint: "" # OTLP endpoint, typically in the form http{s}://{domain or ip}:4318/v1/metrics
# protocol: "" # OTLP transport protocol to be used for metrics data; it can be "grpc" or "http/protobuf" (default: "grpc")
# timeout: "" # OTLP timeout for outgoing metrics in milliseconds (default: "" which uses SDK default: 10000)
# headers: "" # List of headers to apply to all outgoing metrics in the form of "some-key=some-value,other-key=other-value" (default: "")
# extraenvvars: # Extra env vars (override the other settings) (default: "")
# OTEL_EXPORTER_OTLP_METRICS_TIMEOUT: 10000
# OTEL_EXPORTER_OTLP_TIMEOUT: 10000
# minimumpriority: "" # Minimum priority of event for using this output, order is emergency|alert|critical|error|warning|notice|informational|debug or "" (default: "")
# checkcert: true # Set to false if you want to skip TLS certificate validation (only with https) (default: true)
```

## Additional info

> [!NOTE]
> The OTLP Metrics are only available for the source: `syscalls`.

## Running a whole stack with docker-compose

Below `docker-compose` file runs a stack of:

- `falco`
- `falcosidekick`
- `prometheus` as metrics backend
- OTEL collector to collect OTEL metrics from `falcosidekick` and let prometheus scrape them
- `events-generator` to generate arbitrary Falco events

### Requirements

A local Linux kernel capable of running `falco --modern-bpf`, see <https://falco.org/blog/falco-modern-bpf/>.

### Configuration files

You need to create these files:

- `./docker-compose.yaml`: minimal docker-compose configuration

```yaml
---
services:
falco:
image: falcosecurity/falco:0.39.0
privileged: true
volumes:
- /var/run/docker.sock:/host/var/run/docker.sock
- /dev:/host/dev
- /proc:/host/proc:ro
- /boot:/host/boot:ro
- /lib/modules:/host/lib/modules:ro
- /usr:/host/usr:ro
- /etc/falco:/host/etc:ro
command: [
"/usr/bin/falco" ,
"-o", "json_output=true",
"-o", "http_output.enabled=true",
"-o", "http_output.url=http://sidekick:2801", # Set the HTTP output url to Falco sidekick endpoint
"-o", "http_output.insecure=true"
]

sidekick:
image: falcosidekick:latest
ports:
- "2801:2801" # Expose default port towards Falco instance
environment:
- OTLP_METRICS_ENDPOINT=http://otel-collector:4317
- OTLP_METRICS_CHECKCERT=false

otel-collector:
image: otel/opentelemetry-collector-contrib
volumes:
- ./config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- "4317:4317" # Expose OTLP gRPC port

prometheus:
image: prom/prometheus:latest
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090" # Expose port to access Prometheus expression browser

event-generator:
image: falcosecurity/event-generator
command: run
restart: always
trigger:
image: alpine
command: [ # Alternate reads to /etc/shadow with creations of symlinks from it
"sh",
"-c",
"while true; do cat /etc/shadow > /dev/null; sleep 5; ln -s /etc/shadow shadow; rm shadow; sleep 5; done"
]
```

> `./docker-compose.yaml` references the `falcosidekick:latest` docker image, which must be locally available before
> bringing up the stack. You can build it from source by cloning the repository and running the build commands:
> ```shell
> git clone https://github.com/falcosecurity/falcosidekick.git
> cd falcosidekick
> go build . && docker build . -t falcosidekick:latest
> ```

- `./config.yaml`: minimal OTEL collector configuration

```yaml
---
receivers:
otlp:
protocols:
grpc:
endpoint: "0.0.0.0:4317"

exporters:
prometheus:
endpoint: "0.0.0.0:9090"

service:
pipelines:
metrics:
receivers: [otlp]
processors: []
exporters: [prometheus]
```

- `./prometheus.yml`: minimal prometheus configuration

```yaml
global:
scrape_interval: 5s

scrape_configs:
- job_name: 'otel-collector'
static_configs:
- targets: ['otel-collector:9090']
```

### Run it

To bring up the stack and see the results in the Prometheus expression browser:

1. Bring up the stack

```shell
docker compose up
```

2. Navigate to <http://localhost:9090/graph> to start browsing the local prometheus expression browser

3. Navigate to the `Graph` tab and adjust the time interval to be comparable to the stack uptime (e.g.: 15 minutes)

4. To get information regarding the `falcosecurity_falco_rules_matches_total` metric, you can enter a simple query like
`falcosecurity_falco_rules_matches_total` or `sum by (rule) (falcosecurity_falco_rules_matches_total)` and press
`Execute`

5. Explore the obtained results
![Falco metrics view](images/otlp_metrics-prom_view.png)

6. Bring down the stack

```shell
docker compose down
```
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ require (
github.com/xitongsys/parquet-go v1.6.2
github.com/xitongsys/parquet-go-source v0.0.0-20240122235623-d6294584ab18
go.opentelemetry.io/otel v1.30.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.30.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.30.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0
go.opentelemetry.io/otel/metric v1.30.0
go.opentelemetry.io/otel/sdk v1.30.0
go.opentelemetry.io/otel/sdk/metric v1.30.0
go.opentelemetry.io/otel/trace v1.30.0
golang.org/x/oauth2 v0.23.0
golang.org/x/sync v0.8.0
Expand Down Expand Up @@ -144,8 +148,6 @@ require (
go.opentelemetry.io/contrib/detectors/gcp v1.29.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
go.opentelemetry.io/otel/metric v1.30.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.29.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,10 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+n
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.30.0 h1:WypxHH02KX2poqqbaadmkMYalGyy/vil4HE4PM4nRJc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.30.0/go.mod h1:U79SV99vtvGSEBeeHnpgGJfTsnsdkWLpPN/CcHAzBSI=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.30.0 h1:VrMAbeJz4gnVDg2zEzjHG4dEH86j4jO6VYB+NgtGD8s=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.30.0/go.mod h1:qqN/uFdpeitTvm+JDqqnjm517pmQRYxTORbETHq5tOc=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8=
Expand All @@ -869,8 +873,8 @@ go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4Q
go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ=
go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE=
go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg=
go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY=
go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ=
go.opentelemetry.io/otel/sdk/metric v1.30.0 h1:QJLT8Pe11jyHBHfSAgYH7kEmT24eX792jZO1bo4BXkM=
go.opentelemetry.io/otel/sdk/metric v1.30.0/go.mod h1:waS6P3YqFNzeP01kuo/MBBYqaoBJl7efRQHOaydhy1Y=
go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc=
go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
Expand Down
8 changes: 7 additions & 1 deletion handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,13 @@ func forwardEvent(falcopayload types.FalcoPayload) {
}

if config.OTLP.Traces.Endpoint != "" && (falcopayload.Priority >= types.Priority(config.OTLP.Traces.MinimumPriority)) && (falcopayload.Source == syscall || falcopayload.Source == syscalls) {
go otlpClient.OTLPTracesPost(falcopayload)
go otlpTracesClient.OTLPTracesPost(falcopayload)
}

if config.OTLP.Metrics.Endpoint != "" &&
(falcopayload.Priority) >= types.Priority(config.OTLP.Metrics.MinimumPriority) &&
(falcopayload.Source == syscall || falcopayload.Source == syscalls) {
ekoops marked this conversation as resolved.
Show resolved Hide resolved
go otlpMetricsClient.OTLPMetricsPost(falcopayload)
}

if config.Talon.Address != "" && (falcopayload.Priority >= types.Priority(config.Talon.MinimumPriority) || falcopayload.Rule == testRule) {
Expand Down
19 changes: 16 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ var (
n8nClient *outputs.Client
openObserveClient *outputs.Client
dynatraceClient *outputs.Client
otlpClient *outputs.Client
otlpTracesClient *outputs.Client
otlpMetricsClient *outputs.Client
talonClient *outputs.Client

statsdClient, dogstatsdClient *statsd.Client
Expand Down Expand Up @@ -801,12 +802,24 @@ func init() {

if config.OTLP.Traces.Endpoint != "" {
var err error
otlpClient, err = outputs.NewOtlpTracesClient(config, stats, promStats, statsdClient, dogstatsdClient)
otlpTracesClient, err = outputs.NewOtlpTracesClient(config, stats, promStats, statsdClient, dogstatsdClient)
if err != nil {
config.OTLP.Traces.Endpoint = ""
} else {
outputs.EnabledOutputs = append(outputs.EnabledOutputs, "OTLPTraces")
shutDownFuncs = append(shutDownFuncs, otlpClient.ShutDownFunc)
shutDownFuncs = append(shutDownFuncs, otlpTracesClient.ShutDownFunc)
}
}

if config.OTLP.Metrics.Endpoint != "" {
var err error
otlpMetricsClient, err = outputs.NewOTLPMetricsClient(context.Background(), config, stats, promStats,
statsdClient, dogstatsdClient)
if err != nil {
config.OTLP.Metrics.Endpoint = ""
} else {
outputs.EnabledOutputs = append(outputs.EnabledOutputs, "OTLPMetrics")
shutDownFuncs = append(shutDownFuncs, otlpMetricsClient.ShutDownFunc)
}
}

Expand Down
Loading