Skip to content

Commit

Permalink
metrics: add start label for prometheus counters (#3089)
Browse files Browse the repository at this point in the history
Add a `start` label to each counter whose value is the counter's creation
timestamp in Unix nanoseconds.

This enables OpenTelemetry cumulative temporality,
see https://opentelemetry.io/docs/specs/otel/metrics/data-model/#temporality

Example:
```
~$ curl -s localhost:9911/metrics | grep host_count
 # HELP skipper_serve_host_count Total number of requests of serving a host.
 # TYPE skipper_serve_host_count counter
skipper_serve_host_count{code="200",host="bar_test",method="GET",start="1717066533598500794"} 1
skipper_serve_host_count{code="200",host="foo_test",method="GET",start="1717066538031805059"} 2
```

Fixes #3087

Signed-off-by: Alexander Yastrebov <[email protected]>
  • Loading branch information
AlexanderYastrebov authored Jun 3, 2024
1 parent 7e21afe commit 338c99b
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 1 deletion.
3 changes: 3 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type Config struct {

// logging, metrics, profiling, tracing:
EnablePrometheusMetrics bool `yaml:"enable-prometheus-metrics"`
EnablePrometheusStartLabel bool `yaml:"enable-prometheus-start-label"`
OpenTracing string `yaml:"opentracing"`
OpenTracingInitialSpan string `yaml:"opentracing-initial-span"`
OpenTracingExcludedProxyTags string `yaml:"opentracing-excluded-proxy-tags"`
Expand Down Expand Up @@ -378,6 +379,7 @@ func NewConfig() *Config {
flag.IntVar(&cfg.BlockProfileRate, "block-profile-rate", 0, "block profile sample rate, see runtime.SetBlockProfileRate")
flag.IntVar(&cfg.MutexProfileFraction, "mutex-profile-fraction", 0, "mutex profile fraction rate, see runtime.SetMutexProfileFraction")
flag.IntVar(&cfg.MemProfileRate, "memory-profile-rate", 0, "memory profile rate, see runtime.SetMemProfileRate, keeps default 512 kB")
flag.BoolVar(&cfg.EnablePrometheusStartLabel, "enable-prometheus-start-label", false, "adds start label to each prometheus counter with the value of counter creation timestamp as unix nanoseconds")
flag.BoolVar(&cfg.DebugGcMetrics, "debug-gc-metrics", false, "enables reporting of the Go garbage collector statistics exported in debug.GCStats")
flag.BoolVar(&cfg.RuntimeMetrics, "runtime-metrics", true, "enables reporting of the Go runtime statistics exported in runtime and specifically runtime.MemStats")
flag.BoolVar(&cfg.ServeRouteMetrics, "serve-route-metrics", false, "enables reporting total serve time metrics for each route")
Expand Down Expand Up @@ -734,6 +736,7 @@ func (c *Config) ToOptions() skipper.Options {

// logging, metrics, profiling, tracing:
EnablePrometheusMetrics: c.EnablePrometheusMetrics,
EnablePrometheusStartLabel: c.EnablePrometheusStartLabel,
OpenTracing: strings.Split(c.OpenTracing, " "),
OpenTracingInitialSpan: c.OpenTracingInitialSpan,
OpenTracingExcludedProxyTags: strings.Split(c.OpenTracingExcludedProxyTags, ","),
Expand Down
4 changes: 4 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ type Options struct {
// library.
// A new registry is created if this option is nil.
PrometheusRegistry *prometheus.Registry

// EnablePrometheusStartLabel adds start label to each prometheus counter with the value of counter creation
// timestamp as unix nanoseconds.
EnablePrometheusStartLabel bool
}

var (
Expand Down
29 changes: 28 additions & 1 deletion metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
dto "github.com/prometheus/client_model/go"
"google.golang.org/protobuf/proto"
)

const (
Expand Down Expand Up @@ -295,7 +297,11 @@ func (p *Prometheus) registerMetrics() {
}

// CreateHandler returns an http.Handler that serves the metrics gathered
// from p.registry through promhttp.
//
// When p.opts.EnablePrometheusStartLabel is set, the registry is wrapped in
// withStartLabelGatherer before being handed to promhttp; otherwise the
// registry is used directly.
func (p *Prometheus) CreateHandler() http.Handler {
	// Declared as the interface type so either the plain registry or the
	// wrapping gatherer can be assigned.
	var gatherer prometheus.Gatherer = p.registry
	if p.opts.EnablePrometheusStartLabel {
		gatherer = withStartLabelGatherer{p.registry}
	}
	return promhttp.HandlerFor(gatherer, promhttp.HandlerOpts{})
}

func (p *Prometheus) getHandler() http.Handler {
Expand Down Expand Up @@ -457,3 +463,24 @@ func (p *Prometheus) IncErrorsStreaming(routeID string) {
}

// Close does nothing.
func (p *Prometheus) Close() {
	// Intentionally empty.
}

// withStartLabelGatherer is a gatherer that adds a "start" label to all
// counters with the value of the counter creation timestamp as unix
// nanoseconds.
//
// It embeds *prometheus.Registry and defines its own Gather method, which
// takes precedence over the embedded registry's Gather.
type withStartLabelGatherer struct {
*prometheus.Registry
}

// Gather collects the metric families from the embedded registry and adds a
// "start" label to every metric of each COUNTER family. The label value is
// the counter's creation timestamp in Unix nanoseconds, rendered as a decimal
// string.
//
// The registry's error is returned unchanged alongside whatever metric
// families the registry returned; those families are labeled regardless of
// the error.
func (g withStartLabelGatherer) Gather() ([]*dto.MetricFamily, error) {
	metricFamilies, err := g.Registry.Gather()
	for _, metricFamily := range metricFamilies {
		if metricFamily.GetType() != dto.MetricType_COUNTER {
			continue
		}
		for _, metric := range metricFamily.GetMetric() {
			// The generated getters tolerate nil receivers, so a metric
			// without a counter payload or creation timestamp does not
			// cause a nil pointer dereference here.
			created := metric.GetCounter().GetCreatedTimestamp().AsTime().UnixNano()

			// Build a fresh slice rather than appending in place: the label
			// slice was not allocated here and may share its backing array
			// with the collector that produced it, in which case an in-place
			// append with spare capacity would write into shared memory.
			labels := make([]*dto.LabelPair, 0, len(metric.Label)+1)
			labels = append(labels, metric.Label...)
			labels = append(labels, &dto.LabelPair{
				Name:  proto.String("start"),
				Value: proto.String(fmt.Sprintf("%d", created)),
			})
			metric.Label = labels
		}
	}
	return metricFamilies, err
}
62 changes: 62 additions & 0 deletions metrics/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@ import (
"io"
"net/http"
"net/http/httptest"
"regexp"
"strconv"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/zalando/skipper/metrics"
)

Expand Down Expand Up @@ -1053,3 +1057,61 @@ func TestPrometheusMetrics(t *testing.T) {
})
}
}

// TestPrometheusMetricsStartTimestamp checks that, with
// EnablePrometheusStartLabel set, the served counters carry a "start" label
// whose value lies between timestamps taken immediately before and
// immediately after the counters are incremented.
func TestPrometheusMetricsStartTimestamp(t *testing.T) {
	const metricsPath = "/awesome-metrics"

	prom := metrics.NewPrometheus(metrics.Options{
		EnablePrometheusStartLabel: true,
		EnableServeHostCounter:     true,
	})

	mux := http.NewServeMux()
	prom.RegisterHandler(metricsPath, mux)

	// Every counter touched below is expected to report a creation
	// timestamp inside the [before, after] window.
	before := time.Now()

	prom.MeasureServe("route1", "foo.test", "GET", 200, time.Now().Add(-15*time.Millisecond))
	prom.MeasureServe("route1", "bar.test", "POST", 201, time.Now().Add(-15*time.Millisecond))
	prom.MeasureServe("route1", "bar.test", "POST", 201, time.Now().Add(-15*time.Millisecond))
	for i := 0; i < 3; i++ {
		prom.IncRoutingFailures()
	}

	after := time.Now()

	rec := httptest.NewRecorder()
	mux.ServeHTTP(rec, httptest.NewRequest("GET", metricsPath, nil))

	resp := rec.Result()
	require.Equal(t, http.StatusOK, resp.StatusCode)

	body, err := io.ReadAll(resp.Body)
	require.NoError(t, err)

	t.Logf("Metrics response:\n%s", body)

	// The Prometheus client offers no way to mock counter creation
	// timestamps, see https://github.com/prometheus/client_golang/issues/1354
	//
	// assertCounter therefore only verifies that the captured timestamp
	// falls within the [before, after] range.
	assertCounter := func(pattern string) {
		t.Helper()

		groups := regexp.MustCompile(pattern).FindSubmatch(body)
		require.NotNil(t, groups, "Metrics response does not match: %s", pattern)
		require.Len(t, groups, 2)

		startNanos, err := strconv.ParseInt(string(groups[1]), 10, 64)
		require.NoError(t, err)

		assert.GreaterOrEqual(t, startNanos, before.UnixNano())
		assert.LessOrEqual(t, startNanos, after.UnixNano())
	}

	for _, pattern := range []string{
		`skipper_serve_host_count{code="200",host="foo_test",method="GET",start="(\d+)"} 1`,
		`skipper_serve_host_count{code="201",host="bar_test",method="POST",start="(\d+)"} 2`,
		`skipper_route_error_total{start="(\d+)"} 3`,
	} {
		assertCounter(pattern)
	}
}
5 changes: 5 additions & 0 deletions skipper.go
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,10 @@ type Options struct {
// use the MetricsFlavours option.
EnablePrometheusMetrics bool

// EnablePrometheusStartLabel adds start label to each prometheus counter with the value of counter creation
// timestamp as unix nanoseconds.
EnablePrometheusStartLabel bool

// An instance of a Prometheus registry. It allows registering and serving custom metrics when skipper is used as a
// library.
// A new registry is created if this option is nil.
Expand Down Expand Up @@ -1514,6 +1518,7 @@ func run(o Options, sig chan os.Signal, idleConnsCH chan struct{}) error {
HistogramBuckets: o.HistogramMetricBuckets,
DisableCompatibilityDefaults: o.DisableMetricsCompatibilityDefaults,
PrometheusRegistry: o.PrometheusRegistry,
EnablePrometheusStartLabel: o.EnablePrometheusStartLabel,
}

mtr := o.MetricsBackend
Expand Down

0 comments on commit 338c99b

Please sign in to comment.