Skip to content

Commit 3279504

Browse files
authored
feat: Add active users prometheus metric (coder#3406)
This allows deployments using our Prometheus export to determine the number of active users in the past hour. The interval is an hour to align with API key last used refresh times. SSH connections poll to check shutdown time, so this will be accurate even on long-running connections without dashboard requests.
1 parent 13a2014 commit 3279504

File tree

8 files changed

+307
-55
lines changed

8 files changed

+307
-55
lines changed

Diff for: .vscode/settings.json

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"coderdtest",
1212
"codersdk",
1313
"cronstrue",
14+
"databasefake",
1415
"devel",
1516
"drpc",
1617
"drpcconn",
@@ -52,6 +53,7 @@
5253
"oneof",
5354
"parameterscopeid",
5455
"pqtype",
56+
"prometheusmetrics",
5557
"promptui",
5658
"protobuf",
5759
"provisionerd",
@@ -72,6 +74,7 @@
7274
"templateversions",
7375
"testdata",
7476
"testid",
77+
"testutil",
7578
"tfexec",
7679
"tfjson",
7780
"tfplan",

Diff for: cli/server.go

+22-11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/google/uuid"
3131
"github.com/pion/turn/v2"
3232
"github.com/pion/webrtc/v3"
33+
"github.com/prometheus/client_golang/prometheus"
3334
"github.com/prometheus/client_golang/prometheus/promhttp"
3435
"github.com/spf13/afero"
3536
"github.com/spf13/cobra"
@@ -53,6 +54,7 @@ import (
5354
"github.com/coder/coder/coderd/database/databasefake"
5455
"github.com/coder/coder/coderd/devtunnel"
5556
"github.com/coder/coder/coderd/gitsshkey"
57+
"github.com/coder/coder/coderd/prometheusmetrics"
5658
"github.com/coder/coder/coderd/telemetry"
5759
"github.com/coder/coder/coderd/tracing"
5860
"github.com/coder/coder/coderd/turnconn"
@@ -392,6 +394,26 @@ func server() *cobra.Command {
392394
defer options.Telemetry.Close()
393395
}
394396

397+
// This prevents the pprof import from being accidentally deleted.
398+
_ = pprof.Handler
399+
if pprofEnabled {
400+
//nolint:revive
401+
defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")()
402+
}
403+
if promEnabled {
404+
options.PrometheusRegistry = prometheus.NewRegistry()
405+
closeFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0)
406+
if err != nil {
407+
return xerrors.Errorf("register active users prometheus metric: %w", err)
408+
}
409+
defer closeFunc()
410+
411+
//nolint:revive
412+
defer serveHandler(ctx, logger, promhttp.InstrumentMetricHandler(
413+
options.PrometheusRegistry, promhttp.HandlerFor(options.PrometheusRegistry, promhttp.HandlerOpts{}),
414+
), promAddress, "prometheus")()
415+
}
416+
395417
coderAPI := coderd.New(options)
396418
defer coderAPI.Close()
397419

@@ -406,17 +428,6 @@ func server() *cobra.Command {
406428
}
407429
}
408430

409-
// This prevents the pprof import from being accidentally deleted.
410-
_ = pprof.Handler
411-
if pprofEnabled {
412-
//nolint:revive
413-
defer serveHandler(ctx, logger, nil, pprofAddress, "pprof")()
414-
}
415-
if promEnabled {
416-
//nolint:revive
417-
defer serveHandler(ctx, logger, promhttp.Handler(), promAddress, "prometheus")()
418-
}
419-
420431
// Since errCh only has one buffered slot, all routines
421432
// sending on it must be wrapped in a select/default to
422433
// avoid leaving dangling goroutines waiting for the

Diff for: cli/server_test.go

+55
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cli_test
22

33
import (
4+
"bufio"
45
"context"
56
"crypto/ecdsa"
67
"crypto/elliptic"
@@ -10,17 +11,21 @@ import (
1011
"crypto/x509/pkix"
1112
"encoding/json"
1213
"encoding/pem"
14+
"fmt"
1315
"math/big"
1416
"net"
1517
"net/http"
1618
"net/http/httptest"
1719
"net/url"
1820
"os"
1921
"runtime"
22+
"strconv"
23+
"strings"
2024
"testing"
2125
"time"
2226

2327
"github.com/go-chi/chi"
28+
"github.com/stretchr/testify/assert"
2429
"github.com/stretchr/testify/require"
2530
"go.uber.org/goleak"
2631

@@ -374,6 +379,56 @@ func TestServer(t *testing.T) {
374379
cancelFunc()
375380
<-errC
376381
})
382+
t.Run("Prometheus", func(t *testing.T) {
383+
t.Parallel()
384+
ctx, cancelFunc := context.WithCancel(context.Background())
385+
defer cancelFunc()
386+
387+
random, err := net.Listen("tcp", "127.0.0.1:0")
388+
require.NoError(t, err)
389+
_ = random.Close()
390+
tcpAddr, valid := random.Addr().(*net.TCPAddr)
391+
require.True(t, valid)
392+
randomPort := tcpAddr.Port
393+
394+
root, cfg := clitest.New(t,
395+
"server",
396+
"--in-memory",
397+
"--address", ":0",
398+
"--provisioner-daemons", "1",
399+
"--prometheus-enable",
400+
"--prometheus-address", ":"+strconv.Itoa(randomPort),
401+
"--cache-dir", t.TempDir(),
402+
)
403+
serverErr := make(chan error, 1)
404+
go func() {
405+
serverErr <- root.ExecuteContext(ctx)
406+
}()
407+
_ = waitAccessURL(t, cfg)
408+
409+
var res *http.Response
410+
require.Eventually(t, func() bool {
411+
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://127.0.0.1:%d", randomPort), nil)
412+
assert.NoError(t, err)
413+
res, err = http.DefaultClient.Do(req)
414+
return err == nil
415+
}, testutil.WaitShort, testutil.IntervalFast)
416+
417+
scanner := bufio.NewScanner(res.Body)
418+
hasActiveUsers := false
419+
for scanner.Scan() {
420+
// This metric is manually registered to be tracked in the server. That's
421+
// why we test it's tracked here.
422+
if strings.HasPrefix(scanner.Text(), "coderd_api_active_users_duration_hour") {
423+
hasActiveUsers = true
424+
continue
425+
}
426+
}
427+
require.NoError(t, scanner.Err())
428+
require.True(t, hasActiveUsers)
429+
cancelFunc()
430+
<-serverErr
431+
})
377432
}
378433

379434
func generateTLSCertificate(t testing.TB) (certPath, keyPath string) {

Diff for: coderd/coderd.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/go-chi/chi/v5/middleware"
1717
"github.com/klauspost/compress/zstd"
1818
"github.com/pion/webrtc/v3"
19+
"github.com/prometheus/client_golang/prometheus"
1920
sdktrace "go.opentelemetry.io/otel/sdk/trace"
2021
"golang.org/x/xerrors"
2122
"google.golang.org/api/idtoken"
@@ -58,6 +59,7 @@ type Options struct {
5859
GoogleTokenValidator *idtoken.Validator
5960
GithubOAuth2Config *GithubOAuth2Config
6061
OIDCConfig *OIDCConfig
62+
PrometheusRegistry *prometheus.Registry
6163
ICEServers []webrtc.ICEServer
6264
SecureAuthCookie bool
6365
SSHKeygenAlgorithm gitsshkey.Algorithm
@@ -87,6 +89,9 @@ func New(options *Options) *API {
8789
panic(xerrors.Errorf("rego authorize panic: %w", err))
8890
}
8991
}
92+
if options.PrometheusRegistry == nil {
93+
options.PrometheusRegistry = prometheus.NewRegistry()
94+
}
9095

9196
siteCacheDir := options.CacheDir
9297
if siteCacheDir != "" {
@@ -116,7 +121,7 @@ func New(options *Options) *API {
116121
next.ServeHTTP(middleware.NewWrapResponseWriter(w, r.ProtoMajor), r)
117122
})
118123
},
119-
httpmw.Prometheus,
124+
httpmw.Prometheus(options.PrometheusRegistry),
120125
tracing.HTTPMW(api.TracerProvider, "coderd.http"),
121126
)
122127

Diff for: coderd/httpmw/prometheus.go

+45-43
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,31 @@ import (
1212
"github.com/prometheus/client_golang/prometheus/promauto"
1313
)
1414

15-
var (
16-
requestsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
15+
func durationToFloatMs(d time.Duration) float64 {
16+
return float64(d.Milliseconds())
17+
}
18+
19+
func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler {
20+
factory := promauto.With(register)
21+
requestsProcessed := factory.NewCounterVec(prometheus.CounterOpts{
1722
Namespace: "coderd",
1823
Subsystem: "api",
1924
Name: "requests_processed_total",
2025
Help: "The total number of processed API requests",
2126
}, []string{"code", "method", "path"})
22-
requestsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{
27+
requestsConcurrent := factory.NewGauge(prometheus.GaugeOpts{
2328
Namespace: "coderd",
2429
Subsystem: "api",
2530
Name: "concurrent_requests",
2631
Help: "The number of concurrent API requests",
2732
})
28-
websocketsConcurrent = promauto.NewGauge(prometheus.GaugeOpts{
33+
websocketsConcurrent := factory.NewGauge(prometheus.GaugeOpts{
2934
Namespace: "coderd",
3035
Subsystem: "api",
3136
Name: "concurrent_websockets",
3237
Help: "The total number of concurrent API websockets",
3338
})
34-
websocketsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{
39+
websocketsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
3540
Namespace: "coderd",
3641
Subsystem: "api",
3742
Name: "websocket_durations_ms",
@@ -45,58 +50,55 @@ var (
4550
durationToFloatMs(30 * time.Hour),
4651
},
4752
}, []string{"path"})
48-
requestsDist = promauto.NewHistogramVec(prometheus.HistogramOpts{
53+
requestsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
4954
Namespace: "coderd",
5055
Subsystem: "api",
5156
Name: "request_latencies_ms",
5257
Help: "Latency distribution of requests in milliseconds",
5358
Buckets: []float64{1, 5, 10, 25, 50, 100, 500, 1000, 5000, 10000, 30000},
5459
}, []string{"method", "path"})
55-
)
5660

57-
func durationToFloatMs(d time.Duration) float64 {
58-
return float64(d.Milliseconds())
59-
}
61+
return func(next http.Handler) http.Handler {
62+
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
63+
var (
64+
start = time.Now()
65+
method = r.Method
66+
rctx = chi.RouteContext(r.Context())
67+
)
6068

61-
func Prometheus(next http.Handler) http.Handler {
62-
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
63-
var (
64-
start = time.Now()
65-
method = r.Method
66-
rctx = chi.RouteContext(r.Context())
67-
)
68-
sw, ok := w.(chimw.WrapResponseWriter)
69-
if !ok {
70-
panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter")
71-
}
69+
sw, ok := w.(chimw.WrapResponseWriter)
70+
if !ok {
71+
panic("dev error: http.ResponseWriter is not chimw.WrapResponseWriter")
72+
}
7273

73-
var (
74-
dist *prometheus.HistogramVec
75-
distOpts []string
76-
)
77-
// We want to count websockets separately.
78-
if isWebsocketUpgrade(r) {
79-
websocketsConcurrent.Inc()
80-
defer websocketsConcurrent.Dec()
74+
var (
75+
dist *prometheus.HistogramVec
76+
distOpts []string
77+
)
78+
// We want to count WebSockets separately.
79+
if isWebsocketUpgrade(r) {
80+
websocketsConcurrent.Inc()
81+
defer websocketsConcurrent.Dec()
8182

82-
dist = websocketsDist
83-
} else {
84-
requestsConcurrent.Inc()
85-
defer requestsConcurrent.Dec()
83+
dist = websocketsDist
84+
} else {
85+
requestsConcurrent.Inc()
86+
defer requestsConcurrent.Dec()
8687

87-
dist = requestsDist
88-
distOpts = []string{method}
89-
}
88+
dist = requestsDist
89+
distOpts = []string{method}
90+
}
9091

91-
next.ServeHTTP(w, r)
92+
next.ServeHTTP(w, r)
9293

93-
path := rctx.RoutePattern()
94-
distOpts = append(distOpts, path)
95-
statusStr := strconv.Itoa(sw.Status())
94+
path := rctx.RoutePattern()
95+
distOpts = append(distOpts, path)
96+
statusStr := strconv.Itoa(sw.Status())
9697

97-
requestsProcessed.WithLabelValues(statusStr, method, path).Inc()
98-
dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6)
99-
})
98+
requestsProcessed.WithLabelValues(statusStr, method, path).Inc()
99+
dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6)
100+
})
101+
}
100102
}
101103

102104
func isWebsocketUpgrade(r *http.Request) bool {

Diff for: coderd/httpmw/prometheus_test.go

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package httpmw_test
2+
3+
import (
4+
"context"
5+
"net/http"
6+
"net/http/httptest"
7+
"testing"
8+
9+
"github.com/go-chi/chi/v5"
10+
chimw "github.com/go-chi/chi/v5/middleware"
11+
"github.com/prometheus/client_golang/prometheus"
12+
"github.com/stretchr/testify/require"
13+
14+
"github.com/coder/coder/coderd/httpmw"
15+
)
16+
17+
func TestPrometheus(t *testing.T) {
18+
t.Parallel()
19+
t.Run("All", func(t *testing.T) {
20+
req := httptest.NewRequest("GET", "/", nil)
21+
req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, chi.NewRouteContext()))
22+
res := chimw.NewWrapResponseWriter(httptest.NewRecorder(), 0)
23+
reg := prometheus.NewRegistry()
24+
httpmw.Prometheus(reg)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
25+
w.WriteHeader(http.StatusOK)
26+
})).ServeHTTP(res, req)
27+
metrics, err := reg.Gather()
28+
require.NoError(t, err)
29+
require.Greater(t, len(metrics), 0)
30+
})
31+
}

0 commit comments

Comments
 (0)