Skip to content

Commit

Permalink
Add metrics serve
Browse files Browse the repository at this point in the history
  • Loading branch information
jsun-m committed Jan 18, 2024
1 parent fcd4468 commit 574346d
Show file tree
Hide file tree
Showing 20 changed files with 280 additions and 176 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ protocol:

test-internal:
go test -v ./internal/... -bench=./internal/..

prometheus:
cd charts/kube-prometheus-stack && helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --values values.yml
24 changes: 24 additions & 0 deletions charts/kube-prometheus-stack/values.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
prometheus:
enabled: true
prometheusSpec:
additionalScrapeConfigs:
- job_name: beam-pods
metrics_path: /metrics
scheme: http
honor_labels: true
scrape_timeout: 10s
scrape_interval: 15s
enable_http2: true
follow_redirects: true
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- beam
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_container_port_number]
action: keep
regex: 9090
7 changes: 6 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/ecr v1.24.4
github.com/aws/aws-sdk-go-v2/service/kinesis v1.20.0
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.3
github.com/beam-cloud/blobcache v0.0.0-20240111193326-cae10a611c3f
github.com/beam-cloud/clip v0.0.0-20230822004255-4ecd939864f9
github.com/beam-cloud/go-runc v0.0.0-20231222221338-b89899f33170
github.com/bsm/redislock v0.9.4
Expand All @@ -35,6 +36,7 @@ require (
github.com/opencontainers/umoci v0.4.7
github.com/pkg/errors v0.9.1
github.com/pressly/goose/v3 v3.17.0
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/procfs v0.12.0
github.com/redis/go-redis/v9 v9.4.0
github.com/sirupsen/logrus v1.9.3
Expand Down Expand Up @@ -67,7 +69,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.3 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.21.3 // indirect
github.com/aws/smithy-go v1.19.0 // indirect
github.com/beam-cloud/blobcache v0.0.0-20240111193326-cae10a611c3f // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/briandowns/spinner v1.23.0 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/console v1.0.3 // indirect
Expand Down Expand Up @@ -105,6 +107,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-sqlite3 v1.14.19 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
Expand All @@ -117,6 +120,8 @@ require (
github.com/opencontainers/runc v1.1.10 // indirect
github.com/pierrec/lz4/v4 v4.1.19 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/rootless-containers/proto v0.1.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sethvargo/go-retry v0.2.4 // indirect
Expand Down
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ github.com/beam-cloud/clip v0.0.0-20230822004255-4ecd939864f9 h1:KL6HsfcXc6BR7f8
github.com/beam-cloud/clip v0.0.0-20230822004255-4ecd939864f9/go.mod h1:cbVxTplcnELB3f55OGGJvBG4Ta40X+yE0eIE2M3oU3c=
github.com/beam-cloud/go-runc v0.0.0-20231222221338-b89899f33170 h1:KYVz18kobBGU8URM9Srn++2tcL9e0PcwYyH0Z4GYicM=
github.com/beam-cloud/go-runc v0.0.0-20231222221338-b89899f33170/go.mod h1:aw0zhDi28Hemve0raHcfU9suxZwkCpyNANOEwKZSSXo=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/briandowns/spinner v1.23.0 h1:alDF2guRWqa/FOZZYWjlMIx2L6H0wyewPxo/CH4Pt2A=
github.com/briandowns/spinner v1.23.0/go.mod h1:rPG4gmXeN3wQV/TsAY4w8lPdIM6RX3yqeBQJSrbXjuE=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
Expand Down Expand Up @@ -316,6 +318,8 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
github.com/mattn/go-sqlite3 v1.14.19 h1:fhGleo2h1p8tVChob4I9HpmVFIAkKGpiukdrgQbWfGI=
github.com/mattn/go-sqlite3 v1.14.19/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
Expand Down Expand Up @@ -382,7 +386,13 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pressly/goose/v3 v3.17.0 h1:fT4CL3LRm4kfyLuPWzDFAoxjR5ZHjeJ6uQhibQtBaIs=
github.com/pressly/goose/v3 v3.17.0/go.mod h1:22aw7NpnCPlS86oqkO/+3+o9FuCaJg4ZVWRUO3oGzHQ=
github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8=
github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY=
github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/redis/go-redis/v9 v9.4.0 h1:Yzoz33UZw9I/mFhx4MNrB6Fk+XHO1VukNcCa1+lwyKk=
Expand Down Expand Up @@ -510,6 +520,7 @@ golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ=
golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down
2 changes: 2 additions & 0 deletions internal/common/config.default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ worker:
defaultWorkerCPURequest: 2000
defaultWorkerMemoryRequest: 1024
metrics:
prometheus:
enabled: false
kinesis:
streamName: beam-realtime-metrics
region: us-east-1
Expand Down
5 changes: 3 additions & 2 deletions internal/gateway/config.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package gateway

type config struct {
GrpcServerAddress string
HttpServerAddress string
GrpcServerAddress string
HttpServerAddress string
PrometheusMetricsAddress string
}

var GatewayConfig config = config{
Expand Down
10 changes: 8 additions & 2 deletions internal/gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type Gateway struct {
redisClient *common.RedisClient
ContainerRepo repository.ContainerRepository
BackendRepo repository.BackendRepository
metricsRepo repository.MetricsStatsdRepository
metricsRepo repository.MetricsRepository
Storage storage.Storage
Scheduler *scheduler.Scheduler
ctx context.Context
Expand Down Expand Up @@ -85,7 +85,7 @@ func NewGateway() (*Gateway, error) {
}

containerRepo := repository.NewContainerRedisRepository(redisClient)
metricsRepo := repository.NewMetricsStatsdRepository()
metricsRepo := repository.NewMetricsPrometheusRepository(config.Metrics.Prometheus.Enabled)

gateway.config = config
gateway.ContainerRepo = containerRepo
Expand Down Expand Up @@ -222,6 +222,12 @@ func (g *Gateway) Start() error {
}
}()

go func() {
if err := g.metricsRepo.Init(); err != nil {
log.Fatalf("Failed to start metrics server: %v", err)
}
}()

log.Println("Gateway http server running @", GatewayConfig.HttpServerAddress)
log.Println("Gateway grpc server running @", GatewayConfig.GrpcServerAddress)

Expand Down
15 changes: 3 additions & 12 deletions internal/repository/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,9 @@ type WorkerPoolRepository interface {
RemovePool(name string) error
}

type MetricsStatsdRepository interface {
ContainerStarted(containerId string, workerId string)
ContainerStopped(containerId string, workerId string)
ContainerRequested(containerId string)
ContainerScheduled(containerId string)
ContainerDuration(containerId string, workerId string, timestampNs int64, duration time.Duration)
BeamDeploymentRequestDuration(bucketName string, duration time.Duration)
BeamDeploymentRequestStatus(bucketName string, status int)
BeamDeploymentRequestCount(bucketName string)
WorkerStarted(workerId string)
WorkerStopped(workerId string)
WorkerDuration(workerId string, timestampNs int64, duration time.Duration)
type MetricsRepository interface {
Init() error
ContainerDurationSeconds(containerId string, workerId string, duration time.Duration)
}

type MetricsStreamRepository interface {
Expand Down
87 changes: 87 additions & 0 deletions internal/repository/metrics_prometheus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package repository

import (
"context"
"log"
"time"

"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

var PrometheusMetricsAddress = "0.0.0.0:9090"

type PrometheusRepository struct {
registry *prometheus.Registry
registeredMetrics map[string]prometheus.Collector
ctx *context.Context
enabled bool
}

func NewMetricsPrometheusRepository(enabled bool) *PrometheusRepository {
if !enabled {
return &PrometheusRepository{
enabled: false,
}
}

reg := prometheus.NewRegistry()
reg.MustRegister(
collectors.NewGoCollector(), // Metrics from Go runtime.
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), // Metrics about the current UNIX process.
)

return &PrometheusRepository{
registry: reg,
registeredMetrics: map[string]prometheus.Collector{},
enabled: true,
}
}

func (r *PrometheusRepository) Init() error {
if !r.enabled {
return nil
}

prometheusServer := echo.New()
prometheusServer.HideBanner = true
prometheusServer.HidePort = true

prometheusServer.Use(middleware.Recover())

prometheusServer.GET("/metrics", echo.WrapHandler(promhttp.HandlerFor(r.registry, promhttp.HandlerOpts{
EnableOpenMetrics: true,
})))

log.Printf("Starting Prometheus metrics server at %s\n", PrometheusMetricsAddress)

return prometheusServer.Start(PrometheusMetricsAddress)
}

func (r *PrometheusRepository) ContainerDurationSeconds(containerId string, workerId string, duration time.Duration) {
if !r.enabled {
return
}

var metricName = "container_duration_seconds"
var collector prometheus.Collector
collector, exists := r.registeredMetrics[metricName]

if !exists {
collector = promauto.With(r.registry).NewGaugeVec(
prometheus.GaugeOpts{
Name: metricName,
},
[]string{"container_id", "worker_id"},
)

r.registeredMetrics[metricName] = collector
}

handle := collector.(*prometheus.GaugeVec)
handle.WithLabelValues(containerId, workerId).Add(duration.Seconds())
}
90 changes: 0 additions & 90 deletions internal/repository/metrics_statsd.go

This file was deleted.

8 changes: 0 additions & 8 deletions internal/repository/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,3 @@ func NewContainerRedisRepositoryForTest(rdb *common.RedisClient) ContainerReposi
lock := common.NewRedisLock(rdb)
return &ContainerRedisRepository{rdb: rdb, lock: lock}
}

func NewMetricsStatsdRepositoryForTest() MetricsStatsdRepository {
return &MetricsStatsd{
statSender: common.InitStatsdSender(
"test:8125",
),
}
}
Loading

0 comments on commit 574346d

Please sign in to comment.