Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove high cardinality labels server_route_id and server_gateway_id #192

Merged
merged 3 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ jobs:
--disable gocritic \
--enable stylecheck \
--enable unconvert \
--enable gocyclo \
--enable gofmt \
--enable misspell \
--enable unparam \
Expand Down
131 changes: 2 additions & 129 deletions README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@
{
"expr": "rate(nats_core_route_sent_msg_count{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -527,7 +527,7 @@
{
"expr": "rate(nats_core_route_recv_msg_count{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -641,7 +641,7 @@
{
"expr": "rate(nats_core_route_sent_bytes{server_cluster=~\"$cluster\"}[1m])",
"interval": "",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -739,7 +739,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -852,7 +852,7 @@
"targets": [
{
"expr": "nats_core_route_pending_bytes{server_cluster=~\"$cluster\"}",
"legendFormat": "{{server_name}} - {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3091,7 +3091,7 @@
"targets": [
{
"expr": "rate(nats_core_route_sent_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3178,7 +3178,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_bytes{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3265,7 +3265,7 @@
"targets": [
{
"expr": "rate(nats_core_route_sent_msg_count{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3352,7 +3352,7 @@
"targets": [
{
"expr": "rate(nats_core_route_recv_msg_count{server_cluster=~\"$cluster\"}[1m])",
"legendFormat": "{{server_name}} - ID {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down Expand Up @@ -3526,7 +3526,7 @@
"targets": [
{
"expr": "nats_core_route_pending_bytes{server_cluster=~\"$cluster\"}",
"legendFormat": "{{server_name}} - {{server_route_id}}",
"legendFormat": "Server {{server_name}} - Route {{server_route_name}}",
"refId": "A"
}
],
Expand Down
1 change: 0 additions & 1 deletion scripts/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ $(go env GOPATH)/bin/golangci-lint run \
--enable interfacer \
--enable unconvert \
--enable dupl \
--enable gocyclo \
--enable gofmt \
--enable goimports \
--enable misspell \
Expand Down
92 changes: 88 additions & 4 deletions surveyor/collector_statz.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"io"
"slices"
"sort"
"strconv"
"strings"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/nats-io/nats.go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/exp/maps"
"golang.org/x/sync/singleflight"
)

Expand Down Expand Up @@ -139,6 +141,8 @@ type StatzCollector struct {
descs statzDescs
collectAccounts bool
natsUp *prometheus.Desc
routeIDRemap map[string]map[uint64]int
gatewayIDRemap map[string]map[uint64]int

serverLabels []string
serverInfoLabels []string
Expand Down Expand Up @@ -221,11 +225,25 @@ func (sc *StatzCollector) serverInfoLabelValues(sm *server.ServerStatsMsg) []str
}

func (sc *StatzCollector) routeLabelValues(sm *server.ServerStatsMsg, rStat *server.RouteStat) []string {
return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, strconv.FormatUint(rStat.ID, 10)}
idxS := strconv.FormatUint(rStat.ID, 10)
if byName, ok := sc.routeIDRemap[rStat.Name]; ok {
if idx, ok := byName[rStat.ID]; ok {
idxS = strconv.Itoa(idx)
}
}

return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, rStat.Name, idxS}
}

func (sc *StatzCollector) gatewayLabelValues(sm *server.ServerStatsMsg, gStat *server.GatewayStat) []string {
return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, gStat.Name, strconv.FormatUint(gStat.ID, 10)}
idxS := strconv.FormatUint(gStat.ID, 10)
if byName, ok := sc.gatewayIDRemap[gStat.Name]; ok {
if idx, ok := byName[gStat.ID]; ok {
idxS = strconv.Itoa(idx)
}
}

return []string{sm.Server.Cluster, serverName(sm), sm.Server.ID, gStat.Name, idxS}
}

// Up/Down on servers - look at discovery mechanisms in Prometheus - aging out, how does it work?
Expand Down Expand Up @@ -383,12 +401,14 @@ func NewStatzCollector(nc *nats.Conn, logger *logrus.Logger, numServers int, ser
servers: make(map[string]bool),
doneCh: make(chan struct{}, 1),
collectAccounts: accounts,
routeIDRemap: make(map[string]map[uint64]int),
gatewayIDRemap: make(map[string]map[uint64]int),

// TODO - normalize these if possible. Jetstream varies from the other server labels
serverLabels: []string{"server_cluster", "server_name", "server_id"},
serverInfoLabels: []string{"server_cluster", "server_name", "server_id", "server_version"},
routeLabels: []string{"server_cluster", "server_name", "server_id", "server_route_id"},
gatewayLabels: []string{"server_cluster", "server_name", "server_id", "server_gateway_name", "server_gateway_id"},
routeLabels: []string{"server_cluster", "server_name", "server_id", "server_route_name", "server_route_name_idx"},
gatewayLabels: []string{"server_cluster", "server_name", "server_id", "server_gateway_name", "server_gateway_name_idx"},
jsServerLabels: []string{"server_id", "server_name", "cluster_name"},
jsServerInfoLabels: []string{"server_name", "server_host", "server_id", "server_cluster", "server_domain", "server_version", "server_jetstream"},
constLabels: constLabels,
Expand Down Expand Up @@ -1013,6 +1033,14 @@ func (sc *StatzCollector) Collect(ch chan<- prometheus.Metric) {
}
}
}

pairs := make([]nameIDPair, len(sm.Stats.Routes))
for i, rs := range sm.Stats.Routes {
pairs[i].id = rs.ID
pairs[i].name = rs.Name
}
sc.routeIDRemap = remapIDToIdx(pairs, sc.routeIDRemap)

for _, rs := range sm.Stats.Routes {
labels = sc.routeLabelValues(sm, rs)
metrics.newGaugeMetric(sc.descs.RouteSentMsgs, float64(rs.Sent.Msgs), labels)
Expand All @@ -1022,6 +1050,13 @@ func (sc *StatzCollector) Collect(ch chan<- prometheus.Metric) {
metrics.newGaugeMetric(sc.descs.RoutePending, float64(rs.Pending), labels)
}

pairs = make([]nameIDPair, len(sm.Stats.Gateways))
for i, rs := range sm.Stats.Gateways {
pairs[i].id = rs.ID
pairs[i].name = rs.Name
}
sc.gatewayIDRemap = remapIDToIdx(pairs, sc.gatewayIDRemap)

for _, gw := range sm.Stats.Gateways {
labels = sc.gatewayLabelValues(sm, gw)
metrics.newGaugeMetric(sc.descs.GatewaySentMsgs, float64(gw.Sent.Msgs), labels)
Expand Down Expand Up @@ -1184,3 +1219,52 @@ func unmarshalMsg(msg *nats.Msg, v any) error {

return json.Unmarshal(data, v)
}

type nameIDPair struct {
name string
id uint64
}

func remapIDToIdx(pairs []nameIDPair, existingMapping map[string]map[uint64]int) map[string]map[uint64]int {
caleblloyd marked this conversation as resolved.
Show resolved Hide resolved
newMapping := make(map[string]map[uint64]int)

// give existing the same idx
for _, rs := range pairs {
newByName, ok := newMapping[rs.name]
if !ok {
newByName = make(map[uint64]int)
newMapping[rs.name] = newByName
}

existingByName, ok := existingMapping[rs.name]
if !ok {
continue
}

idx, ok := existingByName[rs.id]
if !ok {
continue
}

newByName[rs.id] = idx
}

// assign new ones new idx
for _, path := range pairs {
newByName := newMapping[path.name]
_, ok := newByName[path.id]
if ok {
continue
}

vals := maps.Values(newByName)
for i := 0; i <= len(vals); i++ {
if !slices.Contains(vals, i) {
newByName[path.id] = i
break
}
}
}

return newMapping
}
49 changes: 49 additions & 0 deletions surveyor/collector_statz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package surveyor

import (
"reflect"
"testing"
)

func TestRemapIdToIdx(t *testing.T) {
existingMapping := map[string]map[uint64]int{
"a": {
100: 0,
200: 2,
},
"b": {
100: 0,
},
}

pairs := []nameIDPair{
{name: "a", id: 200},
{name: "a", id: 100},
{name: "a", id: 300},
{name: "a", id: 400},
{name: "b", id: 200},
{name: "c", id: 200},
{name: "c", id: 100},
}

newMapping := remapIDToIdx(pairs, existingMapping)
expected := map[string]map[uint64]int{
"a": {
100: 0,
200: 2,
300: 1,
400: 3,
},
"b": {
200: 0,
},
"c": {
200: 0,
100: 1,
},
}

if !reflect.DeepEqual(expected, newMapping) {
t.Fatalf("Invalid mapping config; want: %v; got: %v", expected, newMapping)
}
}
11 changes: 7 additions & 4 deletions surveyor/surveyor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,14 @@ func TestSurveyor_Basic(t *testing.T) {
if !strings.Contains(output, "server_gateway_name") {
t.Fatalf("invalid output, missing 'server_gateway_name': %v\n", output)
}
if !strings.Contains(output, "server_gateway_id") {
t.Fatalf("invalid output, missing 'server_gateway_id': %v\n", output)
if !strings.Contains(output, "server_gateway_name_idx") {
t.Fatalf("invalid output, missing 'server_gateway_name_idx': %v\n", output)
}
if !strings.Contains(output, "server_route_id") {
t.Fatalf("invalid output, missing 'server_route_id': %v\n", output)
if !strings.Contains(output, "server_route_name") {
t.Fatalf("invalid output, missing 'server_route_name': %v\n", output)
}
if !strings.Contains(output, "server_route_name_idx") {
t.Fatalf("invalid output, missing 'server_route_name_idx': %v\n", output)
}
if !strings.Contains(output, "nats_survey_surveyed_count 3") {
t.Fatalf("invalid output, missing 'nats_survey_surveyed_count 3': %v\n", output)
Expand Down