Skip to content

Commit

Permalink
Improvements and fixes (#38)
Browse files Browse the repository at this point in the history
- [PROD-1287] - Add device or file name to the labels for storage-engine stats
- [PROD-1343] - Use latency info command based on Aerospike build version
- [TOOLS-1601] - Add constant labels to `aerospike_node_up` metric.
- [TOOLS-1595] - Fix info commands to get service address and port for TLS and non-TLS service.
  • Loading branch information
spkesan authored Nov 27, 2020
1 parent dc82264 commit 6c68b6c
Show file tree
Hide file tree
Showing 9 changed files with 354 additions and 51 deletions.
14 changes: 8 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@ go 1.14

require (
github.com/BurntSushi/toml v0.3.1
github.com/aerospike/aerospike-client-go v3.0.4+incompatible
github.com/aerospike/aerospike-client-go v3.1.1+incompatible
github.com/gobwas/glob v0.2.3
github.com/hashicorp/go-version v1.2.1
github.com/jameskeane/bcrypt v0.0.0-20120420032655-c3cd44c1e20f
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/onsi/ginkgo v1.14.0 // indirect
github.com/prometheus/client_golang v1.7.1
github.com/sirupsen/logrus v1.6.0
github.com/yuin/gopher-lua v0.0.0-20200603152657-dc2b0ca8b37e // indirect
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 // indirect
golang.org/x/sys v0.0.0-20200724161237-0e2f3a69832c // indirect
github.com/prometheus/client_golang v1.8.0
github.com/prometheus/common v0.15.0 // indirect
github.com/sirupsen/logrus v1.7.0
github.com/yuin/gopher-lua v0.0.0-20200816102855-ee81675732da // indirect
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 // indirect
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 // indirect
google.golang.org/protobuf v1.25.0 // indirect
)
277 changes: 269 additions & 8 deletions go.sum

Large diffs are not rendered by default.

42 changes: 27 additions & 15 deletions observer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@ type Observer struct {
}

var (
// Metric for node active status
nodeActiveDesc = prometheus.NewDesc(
"aerospike_node_up",
"Aerospike node active status",
[]string{"cluster_name", "service", "build"},
nil,
)
// aerospike_node_up metric descriptor
nodeActiveDesc *prometheus.Desc

// Node service endpoint, cluster name and build version
gService, gClusterName, gBuild string

// Number of retries on info request
retryCount = 3

// Default info commands
ikClusterName = "cluster-name"
ikService = "service-clear-std"
ikBuild = "build"
)

func initTLS() *tls.Config {
Expand Down Expand Up @@ -121,6 +121,14 @@ func initTLS() *tls.Config {
}

func newObserver(server *aero.Host, user, pass string) (o *Observer, err error) {
// initialize aerospike_node_up metric descriptor
nodeActiveDesc = prometheus.NewDesc(
"aerospike_node_up",
"Aerospike node active status",
[]string{"cluster_name", "service", "build"},
config.AeroProm.MetricLabels,
)

// use all cpus in the system for concurrency
authMode := strings.ToLower(strings.TrimSpace(config.Aerospike.AuthMode))
if authMode != "internal" && authMode != "external" {
Expand All @@ -140,6 +148,10 @@ func newObserver(server *aero.Host, user, pass string) (o *Observer, err error)

clientPolicy.TlsConfig = initTLS()

if clientPolicy.TlsConfig != nil {
ikService = "service-tls-std"
}

createNewConnection := func() (*aero.Connection, error) {
conn, err := aero.NewConnection(clientPolicy, server)
if err != nil {
Expand Down Expand Up @@ -200,7 +212,7 @@ func (o *Observer) Collect(ch chan<- prometheus.Metric) {
return
}

gClusterName, gService, gBuild = stats["cluster-name"], stats["service"], stats["build"]
gClusterName, gService, gBuild = stats[ikClusterName], stats[ikService], stats[ikBuild]
ch <- prometheus.MustNewConstMetric(nodeActiveDesc, prometheus.GaugeValue, 1.0, gClusterName, gService, gBuild)
}

Expand Down Expand Up @@ -245,31 +257,31 @@ func (o *Observer) requestInfo(retryCount int, infoKeys []string) (map[string]st
func (o *Observer) refresh(ch chan<- prometheus.Metric) (map[string]string, error) {
log.Debugf("Refreshing node %s", fullHost)

// get first keys
// fetch first set of info keys
var infoKeys []string
for _, c := range o.watchers {
if keys := c.infoKeys(); len(keys) > 0 {
if keys := c.passOneKeys(); len(keys) > 0 {
infoKeys = append(infoKeys, keys...)
}
}

// request first round of keys
// info request for first set of info keys
rawMetrics, err := o.requestInfo(retryCount, infoKeys)
if err != nil {
return nil, err
}

// get first keys
infoKeys = []string{"cluster-name", "service", "build"}
// fetch second set of info keys
infoKeys = []string{ikClusterName, ikService, ikBuild}
watcherInfoKeys := make([][]string, len(o.watchers))
for i, c := range o.watchers {
if keys := c.detailKeys(rawMetrics); len(keys) > 0 {
if keys := c.passTwoKeys(rawMetrics); len(keys) > 0 {
infoKeys = append(infoKeys, keys...)
watcherInfoKeys[i] = keys
}
}

// request second round of keys
// info request for second set of info keys
nRawMetrics, err := o.requestInfo(retryCount, infoKeys)
if err != nil {
return rawMetrics, err
Expand Down
4 changes: 2 additions & 2 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ const (
)

type Watcher interface {
infoKeys() []string
detailKeys(rawMetrics map[string]string) []string
passOneKeys() []string
passTwoKeys(rawMetrics map[string]string) []string
refresh(infoKeys []string, rawMetrics map[string]string, ch chan<- prometheus.Metric) error
describe(ch chan<- *prometheus.Desc)
}
Expand Down
38 changes: 32 additions & 6 deletions watcher_latency.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"github.com/prometheus/client_golang/prometheus"

goversion "github.com/hashicorp/go-version"
log "github.com/sirupsen/logrus"
)

Expand All @@ -13,12 +14,37 @@ func (lw *LatencyWatcher) describe(ch chan<- *prometheus.Desc) {
return
}

func (lw *LatencyWatcher) infoKeys() []string {
return nil
func (lw *LatencyWatcher) passOneKeys() []string {
return []string{"build"}
}

func (lw *LatencyWatcher) detailKeys(rawMetrics map[string]string) []string {
return []string{"latencies:", "latency:"}
// passTwoKeys selects the latency info command(s) for the second info request.
//
// The choice is driven by the "build" entry of rawMetrics:
//   - build missing or empty: both "latencies:" and "latency:" are returned;
//   - build or the reference version cannot be parsed: a warning is logged
//     and both commands are returned;
//   - build >= 5.1.0.0: only "latencies:" is returned;
//   - otherwise: only "latency:" is returned.
func (lw *LatencyWatcher) passTwoKeys(rawMetrics map[string]string) (latencyCommands []string) {
	// Builds at or above this version get the "latencies:" command.
	const minLatenciesBuild = "5.1.0.0"

	// Fallback used whenever the build version cannot be determined.
	bothCommands := []string{"latencies:", "latency:"}

	build := rawMetrics["build"]
	if build == "" {
		return bothCommands
	}

	buildVersion, err := goversion.NewVersion(build)
	if err != nil {
		log.Warnf("Error parsing build version %s: %v", build, err)
		return bothCommands
	}

	minVersion, err := goversion.NewVersion(minLatenciesBuild)
	if err != nil {
		log.Warnf("Error parsing reference version %s: %v", minLatenciesBuild, err)
		return bothCommands
	}

	if buildVersion.LessThan(minVersion) {
		return []string{"latency:"}
	}

	return []string{"latencies:"}
}

func (lw *LatencyWatcher) refresh(infoKeys []string, rawMetrics map[string]string, ch chan<- prometheus.Metric) error {
Expand All @@ -37,12 +63,12 @@ func (lw *LatencyWatcher) refresh(infoKeys []string, rawMetrics map[string]strin
for i, labelValue := range opLatencyStats.(StatsMap)["bucketLabels"].([]string) {
// aerospike_latencies_<operation>_<timeunit>_bucket metric - Less than or equal to histogram buckets
pm := makeMetric("aerospike_latencies", operation+"_"+opLatencyStats.(StatsMap)["timeUnit"].(string)+"_bucket", mtGauge, config.AeroProm.MetricLabels, "cluster_name", "service", "ns", "le")
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, opLatencyStats.(StatsMap)["bucketValues"].([]float64)[i], rawMetrics["cluster-name"], rawMetrics["service"], namespaceName, labelValue)
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, opLatencyStats.(StatsMap)["bucketValues"].([]float64)[i], rawMetrics[ikClusterName], rawMetrics[ikService], namespaceName, labelValue)

// aerospike_latencies_<operation>_<timeunit>_count metric
if i == 0 {
pm = makeMetric("aerospike_latencies", operation+"_"+opLatencyStats.(StatsMap)["timeUnit"].(string)+"_count", mtGauge, config.AeroProm.MetricLabels, "cluster_name", "service", "ns")
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, opLatencyStats.(StatsMap)["bucketValues"].([]float64)[i], rawMetrics["cluster-name"], rawMetrics["service"], namespaceName)
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, opLatencyStats.(StatsMap)["bucketValues"].([]float64)[i], rawMetrics[ikClusterName], rawMetrics[ikService], namespaceName)
}
}
}
Expand Down
12 changes: 7 additions & 5 deletions watcher_namespaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,11 @@ func (nw *NamespaceWatcher) describe(ch chan<- *prometheus.Desc) {
return
}

func (nw *NamespaceWatcher) infoKeys() []string {
func (nw *NamespaceWatcher) passOneKeys() []string {
return []string{"namespaces"}
}

func (nw *NamespaceWatcher) detailKeys(rawMetrics map[string]string) []string {
func (nw *NamespaceWatcher) passTwoKeys(rawMetrics map[string]string) []string {
s := rawMetrics["namespaces"]
list := strings.Split(s, ";")

Expand Down Expand Up @@ -427,7 +427,7 @@ func (nw *NamespaceWatcher) refresh(infoKeys []string, rawMetrics map[string]str
continue
}

ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics["cluster-name"], rawMetrics["service"], nsName)
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics[ikClusterName], rawMetrics[ikService], nsName)
}

for stat, value := range stats {
Expand All @@ -445,13 +445,15 @@ func (nw *NamespaceWatcher) refresh(infoKeys []string, rawMetrics map[string]str
continue
}

pm := makeMetric("aerospike_namespace", "storage-engine_"+metricType+"_"+metricName, mtGauge, config.AeroProm.MetricLabels, "cluster_name", "service", "ns", metricType+"_index")
deviceOrFileName := stats["storage-engine."+metricType+"["+metricIndex+"]"]
pm := makeMetric("aerospike_namespace", "storage-engine_"+metricType+"_"+metricName, mtGauge, config.AeroProm.MetricLabels, "cluster_name", "service", "ns", metricType+"_index", metricType)

pv, err := tryConvert(value)
if err != nil {
continue
}

ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics["cluster-name"], rawMetrics["service"], nsName, metricIndex)
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics[ikClusterName], rawMetrics[ikService], nsName, metricIndex, deviceOrFileName)
}
}

Expand Down
6 changes: 3 additions & 3 deletions watcher_node_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ func (sw *StatsWatcher) describe(ch chan<- *prometheus.Desc) {
return
}

func (sw *StatsWatcher) infoKeys() []string {
func (sw *StatsWatcher) passOneKeys() []string {
return nil
}

func (sw *StatsWatcher) detailKeys(rawMetrics map[string]string) []string {
func (sw *StatsWatcher) passTwoKeys(rawMetrics map[string]string) []string {
return []string{"statistics"}
}

Expand Down Expand Up @@ -168,7 +168,7 @@ func (sw *StatsWatcher) refresh(infoKeys []string, rawMetrics map[string]string,
continue
}

ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics["cluster-name"], rawMetrics["service"])
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics[ikClusterName], rawMetrics[ikService])
}

return nil
Expand Down
6 changes: 3 additions & 3 deletions watcher_sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ func (sw *SetWatcher) describe(ch chan<- *prometheus.Desc) {
return
}

func (sw *SetWatcher) infoKeys() []string {
func (sw *SetWatcher) passOneKeys() []string {
return nil
}

func (sw *SetWatcher) detailKeys(rawMetrics map[string]string) []string {
func (sw *SetWatcher) passTwoKeys(rawMetrics map[string]string) []string {
return []string{"sets"}
}

Expand Down Expand Up @@ -63,7 +63,7 @@ func (sw *SetWatcher) refresh(infoKeys []string, rawMetrics map[string]string, c
continue
}

ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics["cluster-name"], rawMetrics["service"], stats["ns"], stats["set"])
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics[ikClusterName], rawMetrics[ikService], stats["ns"], stats["set"])
}
}

Expand Down
6 changes: 3 additions & 3 deletions watcher_xdr.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ func (xw *XdrWatcher) describe(ch chan<- *prometheus.Desc) {
return
}

func (xw *XdrWatcher) infoKeys() []string {
func (xw *XdrWatcher) passOneKeys() []string {
return []string{"get-config:context=xdr"}
}

func (xw *XdrWatcher) detailKeys(rawMetrics map[string]string) []string {
func (xw *XdrWatcher) passTwoKeys(rawMetrics map[string]string) []string {
res := rawMetrics["get-config:context=xdr"]
list := parseStats(res, ";")
dcsList := strings.Split(list["dcs"], ",")
Expand Down Expand Up @@ -86,7 +86,7 @@ func (xw *XdrWatcher) refresh(infoKeys []string, rawMetrics map[string]string, c
continue
}

ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics["cluster-name"], rawMetrics["service"], dcName)
ch <- prometheus.MustNewConstMetric(pm.desc, pm.valueType, pv, rawMetrics[ikClusterName], rawMetrics[ikService], dcName)
}
}

Expand Down

0 comments on commit 6c68b6c

Please sign in to comment.