From 67d84ccb3dfae6db23db16956b7ae4e6b8546173 Mon Sep 17 00:00:00 2001 From: Arno Uhlig Date: Thu, 23 Nov 2017 11:34:20 -0800 Subject: [PATCH] rename and add some more metrics. also add metric unit test --- pkg/controller/ground.go | 3 +- pkg/controller/metrics.go | 78 ++++++++++----- pkg/controller/metrics_test.go | 167 +++++++++++++++++++++++++++++++++ 3 files changed, 224 insertions(+), 24 deletions(-) create mode 100644 pkg/controller/metrics_test.go diff --git a/pkg/controller/ground.go b/pkg/controller/ground.go index fe539bc983..f1077daa57 100644 --- a/pkg/controller/ground.go +++ b/pkg/controller/ground.go @@ -136,7 +136,8 @@ func (op *GroundControl) handler(key string) error { } else { kluster := obj.(*v1.Kluster) glog.V(5).Infof("Handling kluster %v in phase %q", kluster.Name, kluster.Status.Phase) - setMetricStatusPhaseForKluster(kluster.GetName(), kluster.Status.Phase) + setMetricKlusterInfo(kluster.GetNamespace(),kluster.GetName(),kluster.Status.Version,kluster.Spec.Openstack.ProjectID,kluster.GetAnnotations(),kluster.GetLabels()) + setMetricKlusterStatusPhase(kluster.GetName(), kluster.Status.Phase) switch phase := kluster.Status.Phase; phase { case models.KlusterPhasePending: diff --git a/pkg/controller/metrics.go b/pkg/controller/metrics.go index 1ec1eb74f0..3bbb39deec 100644 --- a/pkg/controller/metrics.go +++ b/pkg/controller/metrics.go @@ -15,29 +15,36 @@ const ( metricNamespace = "kubernikus" ) -var klusterInstancesTotal = prometheus.NewGaugeVec( +var klusterPhases = []models.KlusterPhase{ + models.KlusterPhasePending, + models.KlusterPhaseCreating, + models.KlusterPhaseRunning, + models.KlusterPhaseTerminating, +} + +var klusterInfo = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metricNamespace, - Name: "kluster_instances_total", - Help: "total number of klusters", + Name: "kluster_info", + Help: "detailed information on a kluster", }, - []string{"domain_id", "project_id"}, + []string{"kluster_namespace","kluster_name","kluster_version","creator","account","project_id"}, ) var klusterStatusPhase = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metricNamespace, Name: "kluster_status_phase", - Help: "The phase the kluster is currently in", + Help: "the phase the kluster is currently in", }, []string{"kluster_id", "phase"}, ) -var nodePoolInfo = prometheus.NewGaugeVec( +var nodePoolSize = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metricNamespace, - Name: "node_pool_info", - Help: "information for a node pool", + Name: "node_pool_size", + Help: "size of a node pool", }, []string{"kluster_id", "node_pool", "image_name", "flavor_name"}, ) @@ -46,11 +53,23 @@ var nodePoolStatus = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metricNamespace, Name: "node_pool_status", - Help: "status of the node pool", + Help: "status of the node pool and the number of nodes nodes in that status", }, []string{"kluster_id", "node_pool", "status"}, ) +func setMetricKlusterInfo(namespace, name, version, projectID string, annotations, labels map[string]string) { + promLabels := prometheus.Labels{ + "kluster_namespace": namespace, + "kluster_name": name, + "kluster_version": version, + "creator": getCreatorFromAnnotations(annotations), + "account": getAccountFromLabels(labels), + "project_id": projectID, + } + klusterInfo.With(promLabels).Set(1) +} + /* kubernikus_kluster_status_phase{"kluster_id"="","phase"=""} < 1|0 > kubernikus_kluster_status_phase{"kluster_id"="","phase"="creating"} 1 @@ -58,30 +77,27 @@ kubernikus_kluster_status_phase{"kluster_id"="","phase"="running"} 0 kubernikus_kluster_status_phase{"kluster_id"="","phase"="pending"} 0 kubernikus_kluster_status_phase{"kluster_id"="","phase"="terminating"} 0 */ -func setMetricStatusPhaseForKluster(klusterName string, klusterPhase models.KlusterPhase) { - // Set current phase to 1, others to 0 if it is set. - if klusterPhase != "" { +func setMetricKlusterStatusPhase(klusterName string, klusterPhase models.KlusterPhase) { + // Set current phase to 1, others to 0 + for _, phase := range klusterPhases { labels := prometheus.Labels{ "kluster_id": klusterName, - "phase": string(klusterPhase), + "phase": string(phase), } - klusterStatusPhase.With(labels).Set(boolToFloat64(klusterPhase == models.KlusterPhaseCreating)) - klusterStatusPhase.With(labels).Set(boolToFloat64(klusterPhase == models.KlusterPhaseRunning)) - klusterStatusPhase.With(labels).Set(boolToFloat64(klusterPhase == models.KlusterPhasePending)) - klusterStatusPhase.With(labels).Set(boolToFloat64(klusterPhase == models.KlusterPhaseTerminating)) + klusterStatusPhase.With(labels).Set(boolToFloat64(klusterPhase == phase)) } } /* -kubernikus_node_pool_info{"kluster_id"=" +kubernikus_node_pool_size{"kluster_id"=" */ -func setMetricNodePoolSize(klusterID, nodePoolName, imageName, flavorName string, nodePoolSize int64) { - nodePoolInfo.With(prometheus.Labels{ +func setMetricNodePoolSize(klusterID, nodePoolName, imageName, flavorName string, size int64) { + nodePoolSize.With(prometheus.Labels{ "kluster_id": klusterID, "node_pool": nodePoolName, "image_name": imageName, "flavor_name": flavorName, - }).Set(float64(nodePoolSize)) + }).Set(float64(size)) } /* @@ -109,11 +125,27 @@ func boolToFloat64(b bool) float64 { return 0 } +func getCreatorFromAnnotations(annotations map[string]string) string { + creator, ok := annotations["creator"] + if !ok { + return "NA" + } + return creator +} + +func getAccountFromLabels(labels map[string]string) string { + account, ok := labels["account"] + if !ok { + return "NA" + } + return account +} + func init() { prometheus.MustRegister( - klusterInstancesTotal, + klusterInfo, klusterStatusPhase, - nodePoolInfo, + nodePoolSize, nodePoolStatus, ) } diff --git a/pkg/controller/metrics_test.go b/pkg/controller/metrics_test.go new file mode 100644 index 0000000000..56ccbc5959 --- /dev/null +++ b/pkg/controller/metrics_test.go @@ -0,0 +1,167 @@ +package controller + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "github.com/sapcc/kubernikus/pkg/api/models" + "github.com/stretchr/testify/assert" +) + +func TestMetrics(t *testing.T) { + + expectedMetrics := map[prometheus.Collector]string{ + nodePoolSize: + ` +# HELP kubernikus_node_pool_size size of a node pool +# TYPE kubernikus_node_pool_size gauge +kubernikus_node_pool_size{flavor_name="flavorName",image_name="imageName",kluster_id="klusterID",node_pool="nodePoolName"} 3 + `, + nodePoolStatus: + ` +# HELP kubernikus_node_pool_status status of the node pool and the number of nodes nodes in that status +# TYPE kubernikus_node_pool_status gauge +kubernikus_node_pool_status{kluster_id="klusterID",node_pool="nodePoolName",status="ready"} 2 +kubernikus_node_pool_status{kluster_id="klusterID",node_pool="nodePoolName",status="running"} 2 +kubernikus_node_pool_status{kluster_id="klusterID",node_pool="nodePoolName",status="starting"} 1 + `, + klusterInfo: + ` +# HELP kubernikus_kluster_info detailed information on a kluster +# TYPE kubernikus_kluster_info gauge +kubernikus_kluster_info{account="account",creator="D012345",kluster_name="klusterName",kluster_namespace="namespace",kluster_version="version",project_id="projectID"} 1 + `, + klusterStatusPhase: + ` +# HELP kubernikus_kluster_status_phase the phase the kluster is currently in +# TYPE kubernikus_kluster_status_phase gauge +kubernikus_kluster_status_phase{kluster_id="klusterID",phase="Pending"} 0 +kubernikus_kluster_status_phase{kluster_id="klusterID",phase="Creating"} 0 +kubernikus_kluster_status_phase{kluster_id="klusterID",phase="Running"} 1 +kubernikus_kluster_status_phase{kluster_id="klusterID",phase="Terminating"} 0 + `, + } + + // call functions that update the metrics here + setMetricNodePoolSize("klusterID", "nodePoolName", "imageName", "flavorName", 3) + setMetricNodePoolStatus("klusterID", "nodePoolName", map[string]int64{"running": 2, "starting": 1, "ready": 2}) + setMetricKlusterInfo("namespace","klusterName","version","projectID",map[string]string{"creator":"D012345"},map[string]string{"account":"account"}) + setMetricKlusterStatusPhase("klusterID", models.KlusterPhaseRunning) + + + registry := prometheus.NewPedanticRegistry() + for collector, expectedMetricString := range expectedMetrics { + // register the metric we're checking right now + registry.MustRegister(collector) + + // collect aka gather + actualMetrics, err := registry.Gather() + if err != nil { + t.Errorf("could not gather metrics: %#v", err) + } + // the actual check + assert.NoError(t, compareMetrics(expectedMetricString, actualMetrics)) + + // unregister to make sure we only have the metric we're checking right now + if !registry.Unregister(collector) { + t.Errorf("could not unregister %#v", collector) + } + } +} + +// compare and return human readable error in case it's not equal +func compareMetrics(expectedMetrics string, actualMetrics []*dto.MetricFamily) error { + var tp expfmt.TextParser + expected, err := tp.TextToMetricFamilies(bytes.NewReader([]byte(expectedMetrics))) + if err != nil { + return fmt.Errorf("parsing expected metrics failed: %s", err) + } + + if !reflect.DeepEqual(actualMetrics, normalizeMetricFamilies(expected)) { + var buf1 bytes.Buffer + enc := expfmt.NewEncoder(&buf1, expfmt.FmtText) + for _, mf := range actualMetrics { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding failed: %s", err) + } + } + var buf2 bytes.Buffer + enc = expfmt.NewEncoder(&buf2, expfmt.FmtText) + for _, mf := range normalizeMetricFamilies(expected) { + if err := enc.Encode(mf); err != nil { + return fmt.Errorf("encoding failed: %s", err) + } + } + + return fmt.Errorf(` +unequal metric output; +want: + +%s + +got: + +%s +`, buf2.String(), buf1.String()) + } + return nil +} + +func normalizeMetricFamilies(metricFamiliesByName map[string]*dto.MetricFamily) []*dto.MetricFamily { + for _, mf := range metricFamiliesByName { + sort.Sort(metricSorter(mf.Metric)) + } + names := make([]string, 0, len(metricFamiliesByName)) + for name, mf := range metricFamiliesByName { + if len(mf.Metric) > 0 { + names = append(names, name) + } + } + sort.Strings(names) + result := make([]*dto.MetricFamily, 0, len(names)) + for _, name := range names { + result = append(result, metricFamiliesByName[name]) + } + return result +} + +type metricSorter []*dto.Metric + +func (s metricSorter) Len() int { + return len(s) +} + +func (s metricSorter) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s metricSorter) Less(i, j int) bool { + sort.Sort(prometheus.LabelPairSorter(s[i].Label)) + sort.Sort(prometheus.LabelPairSorter(s[j].Label)) + + if len(s[i].Label) != len(s[j].Label) { + return len(s[i].Label) < len(s[j].Label) + } + + for n, lp := range s[i].Label { + vi := lp.GetValue() + vj := s[j].Label[n].GetValue() + if vi != vj { + return vi < vj + } + } + + if s[i].TimestampMs == nil { + return false + } + if s[j].TimestampMs == nil { + return true + } + return s[i].GetTimestampMs() < s[j].GetTimestampMs() +}