Skip to content

Commit

Permalink
handler: Expose nmstatectl stats as k8s metrics (#1221)
Browse files Browse the repository at this point in the history
Now that nmstatectl is able to calculate some useful stats from network
configuration [1], we can bubble them up and expose them as k8s metrics so
k-nmstate users can dig into them using Prometheus, Grafana or the like.

This change adds a new "Features" field under the NNCE Status with the output
of `nmstatectl st` and also creates a new deployment `nmstate-metrics` that
will gather the NNCEs' features and reflect them in a cluster-wide
gauge Prometheus metric.

[1] nmstate/nmstate#2420

Signed-off-by: Enrique Llorente <[email protected]>
  • Loading branch information
qinqon authored Apr 17, 2024
1 parent 291a49b commit bf889c0
Show file tree
Hide file tree
Showing 33 changed files with 842 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ linters-settings:
hugeParam:
sizeThreshold: 128
gocyclo:
min-complexity: 16
min-complexity: 19
goheader:
template-path: hack/header.tpl
goimports:
Expand Down
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ export IMAGE_BUILDER ?= $(shell if podman ps >/dev/null 2>&1; then echo podman;

WHAT ?= ./pkg/... ./controllers/...

LINTER_IMAGE_TAG ?= v0.0.3

unit_test_args ?= -r -keep-going --randomize-all --randomize-suites --race --trace $(UNIT_TEST_ARGS)

export KUBEVIRT_PROVIDER ?= k8s-1.26-centos9
export KUBEVIRT_NUM_NODES ?= 2 # 1 control-plane, 1 worker needed for e2e tests
export KUBEVIRT_NUM_NODES ?= 3 # 1 control-plane, 2 worker needed for e2e tests
export KUBEVIRT_NUM_SECONDARY_NICS ?= 2

export E2E_TEST_TIMEOUT ?= 80m
Expand Down Expand Up @@ -93,7 +95,7 @@ SKIP_IMAGE_BUILD ?= false

all: check handler operator

check: lint vet whitespace-check gofmt-check
check: lint vet whitespace-check gofmt-check promlint-check

format: whitespace-format gofmt

Expand All @@ -112,6 +114,9 @@ whitespace-check:
gofmt-check:
test -z "`gofmt -l cmd/ test/ hack/ api/ controllers/ pkg/ | grep -v "/vendor/"`" || (gofmt -l cmd/ test/ hack/ api/ controllers/ pkg/ && exit 1)

promlint-check:
LINTER_IMAGE_TAG=${LINTER_IMAGE_TAG} hack/prom_metric_linter.sh

lint:
hack/lint.sh

Expand Down
2 changes: 2 additions & 0 deletions api/shared/nodenetworkconfigurationenactment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ type NodeNetworkConfigurationEnactmentStatus struct {
PolicyGeneration int64 `json:"policyGeneration,omitempty"`

Conditions ConditionList `json:"conditions,omitempty"`

Features []string `json:"features,omitempty"`
}

type NodeNetworkConfigurationEnactmentCapturedState struct {
Expand Down
2 changes: 2 additions & 0 deletions automation/check-patch.e2e-operator-k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ teardown() {
}

main() {
export KUBEVIRT_DEPLOY_PROMETHEUS=false
export KUBEVIRT_DEPLOY_GRAFANA=false
export KUBEVIRT_NUM_NODES=3 # 1 control-plane, 2 workers
source automation/check-patch.setup.sh
cd ${TMP_PROJECT_PATH}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,16 @@ spec:
- consoleplugins
verbs:
- '*'
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- get
- list
- update
- watch
- apiGroups:
- nmstate.io
resources:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ spec:
version:
type: string
type: object
features:
items:
type: string
type: array
policyGeneration:
description: The generation from policy needed to check if an enactment
condition status belongs to the same policy version
Expand Down Expand Up @@ -200,6 +204,10 @@ spec:
version:
type: string
type: object
features:
items:
type: string
type: array
policyGeneration:
description: The generation from policy needed to check if an enactment
condition status belongs to the same policy version
Expand Down
2 changes: 2 additions & 0 deletions cluster/kubevirtci.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
export KUBEVIRT_PROVIDER=${KUBEVIRT_PROVIDER:-'k8s-1.26-centos9'}
export KUBEVIRTCI_TAG=2303201102-ef46217
export KUBEVIRT_DEPLOY_PROMETHEUS=${KUBEVIRT_DEPLOY_PROMETHEUS:-true}
export KUBEVIRT_DEPLOY_GRAFANA=${KUBEVIRT_DEPLOY_GRAFANA:-true}

KUBEVIRTCI_REPO='https://github.com/kubevirt/kubevirtci.git'
KUBEVIRTCI_PATH="${PWD}/_kubevirtci"
Expand Down
43 changes: 40 additions & 3 deletions cmd/handler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package main

import (
"context"
"encoding/json"
"flag"
"fmt"
"net/http"
Expand All @@ -36,6 +37,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/metrics"

// +kubebuilder:scaffold:imports

Expand All @@ -52,8 +54,10 @@ import (
nmstatev1alpha1 "github.com/nmstate/kubernetes-nmstate/api/v1alpha1"
nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1"
controllers "github.com/nmstate/kubernetes-nmstate/controllers/handler"
controllersmetrics "github.com/nmstate/kubernetes-nmstate/controllers/metrics"
"github.com/nmstate/kubernetes-nmstate/pkg/environment"
"github.com/nmstate/kubernetes-nmstate/pkg/file"
"github.com/nmstate/kubernetes-nmstate/pkg/monitoring"
"github.com/nmstate/kubernetes-nmstate/pkg/nmstatectl"
"github.com/nmstate/kubernetes-nmstate/pkg/webhook"
)
Expand All @@ -77,6 +81,8 @@ func init() {
utilruntime.Must(nmstatev1beta1.AddToScheme(scheme))
utilruntime.Must(nmstatev1alpha1.AddToScheme(scheme))
// +kubebuilder:scaffold:scheme

metrics.Registry.MustRegister(monitoring.AppliedFeatures)
}

func main() {
Expand All @@ -92,11 +98,17 @@ func mainHandler() int {
opt := zap.Options{}
opt.BindFlags(flag.CommandLine)
var logType string
var dumpMetricFamilies bool
pflag.StringVar(&logType, "v", "production", "Log type (debug/production).")
pflag.BoolVar(&dumpMetricFamilies, "dump-metric-families", false, "Dump the prometheus metric families and exit.")
pflag.CommandLine.MarkDeprecated("v", "please use the --zap-devel flag for debug logging instead")
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
pflag.Parse()

if dumpMetricFamilies {
return dumpMetricFamiliesToStdout()
}

if logType == "debug" {
// workaround until --v flag got removed
flag.CommandLine.Set("zap-devel", "true")
Expand All @@ -115,10 +127,9 @@ func mainHandler() int {
defer handlerLock.Unlock()
setupLog.Info("Successfully took nmstate exclusive lock")
}

ctrlOptions := ctrl.Options{
Scheme: scheme,
MetricsBindAddress: "0", // disable metrics
MetricsBindAddress: metrics.DefaultBindAddress, // Explicitly enable metrics
}

if environment.IsHandler() {
Expand All @@ -145,6 +156,10 @@ func mainHandler() int {
setupLog.Error(err, "Cannot initialize webhook")
return generalExitStatus
}
} else if environment.IsMetricsManager() {
if err = setupMetricsManager(mgr); err != nil {
return generalExitStatus
}
} else if environment.IsHandler() {
if err = setupHandlerControllers(mgr); err != nil {
return generalExitStatus
Expand All @@ -158,7 +173,6 @@ func mainHandler() int {
}

setProfiler()

setupLog.Info("starting manager")
if err = mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down Expand Up @@ -307,6 +321,19 @@ func setupCertManager(mgr manager.Manager, certManagerOpts certificate.Options)
return nil
}

// setupMetricsManager builds the metrics NodeNetworkConfigurationEnactment
// reconciler from the manager's client and scheme and registers it with mgr.
// A failure from SetupWithManager is logged through setupLog and returned
// to the caller; nil is returned on success.
func setupMetricsManager(mgr manager.Manager) error {
	setupLog.Info("Creating Metrics NodeNetworkConfigurationEnactment controller")

	reconciler := &controllersmetrics.NodeNetworkConfigurationEnactmentReconciler{
		Client: mgr.GetClient(),
		Log:    ctrl.Log.WithName("metrics").WithName("NodeNetworkConfigurationEnactment"),
		Scheme: mgr.GetScheme(),
	}

	setupErr := reconciler.SetupWithManager(mgr)
	if setupErr == nil {
		return nil
	}

	setupLog.Error(setupErr, "unable to create NodeNetworkConfigurationEnactment metrics controller", "metrics", "NMState")
	return setupErr
}

// Start profiler on given port if ENABLE_PROFILER is True
func setProfiler() {
cfg := ProfilerConfig{}
Expand Down Expand Up @@ -344,3 +371,13 @@ func lockHandler() (*flock.Flock, error) {
})
return handlerLock, err
}

// dumpMetricFamiliesToStdout marshals the value returned by
// monitoring.Families() to JSON and prints it on standard output with no
// trailing newline. It returns 0 on success; if marshaling fails, the error
// is logged through setupLog and generalExitStatus is returned.
func dumpMetricFamiliesToStdout() int {
	encoded, marshalErr := json.Marshal(monitoring.Families())
	if marshalErr == nil {
		fmt.Print(string(encoded))
		return 0
	}

	setupLog.Error(marshalErr, "Failed dumping metric families")
	return generalExitStatus
}
12 changes: 12 additions & 0 deletions controllers/handler/nodenetworkconfigurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ func (r *NodeNetworkConfigurationPolicyReconciler) fillInEnactmentStatus(
policy *nmstatev1.NodeNetworkConfigurationPolicy,
enactmentInstance *nmstatev1beta1.NodeNetworkConfigurationEnactment,
enactmentConditions enactmentconditions.EnactmentConditions) error {
log := r.Log.WithValues("nodenetworkconfigurationpolicy.fillInEnactmentStatus", enactmentInstance.Name)
currentState, err := nmstatectlShowFn()
if err != nil {
return err
Expand Down Expand Up @@ -366,13 +367,24 @@ func (r *NodeNetworkConfigurationPolicyReconciler) fillInEnactmentStatus(
return err
}

features := []string{}
stats, err := nmstatectl.Statistic(desiredStateWithDefaults)
if err != nil {
log.Error(err, "failed calculating nmstate statistics")
} else {
for feature := range stats.Features {
features = append(features, feature)
}
}

return enactmentstatus.Update(
r.APIClient,
nmstateapi.EnactmentKey(nodeName, policy.Name),
func(status *nmstateapi.NodeNetworkConfigurationEnactmentStatus) {
status.DesiredState = desiredStateWithDefaults
status.CapturedStates = capturedStates
status.PolicyGeneration = policy.Generation
status.Features = features
},
)
}
Expand Down
Loading

0 comments on commit bf889c0

Please sign in to comment.