From bf889c0818b233981430fddb38bcb1706a11acbe Mon Sep 17 00:00:00 2001 From: Enrique Llorente Pastora Date: Wed, 17 Apr 2024 19:42:00 +0200 Subject: [PATCH] handler: Expose nmstatectl stats as k8s metrics (#1221) Now that nmstatectl is able to calculate some useful stats from network configuration [1], we can bubble them up and expose them as k8s metrics so k-nmstate users can dig into them using Prometheus, Grafana or the like. This change adds a new "Features" field under nnce Status with the output of `nmstatectl st` and also creates a new deployment `nmstate-metrics` that will gather the NNCEs features and reflect them in a cluster-wide Prometheus gauge metric. [1] https://github.com/nmstate/nmstate/pull/2420 Signed-off-by: Enrique Llorente --- .golangci.yml | 2 +- Makefile | 9 +- ...nodenetworkconfigurationenactment_types.go | 2 + automation/check-patch.e2e-operator-k8s.sh | 2 + ...mstate-operator.clusterserviceversion.yaml | 10 ++ ...io_nodenetworkconfigurationenactments.yaml | 8 + cluster/kubevirtci.sh | 2 + cmd/handler/main.go | 43 ++++- ...denetworkconfigurationpolicy_controller.go | 12 ++ ...etworkconfigurationenactment_controller.go | 153 ++++++++++++++++++ controllers/operator/nmstate_controller.go | 1 + .../operator/nmstate_controller_test.go | 17 ++ ...io_nodenetworkconfigurationenactments.yaml | 8 + deploy/handler/cluster_role.yaml | 4 + deploy/handler/namespace.yaml | 1 + deploy/handler/operator.yaml | 134 +++++++++++++++ deploy/handler/role.yaml | 36 ++++- deploy/handler/role_binding.yaml | 14 ++ deploy/operator/role.yaml | 10 ++ go.mod | 6 +- hack/prom_metric_linter.sh | 38 +++++ pkg/environment/environment.go | 8 +- pkg/monitoring/metrics.go | 52 ++++++ pkg/nmstatectl/nmstatectl.go | 47 ++++++ test/e2e/handler/metrics.go | 59 +++++++ test/e2e/handler/metrics_test.go | 97 +++++++++++ test/e2e/operator/main_test.go | 5 +- test/e2e/operator/nmstate_install_test.go | 4 +- test/e2e/operator/operator.go | 27 +++- test/e2e/upgrade/main_test.go | 14 +- 
test/reporter/writers.go | 36 +++-- test/runner/pod.go | 20 ++- ...nodenetworkconfigurationenactment_types.go | 2 + 33 files changed, 842 insertions(+), 41 deletions(-) create mode 100644 controllers/metrics/nodenetworkconfigurationenactment_controller.go create mode 100755 hack/prom_metric_linter.sh create mode 100644 pkg/monitoring/metrics.go create mode 100644 test/e2e/handler/metrics.go create mode 100644 test/e2e/handler/metrics_test.go diff --git a/.golangci.yml b/.golangci.yml index c9663e444..8a9a816b4 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -28,7 +28,7 @@ linters-settings: hugeParam: sizeThreshold: 128 gocyclo: - min-complexity: 16 + min-complexity: 19 goheader: template-path: hack/header.tpl goimports: diff --git a/Makefile b/Makefile index 0f0db015c..00371720d 100644 --- a/Makefile +++ b/Makefile @@ -30,10 +30,12 @@ export IMAGE_BUILDER ?= $(shell if podman ps >/dev/null 2>&1; then echo podman; WHAT ?= ./pkg/... ./controllers/... +LINTER_IMAGE_TAG ?= v0.0.3 + unit_test_args ?= -r -keep-going --randomize-all --randomize-suites --race --trace $(UNIT_TEST_ARGS) export KUBEVIRT_PROVIDER ?= k8s-1.26-centos9 -export KUBEVIRT_NUM_NODES ?= 2 # 1 control-plane, 1 worker needed for e2e tests +export KUBEVIRT_NUM_NODES ?= 3 # 1 control-plane, 2 worker needed for e2e tests export KUBEVIRT_NUM_SECONDARY_NICS ?= 2 export E2E_TEST_TIMEOUT ?= 80m @@ -93,7 +95,7 @@ SKIP_IMAGE_BUILD ?= false all: check handler operator -check: lint vet whitespace-check gofmt-check +check: lint vet whitespace-check gofmt-check promlint-check format: whitespace-format gofmt @@ -112,6 +114,9 @@ whitespace-check: gofmt-check: test -z "`gofmt -l cmd/ test/ hack/ api/ controllers/ pkg/ | grep -v "/vendor/"`" || (gofmt -l cmd/ test/ hack/ api/ controllers/ pkg/ && exit 1) +promlint-check: + LINTER_IMAGE_TAG=${LINTER_IMAGE_TAG} hack/prom_metric_linter.sh + lint: hack/lint.sh diff --git a/api/shared/nodenetworkconfigurationenactment_types.go 
b/api/shared/nodenetworkconfigurationenactment_types.go index 5acd7ba07..23355fa42 100644 --- a/api/shared/nodenetworkconfigurationenactment_types.go +++ b/api/shared/nodenetworkconfigurationenactment_types.go @@ -41,6 +41,8 @@ type NodeNetworkConfigurationEnactmentStatus struct { PolicyGeneration int64 `json:"policyGeneration,omitempty"` Conditions ConditionList `json:"conditions,omitempty"` + + Features []string `json:"features,omitempty"` } type NodeNetworkConfigurationEnactmentCapturedState struct { diff --git a/automation/check-patch.e2e-operator-k8s.sh b/automation/check-patch.e2e-operator-k8s.sh index 66ff25da3..dd55ec1cc 100755 --- a/automation/check-patch.e2e-operator-k8s.sh +++ b/automation/check-patch.e2e-operator-k8s.sh @@ -14,6 +14,8 @@ teardown() { } main() { + export KUBEVIRT_DEPLOY_PROMETHEUS=false + export KUBEVIRT_DEPLOY_GRAFANA=false export KUBEVIRT_NUM_NODES=3 # 1 control-plane, 2 workers source automation/check-patch.setup.sh cd ${TMP_PROJECT_PATH} diff --git a/bundle/manifests/kubernetes-nmstate-operator.clusterserviceversion.yaml b/bundle/manifests/kubernetes-nmstate-operator.clusterserviceversion.yaml index 765b9fca6..130e39064 100644 --- a/bundle/manifests/kubernetes-nmstate-operator.clusterserviceversion.yaml +++ b/bundle/manifests/kubernetes-nmstate-operator.clusterserviceversion.yaml @@ -169,6 +169,16 @@ spec: - consoleplugins verbs: - '*' + - apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - get + - list + - update + - watch - apiGroups: - nmstate.io resources: diff --git a/bundle/manifests/nmstate.io_nodenetworkconfigurationenactments.yaml b/bundle/manifests/nmstate.io_nodenetworkconfigurationenactments.yaml index 9b46692e0..ca1480288 100644 --- a/bundle/manifests/nmstate.io_nodenetworkconfigurationenactments.yaml +++ b/bundle/manifests/nmstate.io_nodenetworkconfigurationenactments.yaml @@ -101,6 +101,10 @@ spec: version: type: string type: object + features: + items: + type: string + type: 
array policyGeneration: description: The generation from policy needed to check if an enactment condition status belongs to the same policy version @@ -200,6 +204,10 @@ spec: version: type: string type: object + features: + items: + type: string + type: array policyGeneration: description: The generation from policy needed to check if an enactment condition status belongs to the same policy version diff --git a/cluster/kubevirtci.sh b/cluster/kubevirtci.sh index 01ced15d3..4f19781f5 100644 --- a/cluster/kubevirtci.sh +++ b/cluster/kubevirtci.sh @@ -1,5 +1,7 @@ export KUBEVIRT_PROVIDER=${KUBEVIRT_PROVIDER:-'k8s-1.26-centos9'} export KUBEVIRTCI_TAG=2303201102-ef46217 +export KUBEVIRT_DEPLOY_PROMETHEUS=${KUBEVIRT_DEPLOY_PROMETHEUS:-true} +export KUBEVIRT_DEPLOY_GRAFANA=${KUBEVIRT_DEPLOY_GRAFANA:-true} KUBEVIRTCI_REPO='https://github.com/kubevirt/kubevirtci.git' KUBEVIRTCI_PATH="${PWD}/_kubevirtci" diff --git a/cmd/handler/main.go b/cmd/handler/main.go index 6c3fa102e..97bffd9a2 100644 --- a/cmd/handler/main.go +++ b/cmd/handler/main.go @@ -18,6 +18,7 @@ package main import ( "context" + "encoding/json" "flag" "fmt" "net/http" @@ -36,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/metrics" // +kubebuilder:scaffold:imports @@ -52,8 +54,10 @@ import ( nmstatev1alpha1 "github.com/nmstate/kubernetes-nmstate/api/v1alpha1" nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1" controllers "github.com/nmstate/kubernetes-nmstate/controllers/handler" + controllersmetrics "github.com/nmstate/kubernetes-nmstate/controllers/metrics" "github.com/nmstate/kubernetes-nmstate/pkg/environment" "github.com/nmstate/kubernetes-nmstate/pkg/file" + "github.com/nmstate/kubernetes-nmstate/pkg/monitoring" "github.com/nmstate/kubernetes-nmstate/pkg/nmstatectl" "github.com/nmstate/kubernetes-nmstate/pkg/webhook" ) @@ -77,6 +81,8 @@ func init() 
{ utilruntime.Must(nmstatev1beta1.AddToScheme(scheme)) utilruntime.Must(nmstatev1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme + + metrics.Registry.MustRegister(monitoring.AppliedFeatures) } func main() { @@ -92,11 +98,17 @@ func mainHandler() int { opt := zap.Options{} opt.BindFlags(flag.CommandLine) var logType string + var dumpMetricFamilies bool pflag.StringVar(&logType, "v", "production", "Log type (debug/production).") + pflag.BoolVar(&dumpMetricFamilies, "dump-metric-families", false, "Dump the prometheus metric families and exit.") pflag.CommandLine.MarkDeprecated("v", "please use the --zap-devel flag for debug logging instead") pflag.CommandLine.AddGoFlagSet(flag.CommandLine) pflag.Parse() + if dumpMetricFamilies { + return dumpMetricFamiliesToStdout() + } + if logType == "debug" { // workaround until --v flag got removed flag.CommandLine.Set("zap-devel", "true") @@ -115,10 +127,9 @@ func mainHandler() int { defer handlerLock.Unlock() setupLog.Info("Successfully took nmstate exclusive lock") } - ctrlOptions := ctrl.Options{ Scheme: scheme, - MetricsBindAddress: "0", // disable metrics + MetricsBindAddress: metrics.DefaultBindAddress, // Explicitly enable metrics } if environment.IsHandler() { @@ -145,6 +156,10 @@ func mainHandler() int { setupLog.Error(err, "Cannot initialize webhook") return generalExitStatus } + } else if environment.IsMetricsManager() { + if err = setupMetricsManager(mgr); err != nil { + return generalExitStatus + } } else if environment.IsHandler() { if err = setupHandlerControllers(mgr); err != nil { return generalExitStatus @@ -158,7 +173,6 @@ func mainHandler() int { } setProfiler() - setupLog.Info("starting manager") if err = mgr.Start(ctrl.SetupSignalHandler()); err != nil { setupLog.Error(err, "problem running manager") @@ -307,6 +321,19 @@ func setupCertManager(mgr manager.Manager, certManagerOpts certificate.Options) return nil } +func setupMetricsManager(mgr manager.Manager) error { + setupLog.Info("Creating 
Metrics NodeNetworkConfigurationEnactment controller") + if err := (&controllersmetrics.NodeNetworkConfigurationEnactmentReconciler{ + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("metrics").WithName("NodeNetworkConfigurationEnactment"), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create NodeNetworkConfigurationEnactment metrics controller", "metrics", "NMState") + return err + } + return nil +} + // Start profiler on given port if ENABLE_PROFILER is True func setProfiler() { cfg := ProfilerConfig{} @@ -344,3 +371,13 @@ func lockHandler() (*flock.Flock, error) { }) return handlerLock, err } + +func dumpMetricFamiliesToStdout() int { + metricFamiliesJSON, err := json.Marshal(monitoring.Families()) + if err != nil { + setupLog.Error(err, "Failed dumping metric families") + return generalExitStatus + } + fmt.Printf("%s", string(metricFamiliesJSON)) + return 0 +} diff --git a/controllers/handler/nodenetworkconfigurationpolicy_controller.go b/controllers/handler/nodenetworkconfigurationpolicy_controller.go index b756c1ff8..5cac9e46e 100644 --- a/controllers/handler/nodenetworkconfigurationpolicy_controller.go +++ b/controllers/handler/nodenetworkconfigurationpolicy_controller.go @@ -335,6 +335,7 @@ func (r *NodeNetworkConfigurationPolicyReconciler) fillInEnactmentStatus( policy *nmstatev1.NodeNetworkConfigurationPolicy, enactmentInstance *nmstatev1beta1.NodeNetworkConfigurationEnactment, enactmentConditions enactmentconditions.EnactmentConditions) error { + log := r.Log.WithValues("nodenetworkconfigurationpolicy.fillInEnactmentStatus", enactmentInstance.Name) currentState, err := nmstatectlShowFn() if err != nil { return err @@ -366,6 +367,16 @@ func (r *NodeNetworkConfigurationPolicyReconciler) fillInEnactmentStatus( return err } + features := []string{} + stats, err := nmstatectl.Statistic(desiredStateWithDefaults) + if err != nil { + log.Error(err, "failed calculating nmstate statistics") + } else { + for 
feature := range stats.Features { + features = append(features, feature) + } + } + return enactmentstatus.Update( r.APIClient, nmstateapi.EnactmentKey(nodeName, policy.Name), @@ -373,6 +384,7 @@ func (r *NodeNetworkConfigurationPolicyReconciler) fillInEnactmentStatus( status.DesiredState = desiredStateWithDefaults status.CapturedStates = capturedStates status.PolicyGeneration = policy.Generation + status.Features = features }, ) } diff --git a/controllers/metrics/nodenetworkconfigurationenactment_controller.go b/controllers/metrics/nodenetworkconfigurationenactment_controller.go new file mode 100644 index 000000000..aab3ad391 --- /dev/null +++ b/controllers/metrics/nodenetworkconfigurationenactment_controller.go @@ -0,0 +1,153 @@ +/* +Copyright The Kubernetes NMState Authors. + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "context" + "fmt" + "reflect" + + "github.com/go-logr/logr" + "github.com/pkg/errors" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + nmstatev1beta1 "github.com/nmstate/kubernetes-nmstate/api/v1beta1" + "github.com/nmstate/kubernetes-nmstate/pkg/monitoring" + "github.com/nmstate/kubernetes-nmstate/pkg/nmstatectl" +) + +// NodeNetworkConfigurationEnactment reconciles a NodeNetworkConfigurationEnactment object +type NodeNetworkConfigurationEnactmentReconciler struct { + client.Client + Log logr.Logger + Scheme *runtime.Scheme + oldNNCEs map[string]*nmstatev1beta1.NodeNetworkConfigurationEnactment +} + +// Reconcile reads that state of the cluster for a NodeNetworkConfigurationEnactment object and calculate +// metrics with `nmstatectl stat` +// Note: +// The Controller will requeue the Request to be processed again if the returned error is non-nil or +// Result.Requeue is true, otherwise upon completion it will remove the work from the queue. +func (r *NodeNetworkConfigurationEnactmentReconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) { + log := r.Log.WithValues("metrics.nodenetworkconfigurationenactment", request.NamespacedName) + log.Info("Reconcile") + + enactmentInstance := &nmstatev1beta1.NodeNetworkConfigurationEnactment{} + err := r.Client.Get(context.TODO(), request.NamespacedName, enactmentInstance) + if err != nil { + if apierrors.IsNotFound(err) { + // NNCE has being delete let's clean the old NNCEs map + delete(r.oldNNCEs, request.Name) + + // Request object not found, could have been deleted after reconcile request. + // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. 
+ // Return and don't requeue + return ctrl.Result{}, nil + } + log.Error(err, "Error retrieving enactment") + // Error reading the object - requeue the request. + return ctrl.Result{}, err + } + + if err := r.reportStatistics(ctx); err != nil { + return ctrl.Result{}, fmt.Errorf("failed reporting statistics: %w", err) + } + + // After reporting metrics store this NNCE as old to calculate gaugue + r.oldNNCEs[enactmentInstance.Name] = enactmentInstance + + return ctrl.Result{}, nil +} + +func (r *NodeNetworkConfigurationEnactmentReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.oldNNCEs = map[string]*nmstatev1beta1.NodeNetworkConfigurationEnactment{} + // By default all this functors return true so controller watch all events, + // but we only want to watch create for current node. + onCreationOrUpdateForThisEnactment := predicate.Funcs{ + CreateFunc: func(createEvent event.CreateEvent) bool { + return true + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return true + }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldNNCE, ok := e.ObjectOld.(*nmstatev1beta1.NodeNetworkConfigurationEnactment) + if !ok { + return false + } + newNNCE, ok := e.ObjectNew.(*nmstatev1beta1.NodeNetworkConfigurationEnactment) + if !ok { + return false + } + + return !reflect.DeepEqual(oldNNCE.Status.Features, newNNCE.Status.Features) + }, + GenericFunc: func(event.GenericEvent) bool { + return false + }, + } + + err := ctrl.NewControllerManagedBy(mgr). + For(&nmstatev1beta1.NodeNetworkConfigurationEnactment{}). + WithEventFilter(onCreationOrUpdateForThisEnactment). 
+ Complete(r) + if err != nil { + return errors.Wrap(err, "failed to add controller to NNCE metrics Reconciler") + } + + return nil +} + +func (r *NodeNetworkConfigurationEnactmentReconciler) reportStatistics(ctx context.Context) error { + nnceList := nmstatev1beta1.NodeNetworkConfigurationEnactmentList{} + if err := r.List(ctx, &nnceList); err != nil { + return err + } + + // Calculate old and new cluster wide features + oldFeatures := []string{} + newFeatures := []string{} + for i := range nnceList.Items { + newFeatures = append(newFeatures, nnceList.Items[i].Status.Features...) + oldNNCE, ok := r.oldNNCEs[nnceList.Items[i].Name] + if ok { + oldFeatures = append(oldFeatures, oldNNCE.Status.Features...) + } + } + + oldStats := nmstatectl.NewStats(oldFeatures) + newStats := nmstatectl.NewStats(newFeatures) + + statsToInc := newStats.Subtract(oldStats) + for f := range statsToInc.Features { + monitoring.AppliedFeatures.WithLabelValues(f).Inc() + } + + statsToDel := oldStats.Subtract(newStats) + for f := range statsToDel.Features { + monitoring.AppliedFeatures.WithLabelValues(f).Dec() + } + return nil +} diff --git a/controllers/operator/nmstate_controller.go b/controllers/operator/nmstate_controller.go index 2e3d2df74..1c159b03e 100644 --- a/controllers/operator/nmstate_controller.go +++ b/controllers/operator/nmstate_controller.go @@ -70,6 +70,7 @@ type NMStateReconciler struct { // +kubebuilder:rbac:groups="",resources=nodes,verbs=list;get // +kubebuilder:rbac:groups="console.openshift.io",resources=consoleplugins,verbs="*" // +kubebuilder:rbac:groups="operator.openshift.io",resources=consoles,verbs=list;get;watch;update +// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=servicemonitors,verbs=list;get;watch;update;create func (r *NMStateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = context.Background() diff --git a/controllers/operator/nmstate_controller_test.go b/controllers/operator/nmstate_controller_test.go 
index 54f7b1a14..8e4a4050f 100644 --- a/controllers/operator/nmstate_controller_test.go +++ b/controllers/operator/nmstate_controller_test.go @@ -281,6 +281,15 @@ var _ = Describe("NMState controller reconcile", func() { Expect(deployment.Spec.Template.Spec.NodeSelector).To(HaveKeyWithValue(k, v)) } }) + It("should add InfraNodeSelector to metrics deployment", func() { + deployment := &appsv1.Deployment{} + metricsKey := types.NamespacedName{Namespace: handlerNamespace, Name: handlerPrefix + "-nmstate-metrics"} + err := cl.Get(context.TODO(), metricsKey, deployment) + Expect(err).ToNot(HaveOccurred()) + for k, v := range infraNodeSelector { + Expect(deployment.Spec.Template.Spec.NodeSelector).To(HaveKeyWithValue(k, v)) + } + }) It("should NOT add InfraNodeSelector to handler daemonset", func() { ds := &appsv1.DaemonSet{} err := cl.Get(context.TODO(), handlerKey, ds) @@ -324,6 +333,14 @@ var _ = Describe("NMState controller reconcile", func() { Expect(err).ToNot(HaveOccurred()) Expect(allTolerationsPresent(infraTolerations, deployment.Spec.Template.Spec.Tolerations)).To(BeTrue()) }) + It("should add InfraTolerations to metrics deployment", func() { + deployment := &appsv1.Deployment{} + metricsKey := types.NamespacedName{Namespace: handlerNamespace, Name: handlerPrefix + "-nmstate-metrics"} + err := cl.Get(context.TODO(), metricsKey, deployment) + Expect(err).ToNot(HaveOccurred()) + Expect(allTolerationsPresent(infraTolerations, deployment.Spec.Template.Spec.Tolerations)).To(BeTrue()) + }) + It("should NOT add InfraTolerations to handler daemonset", func() { ds := &appsv1.DaemonSet{} err := cl.Get(context.TODO(), handlerKey, ds) diff --git a/deploy/crds/nmstate.io_nodenetworkconfigurationenactments.yaml b/deploy/crds/nmstate.io_nodenetworkconfigurationenactments.yaml index 58e9dc7c4..12692737b 100644 --- a/deploy/crds/nmstate.io_nodenetworkconfigurationenactments.yaml +++ b/deploy/crds/nmstate.io_nodenetworkconfigurationenactments.yaml @@ -102,6 +102,10 @@ spec: 
version: type: string type: object + features: + items: + type: string + type: array policyGeneration: description: The generation from policy needed to check if an enactment condition status belongs to the same policy version @@ -201,6 +205,10 @@ spec: version: type: string type: object + features: + items: + type: string + type: array policyGeneration: description: The generation from policy needed to check if an enactment condition status belongs to the same policy version diff --git a/deploy/handler/cluster_role.yaml b/deploy/handler/cluster_role.yaml index ea305450c..bc75e0037 100644 --- a/deploy/handler/cluster_role.yaml +++ b/deploy/handler/cluster_role.yaml @@ -6,6 +6,10 @@ metadata: labels: rbac.authorization.k8s.io/aggregate-to-cluster-reader: "true" rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get - apiGroups: - nmstate.io resources: diff --git a/deploy/handler/namespace.yaml b/deploy/handler/namespace.yaml index b0f0b8e74..1fefc1439 100644 --- a/deploy/handler/namespace.yaml +++ b/deploy/handler/namespace.yaml @@ -7,3 +7,4 @@ metadata: pod-security.kubernetes.io/enforce: privileged pod-security.kubernetes.io/audit: privileged pod-security.kubernetes.io/warn: privileged + openshift.io/cluster-monitoring: "true" diff --git a/deploy/handler/operator.yaml b/deploy/handler/operator.yaml index e14687ccb..f478691ee 100644 --- a/deploy/handler/operator.yaml +++ b/deploy/handler/operator.yaml @@ -2,6 +2,98 @@ --- apiVersion: apps/v1 kind: Deployment +metadata: + name: {{template "handlerPrefix" .}}nmstate-metrics + namespace: {{ .HandlerNamespace }} + labels: + prometheus.nmstate.io: "true" + app: kubernetes-nmstate + component: kubernetes-nmstate-metrics +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + name: {{template "handlerPrefix" .}}nmstate-metrics + template: + metadata: + labels: + prometheus.nmstate.io: "true" + app: kubernetes-nmstate + component: kubernetes-nmstate-metrics + name: {{template "handlerPrefix" 
.}}nmstate-metrics + annotations: + description: kubernetes-nmstate-metrics dump nmstate metrics + spec: + serviceAccountName: {{template "handlerPrefix" .}}nmstate-handler + nodeSelector: {{ toYaml .InfraNodeSelector | nindent 8 }} + tolerations: {{ toYaml .InfraTolerations | nindent 8 }} + affinity: {{ toYaml .WebhookAffinity | nindent 8 }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + component: kubernetes-nmstate-metrics + priorityClassName: system-cluster-critical + containers: + - name: nmstate-metrics + args: + - --zap-time-encoding=iso8601 + # Replace this with the built image name + image: {{ .HandlerImage }} + imagePullPolicy: {{ .HandlerPullPolicy }} + command: + - manager + resources: + requests: + cpu: "30m" + memory: "20Mi" + env: + - name: WATCH_NAMESPACE + value: "" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: RUN_METRICS_MANAGER + value: "" + - name: OPERATOR_NAME + value: "{{template "handlerPrefix" .}}nmstate" + - name: ENABLE_PROFILER + value: "False" + - name: PROFILER_PORT + value: "6060" + - args: + - --logtostderr + - --secure-listen-address=:8443 + - --upstream=http://127.0.0.1:8080 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + image: quay.io/openshift/origin-kube-rbac-proxy:4.10.0 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 8443 + name: metrics + protocol: TCP + resources: + requests: + cpu: 10m + memory: 20Mi + terminationMessagePolicy: FallbackToLogsOnError +--- +apiVersion: apps/v1 +kind: Deployment metadata: name: {{template "handlerPrefix" .}}nmstate-webhook namespace: {{ .HandlerNamespace }} @@ -306,6 +398,24 @@ spec: selector: name: {{template "handlerPrefix" .}}nmstate-webhook --- +apiVersion: v1 +kind: Service +metadata: + name: 
{{template "handlerPrefix" .}}nmstate-monitor + namespace: {{ .HandlerNamespace }} + labels: + prometheus.nmstate.io: "true" +spec: + ports: + - name: metrics + port: 8443 + protocol: TCP + targetPort: metrics + selector: + prometheus.nmstate.io: "true" + sessionAffinity: None + type: ClusterIP +--- apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: @@ -389,3 +499,27 @@ spec: selector: matchLabels: name: {{template "handlerPrefix" .}}nmstate-webhook +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + openshift.io/cluster-monitoring: "" + prometheus.nmstate.io: "true" + name: controller-manager-metrics-monitor + namespace: {{ .HandlerNamespace }} +spec: + endpoints: + - scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + insecureSkipVerify: true + relabelings: + - action: labeldrop + regex: (instance|pod|container|job|namespace|service) + namespaceSelector: + matchNames: + - {{ .HandlerNamespace }} + selector: + matchLabels: + prometheus.nmstate.io: "true" diff --git a/deploy/handler/role.yaml b/deploy/handler/role.yaml index 12ef6606f..47d5ee9c7 100644 --- a/deploy/handler/role.yaml +++ b/deploy/handler/role.yaml @@ -50,13 +50,6 @@ rules: - statefulsets verbs: - '*' -- apiGroups: - - monitoring.coreos.com - resources: - - servicemonitors - verbs: - - get - - create - apiGroups: - apps resourceNames: @@ -75,6 +68,23 @@ rules: - update --- apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{template "handlerPrefix" .}}nmstate-monitor + namespace: {{ .HandlerNamespace }} +rules: +- apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: creationTimestamp: null @@ -118,6 +128,18 @@ rules: - configmaps verbs: - "*" +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- 
apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create {{- if .IsOpenShift }} - apiGroups: - security.openshift.io diff --git a/deploy/handler/role_binding.yaml b/deploy/handler/role_binding.yaml index 444c726d2..3375ecbf9 100644 --- a/deploy/handler/role_binding.yaml +++ b/deploy/handler/role_binding.yaml @@ -42,3 +42,17 @@ roleRef: apiGroup: rbac.authorization.k8s.io name: {{template "handlerPrefix" .}}nmstate-handler-events namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{template "handlerPrefix" .}}nmstate-monitor + namespace: {{ .HandlerNamespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{template "handlerPrefix" .}}nmstate-monitor +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: {{template "handlerPrefix" .}}monitoring diff --git a/deploy/operator/role.yaml b/deploy/operator/role.yaml index 0730ff95d..4d26d63c6 100644 --- a/deploy/operator/role.yaml +++ b/deploy/operator/role.yaml @@ -59,6 +59,16 @@ rules: - consoleplugins verbs: - '*' +- apiGroups: + - monitoring.coreos.com + resources: + - servicemonitors + verbs: + - create + - get + - list + - update + - watch - apiGroups: - nmstate.io resources: diff --git a/go.mod b/go.mod index 305739781..e14fb5f5b 100644 --- a/go.mod +++ b/go.mod @@ -11,8 +11,8 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 - github.com/prometheus/client_golang v1.16.0 // indirect - github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/client_golang v1.16.0 + github.com/prometheus/client_model v0.4.0 go.uber.org/zap v1.25.0 // indirect golang.org/x/sys v0.15.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect @@ -22,7 +22,7 @@ require ( k8s.io/client-go v12.0.0+incompatible k8s.io/component-base v0.26.3 // indirect k8s.io/klog/v2 v2.100.1 // indirect - k8s.io/utils 
v0.0.0-20230406110748-d93618cff8a2 // indirect + k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 sigs.k8s.io/yaml v1.3.0 ) diff --git a/hack/prom_metric_linter.sh b/hack/prom_metric_linter.sh new file mode 100755 index 000000000..9d7549009 --- /dev/null +++ b/hack/prom_metric_linter.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# +# This file is part of the KubeVirt project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Copyright 2023 Red Hat, Inc. +# +# +set -e + +LINTER_IMAGE_TAG=${LINTER_IMAGE_TAG:-"v0.0.3"} + +# Get the metrics list +metric_families=$(NODE_NAME=node01 go run cmd/handler/main.go --dump-metric-families) + +# Run the linter by using the prom-metrics-linter Docker container +errors=$(podman run -i "quay.io/kubevirt/prom-metrics-linter:$LINTER_IMAGE_TAG" \ + --metric-families="$metric_families" \ + --operator-name="kubernetes" \ + --sub-operator-name="nmstate") + +# Check if there were any errors, if yes print and fail +if [[ $errors != "" ]]; then + echo "$errors" + exit 1 +fi diff --git a/pkg/environment/environment.go b/pkg/environment/environment.go index 95e8a38a8..f4b206dc1 100644 --- a/pkg/environment/environment.go +++ b/pkg/environment/environment.go @@ -42,9 +42,15 @@ func IsCertManager() bool { return runCertManager } +// IsMetricsManager returns true when RUN_METRICS_MANAGER env var is present +func IsMetricsManager() bool { + _, runMetricsManager := os.LookupEnv("RUN_METRICS_MANAGER") + return runMetricsManager +} + // IsHandler returns true 
if it's not the operator or webhook server func IsHandler() bool { - return !IsWebhook() && !IsOperator() && !IsCertManager() + return !IsWebhook() && !IsOperator() && !IsCertManager() && !IsMetricsManager() } // Returns node name runnig the pod diff --git a/pkg/monitoring/metrics.go b/pkg/monitoring/metrics.go new file mode 100644 index 000000000..681e0ede4 --- /dev/null +++ b/pkg/monitoring/metrics.go @@ -0,0 +1,52 @@ +/* +Copyright The Kubernetes NMState Authors. + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package monitoring + +import ( + "github.com/prometheus/client_golang/prometheus" + pgo "github.com/prometheus/client_model/go" + "k8s.io/utils/pointer" +) + +var ( + AppliedFeaturesOpts = prometheus.GaugeOpts{ + Name: "kubernetes_nmstate_features_applied", + Help: "Number of nmstate features applied labeled by its name", + } + + AppliedFeatures = prometheus.NewGaugeVec( + AppliedFeaturesOpts, + []string{"name"}, + ) + gaugeOpts = []prometheus.GaugeOpts{ + AppliedFeaturesOpts, + } +) + +func Families() []pgo.MetricFamily { + metricFamilies := []pgo.MetricFamily{} + for _, gauge := range gaugeOpts { + metricTypeGauge := pgo.MetricType_GAUGE + metricFamilies = append(metricFamilies, pgo.MetricFamily{ + Name: pointer.String(gauge.Name), + Help: pointer.String(gauge.Help), + Type: &metricTypeGauge, + }) + } + return metricFamilies +} diff --git a/pkg/nmstatectl/nmstatectl.go b/pkg/nmstatectl/nmstatectl.go index 4387d12e7..316cf8ce0 100644 --- a/pkg/nmstatectl/nmstatectl.go +++ b/pkg/nmstatectl/nmstatectl.go @@ -27,6 +27,7 @@ import ( "time" "github.com/pkg/errors" + "sigs.k8s.io/yaml" nmstate "github.com/nmstate/kubernetes-nmstate/api/shared" ) @@ -94,3 +95,49 @@ func Rollback() error { } return nil } + +type Stats struct { + Features map[string]bool +} + +func NewStats(features []string) *Stats { + stats := Stats{ + Features: map[string]bool{}, + } + for _, f := range features { + stats.Features[f] = true + } + return &stats +} + +func (s *Stats) Subtract(statsToSubstract *Stats) Stats { + // Clone the features + result := Stats{Features: map[string]bool{}} + for k, v := range s.Features { + result.Features[k] = v + } + + // Subtract the selected ones + for f := range statsToSubstract.Features { + delete(result.Features, f) + } + return result +} + +func Statistic(desiredState nmstate.State) (*Stats, error) { + statsOutput, err := nmstatectlWithInput( + []string{"st", "-"}, + string(desiredState.Raw), + ) + if err != nil { + return nil, errors.Wrapf(err, 
"failed calling nmstatectl statistics") + } + stats := struct { + Features []string `json:"features"` + }{} + err = yaml.Unmarshal([]byte(statsOutput), &stats) + if err != nil { + return nil, errors.Wrapf(err, "failed unmarshaling nmstatectl statistics") + } + return NewStats(stats.Features), nil +} diff --git a/test/e2e/handler/metrics.go b/test/e2e/handler/metrics.go new file mode 100644 index 000000000..d855cacfd --- /dev/null +++ b/test/e2e/handler/metrics.go @@ -0,0 +1,59 @@ +/* +Copyright The Kubernetes NMState Authors. + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package handler + +import ( + "strings" + + "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/nmstate/kubernetes-nmstate/test/cmd" + "github.com/nmstate/kubernetes-nmstate/test/runner" +) + +func getMetrics(token string) map[string]string { + bearer := "Authorization: Bearer " + token + return indexMetrics(runner.RunAtMetricsPod("curl", "-s", "-k", "--header", + bearer, metrics.DefaultBindAddress, "https://127.0.0.1:8443/metrics")) +} + +func getPrometheusToken() (string, error) { + const ( + monitoringNamespace = "monitoring" + prometheusPod = "prometheus-k8s-0" + container = "prometheus" + tokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" // #nosec G101 + ) + + return cmd.Kubectl("exec", "-n", monitoringNamespace, prometheusPod, "-c", container, "--", "cat", tokenPath) +} + +func indexMetrics(metrics string) map[string]string { + metricsMap := map[string]string{} + for _, metric := range strings.Split(metrics, "\n") { + if strings.Contains(metric, "#") { // Ignore comments + continue + } + metricSplit := strings.Split(metric, " ") + if len(metricSplit) != 2 { + continue + } + metricsMap[metricSplit[0]] = metricSplit[1] + } + return metricsMap +} diff --git a/test/e2e/handler/metrics_test.go b/test/e2e/handler/metrics_test.go new file mode 100644 index 000000000..b8a2a33b6 --- /dev/null +++ b/test/e2e/handler/metrics_test.go @@ -0,0 +1,97 @@ +/* +Copyright The Kubernetes NMState Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package handler + +import ( + "fmt" + "time" + + nmstate "github.com/nmstate/kubernetes-nmstate/api/shared" + "github.com/nmstate/kubernetes-nmstate/pkg/monitoring" + "github.com/nmstate/kubernetes-nmstate/test/e2e/policy" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Metrics", func() { + var ( + extraBridgeName = func() string { return bridge1 + "-extra" } + linuxBridgeWithCustomHostname = func(bridge string) nmstate.State { + return nmstate.NewState(fmt.Sprintf(` +interfaces: + - name: %s + type: linux-bridge + state: up + ipv4: + enabled: true + dhcp: true + dhcp-custom-hostname: foo + bridge: + options: + stp: + enabled: false + port: [] +`, bridge)) + } + ) + Context("when desiredState is configured", func() { + Context("with a state that increase gauge", func() { + BeforeEach(func() { + By("Apply first NNCP") + updateDesiredStateAndWait(linuxBridgeWithCustomHostname(bridge1)) + + By("Apply second NNCP with same features") + setDesiredStateWithPolicyAndCapture(extraBridgeName(), linuxBridgeWithCustomHostname(extraBridgeName()), map[string]string{}) + policy.WaitForAvailablePolicy(extraBridgeName()) + }) + AfterEach(func() { + updateDesiredStateAndWait(linuxBrAbsent(bridge1)) + setDesiredStateWithPolicyAndCapture(extraBridgeName(), linuxBrAbsent(extraBridgeName()), map[string]string{}) + policy.WaitForAvailablePolicy(extraBridgeName()) + resetDesiredStateForNodes() + }) + It("should report a metrics with proper gauge increased", func() { + + token, err := getPrometheusToken() + Expect(err).ToNot(HaveOccurred()) + Eventually(func() map[string]string { + return getMetrics(token) + }). + WithPolling(time.Second). + WithTimeout(2 * time.Second). 
+ Should(HaveKeyWithValue(monitoring.AppliedFeaturesOpts.Name+`{name="dhcpv4-custom-hostname"}`, "1")) + }) + Context("and update with an state that decrease the gaugue", func() { + BeforeEach(func() { + updateDesiredStateAndWait(linuxBrAbsent(bridge1)) + setDesiredStateWithPolicyAndCapture(extraBridgeName(), linuxBrAbsent(extraBridgeName()), map[string]string{}) + policy.WaitForAvailablePolicy(extraBridgeName()) + }) + It("should report a metrics with proper gauge decrease", func() { + token, err := getPrometheusToken() + Expect(err).ToNot(HaveOccurred()) + Eventually(func() map[string]string { + return getMetrics(token) + }). + WithPolling(time.Second). + WithTimeout(2 * time.Second). + Should(HaveKeyWithValue(monitoring.AppliedFeaturesOpts.Name+`{name="dhcpv4-custom-hostname"}`, "0")) + }) + }) + }) + }) +}) diff --git a/test/e2e/operator/main_test.go b/test/e2e/operator/main_test.go index 597e113dc..88236acf1 100644 --- a/test/e2e/operator/main_test.go +++ b/test/e2e/operator/main_test.go @@ -42,7 +42,7 @@ var ( nodes []string knmstateReporter *knmstatereporter.KubernetesNMStateReporter manifestFiles = []string{"namespace.yaml", "service_account.yaml", "operator.yaml", "role.yaml", "role_binding.yaml"} - defaultOperator = NewOperatorTestData(os.Getenv("HANDLER_NAMESPACE"), manifestsDir, manifestFiles) + defaultOperator TestData ) func TestE2E(t *testing.T) { @@ -53,10 +53,11 @@ func TestE2E(t *testing.T) { } var _ = BeforeSuite(func() { - // Change to root directory some test expect that os.Chdir("../../../") + defaultOperator = NewOperatorTestData(os.Getenv("HANDLER_NAMESPACE"), manifestsDir, manifestFiles) + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) testenv.Start() diff --git a/test/e2e/operator/nmstate_install_test.go b/test/e2e/operator/nmstate_install_test.go index 8759eae54..dcd47f3c5 100644 --- a/test/e2e/operator/nmstate_install_test.go +++ b/test/e2e/operator/nmstate_install_test.go @@ -108,9 +108,10 @@ var _ = 
Describe("NMState operator", func() { }) Context("and another handler is installed with different namespace", func() { var ( - altOperator = NewOperatorTestData(os.Getenv("HANDLER_NAMESPACE")+"-alt", manifestsDir, manifestFiles) + altOperator TestData ) BeforeEach(func() { + altOperator = NewOperatorTestData(os.Getenv("HANDLER_NAMESPACE")+"-alt", manifestsDir, manifestFiles) By("Wait for operand to be ready") EventuallyOperandIsReady(defaultOperator) @@ -197,6 +198,7 @@ func drainNode(nodeName string) func() { Ctx: context.TODO(), Client: testenv.KubeClient, IgnoreAllDaemonSets: true, + DeleteEmptyDirData: true, Out: GinkgoWriter, ErrOut: GinkgoWriter, } diff --git a/test/e2e/operator/operator.go b/test/e2e/operator/operator.go index 2bdf14459..cb86a9f7e 100644 --- a/test/e2e/operator/operator.go +++ b/test/e2e/operator/operator.go @@ -20,6 +20,9 @@ package operator import ( "context" "fmt" + "os" + "path" + "strings" "time" . "github.com/onsi/ginkgo/v2" @@ -44,12 +47,13 @@ type TestData struct { Ns string Nmstate nmstatev1.NMState WebhookKey, HandlerKey, CertManagerKey types.NamespacedName + MetricsKey *types.NamespacedName ManifestsDir string ManifestFiles []string } func NewOperatorTestData(ns string, manifestsDir string, manifestFiles []string) TestData { - return TestData{ + td := TestData{ Ns: ns, Nmstate: nmstatev1.NMState{ ObjectMeta: metav1.ObjectMeta{ @@ -63,6 +67,17 @@ func NewOperatorTestData(ns string, manifestsDir string, manifestFiles []string) ManifestsDir: manifestsDir, ManifestFiles: manifestFiles, } + // If there is a "servicemonitors" RBAC then nmstate-metrics deployment + // should be there + for _, manifestFile := range manifestFiles { + manifest, err := os.ReadFile(path.Join(manifestsDir, manifestFile)) + Expect(err).ToNot(HaveOccurred(), "should successfully open manifests to check if nmstate-metrics is needed") + if strings.Contains(string(manifest), "servicemonitors") { + td.MetricsKey = &types.NamespacedName{Namespace: ns, Name: 
"nmstate-metrics"} + break + } + } + return td } func InstallNMState(nmstate nmstatev1.NMState) { @@ -106,12 +121,19 @@ func EventuallyOperandIsReady(testData TestData) { deployment.GetEventually(testData.WebhookKey).Should(deployment.BeReady(), "should start webhook deployment") By("Wait deployment cert-manager is ready") deployment.GetEventually(testData.CertManagerKey).Should(deployment.BeReady(), "should start cert-manager deployment") + if testData.MetricsKey != nil { + By("Wait deployment metrics is ready") + deployment.GetEventually(*testData.MetricsKey).Should(deployment.BeReady(), "should start metrics deployment") + } } func EventuallyOperandIsNotFound(testData TestData) { EventuallyIsNotFound(testData.HandlerKey, &appsv1.DaemonSet{}, "should delete handler daemonset") EventuallyIsNotFound(testData.WebhookKey, &appsv1.Deployment{}, "should delete webhook deployment") EventuallyIsNotFound(testData.CertManagerKey, &appsv1.Deployment{}, "should delete cert-manager deployment") + if testData.MetricsKey != nil { + EventuallyIsNotFound(*testData.MetricsKey, &appsv1.Deployment{}, "should delete metrics deployment") + } By("Wait for operand pods to terminate") Eventually(func() ([]corev1.Pod, error) { podList := corev1.PodList{} @@ -128,6 +150,9 @@ func EventuallyOperandIsFound(testData TestData) { EventuallyIsFound(testData.HandlerKey, &appsv1.DaemonSet{}, "should create handler daemonset") EventuallyIsFound(testData.WebhookKey, &appsv1.Deployment{}, "should create webhook deployment") EventuallyIsFound(testData.CertManagerKey, &appsv1.Deployment{}, "should create cert-manager deployment") + if testData.MetricsKey != nil { + EventuallyIsFound(*testData.MetricsKey, &appsv1.Deployment{}, "should create metrics deployment") + } } func InstallOperator(operator TestData) { diff --git a/test/e2e/upgrade/main_test.go b/test/e2e/upgrade/main_test.go index 82f67b766..b7f5cf1fb 100644 --- a/test/e2e/upgrade/main_test.go +++ b/test/e2e/upgrade/main_test.go @@ -53,9 +53,14 
@@ var ( ) var ( - manifestFiles = []string{"namespace.yaml", "service_account.yaml", "operator.yaml", "role.yaml", "role_binding.yaml"} - latestOperator = operator.NewOperatorTestData("nmstate", latestManifestsDir, manifestFiles) - previousReleaseOperator = operator.NewOperatorTestData("nmstate", previousReleaseManifestsDir, manifestFiles) + manifestFiles = []string{ + "namespace.yaml", + "service_account.yaml", + "operator.yaml", + "role.yaml", + "role_binding.yaml", + } + latestOperator, previousReleaseOperator operator.TestData ) func TestE2E(t *testing.T) { @@ -69,6 +74,9 @@ var _ = BeforeSuite(func() { // Change to root directory some test expect that os.Chdir("../../../") + latestOperator = operator.NewOperatorTestData("nmstate", latestManifestsDir, manifestFiles) + previousReleaseOperator = operator.NewOperatorTestData("nmstate", previousReleaseManifestsDir, manifestFiles) + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) testenv.Start() diff --git a/test/reporter/writers.go b/test/reporter/writers.go index 9bca2a13f..b95eb586c 100644 --- a/test/reporter/writers.go +++ b/test/reporter/writers.go @@ -88,8 +88,6 @@ func networkManagerLogsWriter(nodes []string, sinceTime time.Time) func(io.Write } func writePodsLogs(writer io.Writer, namespace string, sinceTime time.Time) { - podLogOpts := corev1.PodLogOptions{} - podLogOpts.SinceTime = &metav1.Time{Time: sinceTime} podList := &corev1.PodList{} err := testenv.Client.List(context.TODO(), podList, &dynclient.ListOptions{}) Expect(err).ToNot(HaveOccurred()) @@ -101,20 +99,28 @@ func writePodsLogs(writer io.Writer, namespace string, sinceTime time.Time) { if !hasAppLabel || appLabel != "kubernetes-nmstate" { continue } - req := podsClientset.GetLogs(pod.Name, &podLogOpts) - podLogs, err := req.Stream(context.TODO()) - if err != nil { - io.WriteString(GinkgoWriter, fmt.Sprintf("error in opening stream: %v\n", err)) - continue - } - defer podLogs.Close() - rawLogs, err := io.ReadAll(podLogs) 
- if err != nil { - io.WriteString(GinkgoWriter, fmt.Sprintf("error reading kubernetes-nmstate logs: %v\n", err)) - continue + for containerIndex := range pod.Spec.Containers { + containerName := pod.Spec.Containers[containerIndex].Name + podLogOpts := corev1.PodLogOptions{ + SinceTime: &metav1.Time{Time: sinceTime}, + Container: containerName, + } + podLogOpts.SinceTime = &metav1.Time{Time: sinceTime} + req := podsClientset.GetLogs(pod.Name, &podLogOpts) + podLogs, err := req.Stream(context.TODO()) + if err != nil { + io.WriteString(GinkgoWriter, fmt.Sprintf("error in opening stream: %v\n", err)) + continue + } + defer podLogs.Close() + rawLogs, err := io.ReadAll(podLogs) + if err != nil { + io.WriteString(GinkgoWriter, fmt.Sprintf("error reading kubernetes-nmstate logs: %v\n", err)) + continue + } + formattedLogs := strings.Replace(string(rawLogs), "\\n", "\n", -1) + io.WriteString(writer, formattedLogs) } - formattedLogs := strings.Replace(string(rawLogs), "\\n", "\n", -1) - io.WriteString(writer, formattedLogs) } } diff --git a/test/runner/pod.go b/test/runner/pod.go index 9c7b57344..e0fb97a98 100644 --- a/test/runner/pod.go +++ b/test/runner/pod.go @@ -18,6 +18,7 @@ limitations under the License. package runner import ( + "fmt" "strings" . 
"github.com/onsi/gomega" @@ -27,7 +28,7 @@ import ( testenv "github.com/nmstate/kubernetes-nmstate/test/env" ) -func nmstateHandlerPods() ([]string, error) { +func nmstatePods(component string) ([]string, error) { output, err := cmd.Kubectl( "get", "pod", @@ -37,13 +38,21 @@ func nmstateHandlerPods() ([]string, error) { "-o", "custom-columns=:metadata.name", "-l", - "component=kubernetes-nmstate-handler", + fmt.Sprintf("component=%s", component), ) ExpectWithOffset(2, err).ToNot(HaveOccurred()) names := strings.Split(strings.TrimSpace(output), "\n") return names, err } +func nmstateHandlerPods() ([]string, error) { + return nmstatePods("kubernetes-nmstate-handler") +} + +func nmstateMetricsPods() ([]string, error) { + return nmstatePods("kubernetes-nmstate-metrics") +} + func runAtPod(pod string, arguments ...string) string { exec := []string{"exec", "-n", testenv.OperatorNamespace, pod, "--"} exec = append(exec, arguments...) @@ -70,3 +79,10 @@ func RunAtHandlerPods(arguments ...string) { ExpectWithOffset(1, err).ToNot(HaveOccurred()) runAtPods(handlerPods, arguments...) } + +func RunAtMetricsPod(arguments ...string) string { + metricsPods, err := nmstateMetricsPods() + ExpectWithOffset(1, err).ToNot(HaveOccurred()) + ExpectWithOffset(1, metricsPods).ToNot(BeEmpty()) + return runAtPod(metricsPods[0], arguments...) 
+} diff --git a/vendor/github.com/nmstate/kubernetes-nmstate/api/shared/nodenetworkconfigurationenactment_types.go b/vendor/github.com/nmstate/kubernetes-nmstate/api/shared/nodenetworkconfigurationenactment_types.go index 5acd7ba07..23355fa42 100644 --- a/vendor/github.com/nmstate/kubernetes-nmstate/api/shared/nodenetworkconfigurationenactment_types.go +++ b/vendor/github.com/nmstate/kubernetes-nmstate/api/shared/nodenetworkconfigurationenactment_types.go @@ -41,6 +41,8 @@ type NodeNetworkConfigurationEnactmentStatus struct { PolicyGeneration int64 `json:"policyGeneration,omitempty"` Conditions ConditionList `json:"conditions,omitempty"` + + Features []string `json:"features,omitempty"` } type NodeNetworkConfigurationEnactmentCapturedState struct {