From dd8c8c4f9bd7d2071907b2c1029ee9c3ae88551c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=AF=93?= Date: Thu, 17 Aug 2023 15:25:02 +0800 Subject: [PATCH] feat(metric): add some metrics for some controllers --- go.mod | 2 -- go.sum | 6 ------ pkg/controllers/nsautoprop/controller.go | 16 ++++++++++++++++ pkg/controllers/policyrc/controller.go | 9 +++++++-- pkg/controllers/status/controller.go | 18 ++++++++++++++++++ 5 files changed, 41 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index 3c67a6f69..275047a86 100644 --- a/go.mod +++ b/go.mod @@ -12,10 +12,8 @@ require ( github.com/onsi/gomega v1.27.8 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.14.0 - github.com/sirupsen/logrus v1.9.3 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 - golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 golang.org/x/sync v0.2.0 golang.org/x/time v0.3.0 k8s.io/api v0.26.6 diff --git a/go.sum b/go.sum index c84d23c77..45f6a5024 100644 --- a/go.sum +++ b/go.sum @@ -270,8 +270,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= @@ -285,7 +283,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -316,8 +313,6 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2 h1:Jvc7gsqn21cJHCmAWx0LiimpP18LZmUxkT5Mp7EZ1mI= -golang.org/x/exp v0.0.0-20230224173230-c95f2b4c22f2/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -433,7 +428,6 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/pkg/controllers/nsautoprop/controller.go b/pkg/controllers/nsautoprop/controller.go index 68eb17372..34a7f82a0 100644 --- a/pkg/controllers/nsautoprop/controller.go +++ b/pkg/controllers/nsautoprop/controller.go @@ -255,6 +255,8 @@ func (c *Controller) reconcile(ctx context.Context, qualifiedName common.Qualifi return worker.StatusAllOK } + c.recordNamespacePropagationFailedMetric(fedNamespace) + needsUpdate := false // Set placement to propagate to all clusters @@ -396,3 +398,17 @@ func (c *Controller) HasSynced() bool { c.namespaceInformer.Informer().HasSynced() && c.informerManager.HasSynced() } + +func (c *Controller) recordNamespacePropagationFailedMetric(fedNamespace *fedcorev1a1.ClusterFederatedObject) { + errorClusterCount := 0 + + for _, clusterStatus := range fedNamespace.Status.Clusters { + if clusterStatus.Status != fedcorev1a1.ClusterPropagationOK && clusterStatus.Status != fedcorev1a1.WaitingForRemoval { + errorClusterCount++ + } + } + + if errorClusterCount != 0 { + c.metrics.Store("namespace_propagate_failed_total", errorClusterCount, stats.Tag{Name: "namespace", Value: fedNamespace.Name}) + } +} diff --git a/pkg/controllers/policyrc/controller.go b/pkg/controllers/policyrc/controller.go index 6c81252e9..55867ec6a 100644 --- a/pkg/controllers/policyrc/controller.go +++ b/pkg/controllers/policyrc/controller.go @@ -95,7 +95,7 @@ func NewPolicyRCController( func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result { return c.reconcilePersist( ctx, - "propagation-policy", + "propagation_policy_reference_count", qualifiedName, c.propagationPolicyInformer.Informer().GetStore(), c.clusterPropagationPolicyInformer.Informer().GetStore(), @@ -112,7 +112,7 @@ func NewPolicyRCController( func(ctx context.Context, qualifiedName common.QualifiedName) worker.Result { return c.reconcilePersist( ctx, - "override-policy", + "override_policy_reference_count", qualifiedName, c.overridePolicyInformer.Informer().GetStore(), c.clusterOverridePolicyInformer.Informer().GetStore(), @@ -317,5 +317,10 @@ func (c *Controller) reconcilePersist( } } + c.metrics.Store(metricName, newRefCount, []stats.Tag{ + {Name: "name", Value: qualifiedName.Name}, + {Name: "namespace", Value: qualifiedName.Namespace}, + }...) + return worker.StatusAllOK } diff --git a/pkg/controllers/status/controller.go b/pkg/controllers/status/controller.go index 981ec0daa..4ed24589b 100644 --- a/pkg/controllers/status/controller.go +++ b/pkg/controllers/status/controller.go @@ -555,6 +555,7 @@ func (s *StatusController) clusterStatuses( var errList []string for _, clusterName := range clusterNames { + startTime := time.Now() resourceClusterStatus := fedcorev1a1.CollectedFieldsWithCluster{Cluster: clusterName} clusterObj, exist, err := informermanager.GetClusterObject( @@ -571,6 +572,7 @@ func (s *StatusController) clusterStatuses( resourceClusterStatus.Error = errMsg clusterStatus = append(clusterStatus, resourceClusterStatus) errList = append(errList, fmt.Sprintf("cluster-name: %s, error-info: %s", clusterName, errMsg)) + s.recordStatusCollectionError(fedObject.GetName(), fedObject.GetNamespace(), targetGVK.String(), clusterName) continue } if !exist { @@ -609,6 +611,7 @@ func (s *StatusController) clusterStatuses( if err != nil { keyedLogger.WithValues("cluster-name", clusterName). Error(err, "Failed to marshal collected fields") + s.recordStatusCollectionError(fedObject.GetName(), fedObject.GetNamespace(), targetGVK.String(), clusterName) continue } @@ -627,6 +630,12 @@ func (s *StatusController) clusterStatuses( ) } clusterStatus = append(clusterStatus, resourceClusterStatus) + s.metrics.Duration("status_collection_duration_seconds", startTime, []stats.Tag{ + {Name: "name", Value: fedObject.GetName()}, + {Name: "namespace", Value: fedObject.GetNamespace()}, + {Name: "resource", Value: targetGVK.String()}, + {Name: "cluster", Value: clusterName}, + }...) } if len(errList) != 0 { @@ -643,6 +652,15 @@ func (s *StatusController) clusterStatuses( return clusterStatus } +func (s *StatusController) recordStatusCollectionError(name, namespace, resourceGvk, cluster string) { + s.metrics.Counter("status_collection_error_total", 1, []stats.Tag{ + {Name: "name", Value: name}, + {Name: "namespace", Value: namespace}, + {Name: "resource", Value: resourceGvk}, + {Name: "cluster", Value: cluster}, + }...) +} + // latestReplicasetDigests returns digests of latest replicaSets in member cluster func (s *StatusController) latestReplicasetDigests( ctx context.Context,