From fe5239f1ca198acaf09e883547b209ea6fa5258e Mon Sep 17 00:00:00 2001 From: r0zbot Date: Fri, 1 Nov 2024 02:40:05 -0300 Subject: [PATCH 1/9] Add atlasScheduledAutoscaling (SRE-720) --- api/v1alpha1/mongodbcluster_types.go | 27 ++++ api/v1alpha1/zz_generated.deepcopy.go | 22 ++- ...ock.cloud.rocket.chat_mongodbclusters.yaml | 66 ++++++++ .../airlock_v1alpha1_mongodbcluster.yaml | 20 ++- controllers/common.go | 17 ++ controllers/mongodbcluster_controller.go | 146 ++++++++++++++++++ go.mod | 17 +- go.sum | 27 ++-- 8 files changed, 324 insertions(+), 18 deletions(-) diff --git a/api/v1alpha1/mongodbcluster_types.go b/api/v1alpha1/mongodbcluster_types.go index 944e7bd..3acbb9c 100644 --- a/api/v1alpha1/mongodbcluster_types.go +++ b/api/v1alpha1/mongodbcluster_types.go @@ -51,6 +51,33 @@ type MongoDBClusterSpec struct { // If this is set, along with useAtlasApi, all the kubernetes nodes on the cluster will be added to the Atlas firewall. The only available value right now is "rancher-annotation", which uses the rke.cattle.io/external-ip annotation. AtlasNodeIPAccessStrategy string `json:"atlasNodeIpAccessStrategy,omitempty"` + + AtlasScheduledAutoscaling *AtlasScheduledAutoscaling `json:"atlasScheduledAutoscaling,omitempty"` +} + +type AtlasScheduledAutoscaling struct { + // If this is set, the cluster will be enabled for scheduled autoscaling. + // The way it works is that the cluster will be scaled up to the high tier at the specified time, and scaled down to the lowTier at the specified time. + // +kubebuilder:default=false + Enabled bool `json:"enabled,omitempty"` + + // The minimum tier the cluster can scale down to. + // +kubebuilder:validation:Enum=M0;M2;M5;M10;M20;M30;M40;M50;M60;M80;M140;M200;M300;M400;M500;M700;M900;M1000 + // +kubebuilder:default="M20" + LowTier string `json:"lowTier,omitempty"` + + // The maximum tier the cluster can scale up to. + // +kubebuilder:validation:Enum=M0;M2;M5;M10;M20;M30;M40;M50;M60;M80;M140;M200;M300;M400;M500;M700;M900;M1000 + // +kubebuilder:default="M50" + HighTier string `json:"highTier,omitempty"` + + // Cron expression for the time the cluster should be scaled down. + // +kubebuilder:default="0 20 * * 1-5" + ScaleDownExpression string `json:"scaleDownExpression,omitempty"` + + // Cron expression for the time the cluster should be scaled up. + // +kubebuilder:default="0 6 * * 1-5" + ScaleUpExpression string `json:"scaleUpExpression,omitempty"` } // MongoDBClusterStatus defines the observed state of MongoDBCluster diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 9837716..56179a6 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -26,6 +26,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AtlasScheduledAutoscaling) DeepCopyInto(out *AtlasScheduledAutoscaling) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AtlasScheduledAutoscaling. +func (in *AtlasScheduledAutoscaling) DeepCopy() *AtlasScheduledAutoscaling { + if in == nil { + return nil + } + out := new(AtlasScheduledAutoscaling) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *MongoDBAccessRequest) DeepCopyInto(out *MongoDBAccessRequest) { *out = *in @@ -127,7 +142,7 @@ func (in *MongoDBCluster) DeepCopyInto(out *MongoDBCluster) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -184,6 +199,11 @@ func (in *MongoDBClusterList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MongoDBClusterSpec) DeepCopyInto(out *MongoDBClusterSpec) { *out = *in + if in.AtlasScheduledAutoscaling != nil { + in, out := &in.AtlasScheduledAutoscaling, &out.AtlasScheduledAutoscaling + *out = new(AtlasScheduledAutoscaling) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MongoDBClusterSpec. diff --git a/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml b/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml index 5a89405..f509700 100644 --- a/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml +++ b/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml @@ -47,6 +47,72 @@ spec: available value right now is "rancher-annotation", which uses the rke.cattle.io/external-ip annotation. type: string + atlasScheduledAutoscaling: + properties: + enabled: + default: false + description: If this is set, the cluster will be enabled for scheduled + autoscaling. The way it works is that the cluster will be scaled + up to the high tier at the specified time, and scaled down to + the lowTier at the specified time. + type: boolean + highTier: + default: M50 + description: The maximum tier the cluster can scale up to. + enum: + - M0 + - M2 + - M5 + - M10 + - M20 + - M30 + - M40 + - M50 + - M60 + - M80 + - M140 + - M200 + - M300 + - M400 + - M500 + - M700 + - M900 + - M1000 + type: string + lowTier: + default: M20 + description: The minimum tier the cluster can scale down to. + enum: + - M0 + - M2 + - M5 + - M10 + - M20 + - M30 + - M40 + - M50 + - M60 + - M80 + - M140 + - M200 + - M300 + - M400 + - M500 + - M700 + - M900 + - M1000 + type: string + scaleDownExpression: + default: 0 20 * * 1-5 + description: Cron expression for the time the cluster should be + scaled down. + type: string + scaleUpExpression: + default: 0 6 * * 1-5 + description: Cron expression for the time the cluster should be + scaled up. + type: string + type: object connectionSecret: description: Secret in which Airlock will look for a ConnectionString or Atlas credentials, that will be used to connect to the cluster. diff --git a/config/samples/airlock_v1alpha1_mongodbcluster.yaml b/config/samples/airlock_v1alpha1_mongodbcluster.yaml index 7333503..d34f467 100644 --- a/config/samples/airlock_v1alpha1_mongodbcluster.yaml +++ b/config/samples/airlock_v1alpha1_mongodbcluster.yaml @@ -15,7 +15,7 @@ metadata: name: teste-atlas1 spec: # The host with port that clients will receive when requesting credentials. - hostTemplate: "cluster0.vpz0mct.mongodb.net" + hostTemplate: "cluster0.4h0sjun.mongodb.net" # Secret in which Airlock will look for a ConnectionString or Atlas credentials, that will be used to connect to the cluster. connectionSecret: airlock-atlas-connection @@ -38,6 +38,22 @@ spec: # Optional. If this is set, along with useAtlasApi, all the kubernetes nodes on the cluster will be added to the Atlas firewall. 
The only available value right now is "rancher-annotation", which uses the rke.cattle.io/external-ip annotation. atlasNodeIpAccessStrategy: rancher-annotation + atlasScheduledAutoscaling: + # Whether the autoscaling is enabled or not. Defaults to false. + enabled: true + + # The low tier the cluster will scale down to. Defaults to "M20". + lowTier: "M10" + + # The high tier the cluster will scale up to. Defaults to "M50". + highTier: "M20" + + # The cron expression that will be used to scale down the cluster. Defaults to "0 20 * * 1-5". + scaleDownExpression: "* * * * *" + + # The cron expression that will be used to scale up the cluster. Defaults to "0 6 * * 1-5". + scaleUpExpression: "0 0 * * *" + --- apiVersion: v1 kind: Secret @@ -47,7 +63,7 @@ metadata: type: Opaque stringData: # It should have enough privileges to manage users and access. This is not gonna be used by the created users. - connectionString: "mongodb://rcadmin:pamonha@mongodb.airlock-test/test?replicaSet=rs0" + connectionString: "mongodb://rcadmin:pamonha@mongodb.airlock-test/test?replicaSet=rs*" --- apiVersion: v1 diff --git a/controllers/common.go b/controllers/common.go index 69481a2..19184c1 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -1,6 +1,9 @@ package controllers import ( + "context" + "strings" + "github.com/mongodb-forks/digest" "go.mongodb.org/atlas/mongodbatlas" corev1 "k8s.io/api/core/v1" @@ -44,3 +47,17 @@ func getAtlasClientFromSecret(secret *corev1.Secret) (*mongodbatlas.Client, stri return client, atlasGroupID, nil } + +func getClusterNameFromHostTemplate(ctx context.Context, client *mongodbatlas.Client, groupID, hostTemplate string) (string, error) { + clusters, _, err := client.Clusters.List(ctx, groupID, &mongodbatlas.ListOptions{}) + if err != nil { + return "", err + } + for _, cluster := range clusters { + if strings.Contains(cluster.SrvAddress, hostTemplate) { + return cluster.Name, nil + } + } + + return "", errors.NewBadRequest("Cluster not found when searching for its connectionString in Atlas") +} diff --git a/controllers/mongodbcluster_controller.go b/controllers/mongodbcluster_controller.go index a1567d2..0095841 100644 --- a/controllers/mongodbcluster_controller.go +++ b/controllers/mongodbcluster_controller.go @@ -23,6 +23,7 @@ import ( "strings" "time" + "github.com/go-co-op/gocron/v2" "github.com/go-logr/logr" "go.mongodb.org/atlas/mongodbatlas" "go.mongodb.org/mongo-driver/bson" @@ -53,6 +54,7 @@ import ( type MongoDBClusterReconciler struct { client.Client Scheme *runtime.Scheme + gocron.Scheduler } //+kubebuilder:rbac:groups=airlock.cloud.rocket.chat,resources=mongodbclusters,verbs=get;list;watch;create;update;patch;delete @@ -141,6 +143,22 @@ func (r *MongoDBClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, utilerrors.NewAggregate([]error{err, r.Status().Update(ctx, mongodbClusterCR)}) } } + + // Reconcile scheduled autoscaling + err = r.reconcileAtlasScheduledAutoscaling(ctx, mongodbClusterCR, secret) + if err != nil { + meta.SetStatusCondition(&mongodbClusterCR.Status.Conditions, + metav1.Condition{ + Type: "Ready", + Status: metav1.ConditionFalse, + Reason: "AtlasScheduledAutoscalingFailed", + LastTransitionTime: metav1.NewTime(time.Now()), + Message: fmt.Sprintf("Failed to reconcile scheduled autoscaling: %s", err.Error()), + }) + + return ctrl.Result{}, utilerrors.NewAggregate([]error{err, r.Status().Update(ctx, mongodbClusterCR)}) + } + } else { err = testMongoConnection(ctx, mongodbClusterCR, secret) if err != nil
{ @@ -210,6 +228,17 @@ func (r *MongoDBClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return err } + { + var err error + r.Scheduler, err = gocron.NewScheduler() + if err != nil { + ctrl.Log.WithName("controllers").WithName("MongoDBCluster").V(1).Error(err, "Error creating scheduler") + return err + } + + r.Scheduler.Start() + } + return ctrl.NewControllerManagedBy(mgr). For(&airlockv1alpha1.MongoDBCluster{}). Watches( @@ -472,3 +501,120 @@ func (r *MongoDBClusterReconciler) reconcileAtlasFirewall(ctx context.Context, m return nil } + +func (r *MongoDBClusterReconciler) reconcileAtlasScheduledAutoscaling(ctx context.Context, mongodbClusterCR *airlockv1alpha1.MongoDBCluster, secret *corev1.Secret) error { + logger := log.FromContext(ctx) + + scheduledAutoscaling := mongodbClusterCR.Spec.AtlasScheduledAutoscaling + + if scheduledAutoscaling != nil && scheduledAutoscaling.Enabled { + + var foundUp gocron.Job + var foundDown gocron.Job + + jobs := r.Scheduler.Jobs() + if jobs != nil { + for _, job := range jobs { + if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "up" { + foundUp = job + } else if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "down" { + foundDown = job + } + } + } else { + return fmt.Errorf("list of jobs is nil, wtf? Did the scheduler not initialize?") + } + + // Is this client gonna expire on me? Or is it eternal? ChatGPT says it wont expire, but I don't trust it. + client, atlasGroupID, err := getAtlasClientFromSecret(secret) + if err != nil { + logger.Error(err, "Couldn't get a client for Atlas") + return err + } + + clusterName, err := getClusterNameFromHostTemplate(ctx, client, atlasGroupID, mongodbClusterCR.Spec.HostTemplate) + if err != nil { + logger.Error(err, "Couldn't find cluster in Atlas") + return err + } + + clusterDetails, response, err := client.Clusters.Get(ctx, atlasGroupID, clusterName) + if err != nil || response.StatusCode != http.StatusOK { + logger.Error(err, "Couldn't get cluster details from Atlas") + return err + } + + if foundDown == nil || foundDown.Tags()[1] != scheduledAutoscaling.ScaleDownExpression+scheduledAutoscaling.LowTier { + + if foundDown != nil { + logger.Info("Removing outdated downscaling job for " + mongodbClusterCR.Name) + r.Scheduler.RemoveJob(foundDown.ID()) + } + + logger.Info("Creating scheduled downscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleDownExpression + " to " + scheduledAutoscaling.LowTier) + r.Scheduler.NewJob( + gocron.CronJob(scheduledAutoscaling.ScaleDownExpression, false), + gocron.NewTask( + func() error { + logger.Info("Scaling down " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.LowTier) + + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ + ProviderSettings: &mongodbatlas.ProviderSettings{ + ProviderName: "AWS", + InstanceSizeName: scheduledAutoscaling.LowTier, + RegionName: clusterDetails.ProviderSettings.RegionName, + }, + }) + + if err != nil || response.StatusCode != http.StatusOK { + logger.Error(err, "Couldn't scale down "+mongodbClusterCR.Name) + // TODO: Flip a metric so we can alert on this. This one is a warning. 
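+ // (A note on error handling, assuming gocron v2 semantics: a task's returned
+ // error is only surfaced through job event listeners, e.g.
+ // gocron.WithEventListeners(gocron.AfterJobRunsWithError(...)); none is
+ // registered here, so the log line above is the only trace.)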
+ return err + } + return nil + }, + ), + gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleDownExpression+scheduledAutoscaling.LowTier, "down"), + ) + } + + if foundUp == nil || foundUp.Tags()[1] != scheduledAutoscaling.ScaleUpExpression+scheduledAutoscaling.HighTier { + + if foundUp != nil { + logger.Info("Removing outdated upscaling job for " + mongodbClusterCR.Name) + r.Scheduler.RemoveJob(foundUp.ID()) + } + + logger.Info("Creating scheduled upscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleUpExpression + " to " + scheduledAutoscaling.HighTier) + + r.Scheduler.NewJob( + gocron.CronJob(scheduledAutoscaling.ScaleUpExpression, false), + gocron.NewTask( + func() error { + logger.Info("Scaling up " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.HighTier) + + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ + ProviderSettings: &mongodbatlas.ProviderSettings{ + ProviderName: "AWS", + InstanceSizeName: scheduledAutoscaling.HighTier, + RegionName: clusterDetails.ProviderSettings.RegionName, + }, + }) + + if err != nil || response.StatusCode != http.StatusOK { + logger.Error(err, "Couldn't scale up "+mongodbClusterCR.Name) + // TODO: Flip a metric so we can alert on this. If this fails, it's VERY CRITICAL + return err + } + return nil + }, + ), + gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleUpExpression+scheduledAutoscaling.HighTier, "up"), + ) + } + + } else { + r.Scheduler.RemoveByTags(mongodbClusterCR.Name) + } + return nil +} diff --git a/go.mod b/go.mod index 747ffac..849d9ba 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.19 require ( github.com/davecgh/go-spew v1.1.1 + github.com/go-co-op/gocron/v2 v2.12.1 github.com/onsi/ginkgo/v2 v2.1.4 github.com/onsi/gomega v1.19.0 github.com/thanhpk/randstr v1.0.4 @@ -14,7 +15,12 @@ require ( sigs.k8s.io/controller-runtime v0.13.0 ) -require github.com/google/go-querystring v1.1.0 // indirect +require ( + github.com/google/go-querystring v1.1.0 // indirect + github.com/jonboulle/clockwork v0.4.0 // indirect + github.com/robfig/cron/v3 v3.0.1 // indirect + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect +) require ( cloud.google.com/go v0.97.0 // indirect @@ -31,7 +37,7 @@ require ( github.com/emicklei/go-restful/v3 v3.8.0 // indirect github.com/evanphx/json-patch/v5 v5.6.0 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect - github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/logr v1.2.3 github.com/go-logr/zapr v1.2.3 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.5 // indirect @@ -42,9 +48,9 @@ require ( github.com/golang/protobuf v1.5.2 // indirect github.com/golang/snappy v0.0.3 // indirect github.com/google/gnostic v0.5.7-v3refs // indirect - github.com/google/go-cmp v0.5.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.1.0 // indirect - github.com/google/uuid v1.1.2 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/imdario/mergo v0.3.12 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -67,14 +73,13 @@ require ( github.com/xdg-go/stringprep v1.0.3 // indirect github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect go.mongodb.org/atlas v0.24.0 - go.mongodb.org/mongo-driver v1.11.1 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect 
go.uber.org/zap v1.21.0 // indirect golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect - golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect + golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/text v0.3.7 // indirect diff --git a/go.sum b/go.sum index bcafd1d..de9d068 100644 --- a/go.sum +++ b/go.sum @@ -114,6 +114,8 @@ github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2Vvl github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI= github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-co-op/gocron/v2 v2.12.1 h1:dCIIBFbzhWKdgXeEifBjHPzgQ1hoWhjS4289Hjjy1uw= +github.com/go-co-op/gocron/v2 v2.12.1/go.mod h1:xY7bJxGazKam1cz04EebrlP4S9q4iWdiAylMGP3jY9w= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -139,6 +141,7 @@ github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh github.com/go-openapi/swag v0.19.14 h1:gm3vOOXfiuw5i9p5N9xJvfjvuofpyvLA9Wr6QfK5Fng= github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= @@ -197,8 +200,8 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -223,8 +226,9 @@ github.com/google/pprof v0.0.0-20210601050228-01bbb1931b22/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod 
h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= @@ -236,6 +240,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1: github.com/imdario/mergo v0.3.12 h1:b6R2BslTbIEToALKP7LxUvijTsNI9TAe80pLWN2g/HU= github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4= +github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= @@ -284,7 +290,6 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= @@ -320,6 +325,8 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= @@ -337,8 +344,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 
h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/thanhpk/randstr v1.0.4 h1:IN78qu/bR+My+gHCvMEXhR/i5oriVHcTB/BJJIRTsNo= github.com/thanhpk/randstr v1.0.4/go.mod h1:M/H2P1eNLZzlDwAzpkkkUvoyNNMbzRGhESZuEQk3r0U= github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= @@ -372,7 +379,7 @@ go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= @@ -398,6 +405,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= +golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -493,8 +502,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -768,8 +777,8 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= From ae5f569f41aa1b5cbc7166a0ce778c6bdff961c3 Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:01:04 -0300 Subject: [PATCH 2/9] Add events and metrics to scaling jobs --- .golangci.yml | 6 +- api/v1alpha1/mongodbcluster_types.go | 8 +- ...ock.cloud.rocket.chat_mongodbclusters.yaml | 22 +--- .../airlock_v1alpha1_mongodbcluster.yaml | 4 +- controllers/common.go | 1 + .../mongodbaccessrequest_controller.go | 3 + controllers/mongodbcluster_controller.go | 104 ++++++++++++++---- 7 files changed, 100 insertions(+), 48 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index f1acdb0..22f2ddb 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -2,7 +2,7 @@ issues: new: true - fix: true + fix: false new-from-rev: 1de9aaed0441349ba34f1d3cfb18cc2b36114bc5 run: @@ -14,7 +14,7 @@ run: linters: enable: - bodyclose - - depguard + # - depguard - dogsled #- dupl - errcheck @@ -23,7 +23,7 @@ linters: - exportloopref - goconst - gocritic #TODO: https://go-critic.com/overview.html - - gocyclo + # - gocyclo - gofmt - goimports - gomnd diff --git a/api/v1alpha1/mongodbcluster_types.go b/api/v1alpha1/mongodbcluster_types.go index 3acbb9c..a818230 100644 --- a/api/v1alpha1/mongodbcluster_types.go +++ b/api/v1alpha1/mongodbcluster_types.go @@ -62,22 +62,22 @@ type AtlasScheduledAutoscaling struct { Enabled bool `json:"enabled,omitempty"` // The minimum tier the cluster can scale down to. - // +kubebuilder:validation:Enum=M0;M2;M5;M10;M20;M30;M40;M50;M60;M80;M140;M200;M300;M400;M500;M700;M900;M1000 + // +kubebuilder:validation:Enum=M10;M20;M30;M40;M50;M60;M80;M140;M200 // +kubebuilder:default="M20" LowTier string `json:"lowTier,omitempty"` // The maximum tier the cluster can scale up to. - // +kubebuilder:validation:Enum=M0;M2;M5;M10;M20;M30;M40;M50;M60;M80;M140;M200;M300;M400;M500;M700;M900;M1000 + // +kubebuilder:validation:Enum=M10;M20;M30;M40;M50;M60;M80;M140;M200 // +kubebuilder:default="M50" HighTier string `json:"highTier,omitempty"` // Cron expression for the time the cluster should be scaled down. // +kubebuilder:default="0 20 * * 1-5" - ScaleDownExpression string `json:"scaleDownExpression,omitempty"` + ScaleDownCronExpression string `json:"scaleDownCronExpression,omitempty"` // Cron expression for the time the cluster should be scaled up. 
// +kubebuilder:default="0 6 * * 1-5" - ScaleUpExpression string `json:"scaleUpExpression,omitempty"` + ScaleUpCronExpression string `json:"scaleUpCronExpression,omitempty"` } // MongoDBClusterStatus defines the observed state of MongoDBCluster diff --git a/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml b/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml index f509700..2df808b 100644 --- a/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml +++ b/config/crd/bases/airlock.cloud.rocket.chat_mongodbclusters.yaml @@ -60,9 +60,6 @@ spec: default: M50 description: The maximum tier the cluster can scale up to. enum: - - M0 - - M2 - - M5 - M10 - M20 - M30 @@ -72,20 +69,11 @@ spec: - M80 - M140 - M200 - - M300 - - M400 - - M500 - - M700 - - M900 - - M1000 type: string lowTier: default: M20 description: The minimum tier the cluster can scale down to. enum: - - M0 - - M2 - - M5 - M10 - M20 - M30 @@ -95,19 +83,13 @@ spec: - M80 - M140 - M200 - - M300 - - M400 - - M500 - - M700 - - M900 - - M1000 type: string - scaleDownExpression: + scaleDownCronExpression: default: 0 20 * * 1-5 description: Cron expression for the time the cluster should be scaled down. type: string - scaleUpExpression: + scaleUpCronExpression: default: 0 6 * * 1-5 description: Cron expression for the time the cluster should be scaled up. diff --git a/config/samples/airlock_v1alpha1_mongodbcluster.yaml b/config/samples/airlock_v1alpha1_mongodbcluster.yaml index d34f467..f7312a8 100644 --- a/config/samples/airlock_v1alpha1_mongodbcluster.yaml +++ b/config/samples/airlock_v1alpha1_mongodbcluster.yaml @@ -49,10 +49,10 @@ spec: highTier: "M20" # The cron expression that will be used to scale down the cluster. Defaults to "0 20 * * 1-5". - scaleDownExpression: "* * * * *" + scaleDownCronExpression: "* * * * *" # The cron expression that will be used to scale up the cluster. Defaults to "0 6 * * 1-5". 
- scaleUpExpression: "0 0 * * *" + scaleUpCronExpression: "0 0 * * *" --- apiVersion: v1 diff --git a/controllers/common.go b/controllers/common.go index 19184c1..3bc7bb4 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -53,6 +53,7 @@ func getClusterNameFromHostTemplate(ctx context.Context, client *mongodbatlas.Cl if err != nil { return "", err } + for _, cluster := range clusters { if strings.Contains(cluster.SrvAddress, hostTemplate) { return cluster.Name, nil diff --git a/controllers/mongodbaccessrequest_controller.go b/controllers/mongodbaccessrequest_controller.go index 4943f3b..9fa900b 100644 --- a/controllers/mongodbaccessrequest_controller.go +++ b/controllers/mongodbaccessrequest_controller.go @@ -162,6 +162,7 @@ func (r *MongoDBAccessRequestReconciler) Reconcile(ctx context.Context, req ctrl err = r.cleanupAtlasUser(ctx, mongodbAccessRequestCR, mongodbClusterCR, clusterSecret) if err != nil { logger.Error(err, "Cleanup failed for atlas.") + if isStatusReady(mongodbAccessRequestCR) { meta.SetStatusCondition(&mongodbAccessRequestCR.Status.Conditions, metav1.Condition{ @@ -181,6 +182,7 @@ func (r *MongoDBAccessRequestReconciler) Reconcile(ctx context.Context, req ctrl err = r.cleanupMongoUser(ctx, mongodbAccessRequestCR, mongodbClusterCR, clusterSecret) if err != nil { logger.Error(err, "Cleanup failed for mongodb.") + if isStatusReady(mongodbAccessRequestCR) { meta.SetStatusCondition(&mongodbAccessRequestCR.Status.Conditions, metav1.Condition{ @@ -200,6 +202,7 @@ func (r *MongoDBAccessRequestReconciler) Reconcile(ctx context.Context, req ctrl // remove our finalizer from the list and update it. controllerutil.RemoveFinalizer(mongodbAccessRequestCR, airlockFinalizer) + if err := r.Update(ctx, mongodbAccessRequestCR); err != nil { return ctrl.Result{}, err } diff --git a/controllers/mongodbcluster_controller.go b/controllers/mongodbcluster_controller.go index 0095841..e0e0b97 100644 --- a/controllers/mongodbcluster_controller.go +++ b/controllers/mongodbcluster_controller.go @@ -25,6 +25,7 @@ import ( "github.com/go-co-op/gocron/v2" "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" "go.mongodb.org/atlas/mongodbatlas" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" @@ -38,11 +39,13 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" @@ -55,8 +58,28 @@ type MongoDBClusterReconciler struct { client.Client Scheme *runtime.Scheme gocron.Scheduler + EventRecorder record.EventRecorder } +var ( + // Metrics + ScalingUpErrorGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "airlock_scaling_up_failure", + Help: "When above 0, means a cluster has failed to scale up", + }, + []string{"cluster"}, + ) + + ScalingDownErrorGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "airlock_scaling_down_failure", + Help: "When above 0, means a cluster has failed to scale down", + }, + []string{"cluster"}, + ) +) + 
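+// A sketch of how these gauges are meant to be consumed on the alerting side
+// (assumed PromQL rules, not shipped with this change):
+//   airlock_scaling_up_failure > 0   => page (cluster is stuck on the low tier)
+//   airlock_scaling_down_failure > 0 => warn (paying for the high tier longer)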
//+kubebuilder:rbac:groups=airlock.cloud.rocket.chat,resources=mongodbclusters,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=airlock.cloud.rocket.chat,resources=mongodbclusters/status,verbs=get;update;patch //+kubebuilder:rbac:groups=airlock.cloud.rocket.chat,resources=mongodbclusters/finalizers,verbs=update @@ -158,7 +181,6 @@ func (r *MongoDBClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, utilerrors.NewAggregate([]error{err, r.Status().Update(ctx, mongodbClusterCR)}) } - } else { err = testMongoConnection(ctx, mongodbClusterCR, secret) if err != nil { @@ -230,6 +252,7 @@ func (r *MongoDBClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { { var err error + r.Scheduler, err = gocron.NewScheduler() if err != nil { ctrl.Log.WithName("controllers").WithName("MongoDBCluster").V(1).Error(err, "Error creating scheduler") @@ -239,6 +262,10 @@ func (r *MongoDBClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { r.Scheduler.Start() } + r.EventRecorder = mgr.GetEventRecorderFor("airlock") + + metrics.Registry.MustRegister(ScalingUpErrorGauge, ScalingDownErrorGauge) + return ctrl.NewControllerManagedBy(mgr). For(&airlockv1alpha1.MongoDBCluster{}). Watches( @@ -508,8 +535,8 @@ func (r *MongoDBClusterReconciler) reconcileAtlasScheduledAutoscaling(ctx contex scheduledAutoscaling := mongodbClusterCR.Spec.AtlasScheduledAutoscaling if scheduledAutoscaling != nil && scheduledAutoscaling.Enabled { - var foundUp gocron.Job + var foundDown gocron.Job jobs := r.Scheduler.Jobs() @@ -544,20 +571,27 @@ func (r *MongoDBClusterReconciler) reconcileAtlasScheduledAutoscaling(ctx contex return err } - if foundDown == nil || foundDown.Tags()[1] != scheduledAutoscaling.ScaleDownExpression+scheduledAutoscaling.LowTier { - + if foundDown == nil || foundDown.Tags()[1] != scheduledAutoscaling.ScaleDownCronExpression+scheduledAutoscaling.LowTier { if foundDown != nil { logger.Info("Removing outdated downscaling job for " + mongodbClusterCR.Name) - r.Scheduler.RemoveJob(foundDown.ID()) + + err = r.Scheduler.RemoveJob(foundDown.ID()) + if err != nil { + logger.Error(err, "Error removing outdated downscaling job") + return err + } } - logger.Info("Creating scheduled downscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleDownExpression + " to " + scheduledAutoscaling.LowTier) - r.Scheduler.NewJob( - gocron.CronJob(scheduledAutoscaling.ScaleDownExpression, false), + logger.Info("Creating scheduled downscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleDownCronExpression + " to " + scheduledAutoscaling.LowTier) + + _, err = r.Scheduler.NewJob( + gocron.CronJob(scheduledAutoscaling.ScaleDownCronExpression, false), gocron.NewTask( func() error { logger.Info("Scaling down " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.LowTier) + r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling down to "+scheduledAutoscaling.LowTier) + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ ProviderSettings: &mongodbatlas.ProviderSettings{ ProviderName: "AWS", @@ -568,31 +602,50 @@ func (r *MongoDBClusterReconciler) reconcileAtlasScheduledAutoscaling(ctx contex if err != nil || response.StatusCode != http.StatusOK { logger.Error(err, "Couldn't scale down "+mongodbClusterCR.Name) - // TODO: Flip a metric so we can alert on this. This one is a warning. 
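+ // (The Event below is assumed best-effort: client-go's recorder emits
+ // asynchronously and may aggregate or drop under load, so the gauge,
+ // not the Event, is the thing to alert on.)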
+ r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeWarning, "Scaling", "Failed to scale down to "+scheduledAutoscaling.LowTier) + + // Flip a metric so we can alert on this. This one is a warning. + ScalingDownErrorGauge.WithLabelValues(mongodbClusterCR.Name).Inc() + + return err } + + // Reset metric if the reconcile then succeeded + ScalingDownErrorGauge.WithLabelValues(mongodbClusterCR.Name).Set(0) + return nil }, ), - gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleDownExpression+scheduledAutoscaling.LowTier, "down"), + gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleDownCronExpression+scheduledAutoscaling.LowTier, "down"), ) - } + + if err != nil { + logger.Error(err, "Error creating new downscaling job") + return err + } + } - if foundUp == nil || foundUp.Tags()[1] != scheduledAutoscaling.ScaleUpExpression+scheduledAutoscaling.HighTier { + if foundUp == nil || foundUp.Tags()[1] != scheduledAutoscaling.ScaleUpCronExpression+scheduledAutoscaling.HighTier { if foundUp != nil { logger.Info("Removing outdated upscaling job for " + mongodbClusterCR.Name) - r.Scheduler.RemoveJob(foundUp.ID()) + + err = r.Scheduler.RemoveJob(foundUp.ID()) + if err != nil { + logger.Error(err, "Error removing outdated upscaling job") + return err + } } - logger.Info("Creating scheduled upscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleUpExpression + " to " + scheduledAutoscaling.HighTier) + logger.Info("Creating scheduled upscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleUpCronExpression + " to " + scheduledAutoscaling.HighTier) - r.Scheduler.NewJob( - gocron.CronJob(scheduledAutoscaling.ScaleUpExpression, false), + _, err = r.Scheduler.NewJob( + gocron.CronJob(scheduledAutoscaling.ScaleUpCronExpression, false), gocron.NewTask( func() error { logger.Info("Scaling up " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.HighTier) + r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling up to "+scheduledAutoscaling.HighTier) + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ ProviderSettings: &mongodbatlas.ProviderSettings{ ProviderName: "AWS", InstanceSizeName: scheduledAutoscaling.HighTier, RegionName: clusterDetails.ProviderSettings.RegionName, }, }) if err != nil || response.StatusCode != http.StatusOK { logger.Error(err, "Couldn't scale up "+mongodbClusterCR.Name) - // TODO: Flip a metric so we can alert on this. If this fails, it's VERY CRITICAL + r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeWarning, "Scaling", "Failed to scale up to "+scheduledAutoscaling.HighTier) + + // Flip a metric so we can alert on this.
If this fails, it's VERY CRITICAL + ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Inc() + return err } + + // Reset metric if the reconcile then succeeded + ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Set(0) + return nil }, ), - gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleUpExpression+scheduledAutoscaling.HighTier, "up"), + gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleUpCronExpression+scheduledAutoscaling.HighTier, "up"), ) - } + if err != nil { + logger.Error(err, "Error creating new upscaling job") + return err + } + } } else { r.Scheduler.RemoveByTags(mongodbClusterCR.Name) } + return nil } From 47727a7bad343cb49611a090903bdbb90c78d69c Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:02:32 -0300 Subject: [PATCH 3/9] tiny little 0 --- config/samples/airlock_v1alpha1_mongodbcluster.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/samples/airlock_v1alpha1_mongodbcluster.yaml b/config/samples/airlock_v1alpha1_mongodbcluster.yaml index f7312a8..ebf6808 100644 --- a/config/samples/airlock_v1alpha1_mongodbcluster.yaml +++ b/config/samples/airlock_v1alpha1_mongodbcluster.yaml @@ -63,7 +63,7 @@ metadata: type: Opaque stringData: # It should have enough privileges to manage users and access. This is not gonna be used by the created users. - connectionString: "mongodb://rcadmin:pamonha@mongodb.airlock-test/test?replicaSet=rs*" + connectionString: "mongodb://rcadmin:pamonha@mongodb.airlock-test/test?replicaSet=rs0" --- apiVersion: v1 From ef952e45b4190638dc95f6ca1911da600fa32967 Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:06:39 -0300 Subject: [PATCH 4/9] fix version to golang 1.19 --- .github/workflows/build.yml | 2 +- go.mod | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c945dd5..70a2209 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: path: ${{ env.__W_SRC_REL }} - uses: actions/setup-go@v3 with: - go-version: '>=1.19.0' + go-version: '=1.19.0' cache: false - name: Build binary run: | diff --git a/go.mod b/go.mod index 849d9ba..a53e150 100644 --- a/go.mod +++ b/go.mod @@ -63,7 +63,7 @@ require ( github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.12.2 // indirect + github.com/prometheus/client_golang v1.12.2 github.com/prometheus/client_model v0.2.0 // indirect github.com/prometheus/common v0.32.1 // indirect github.com/prometheus/procfs v0.7.3 // indirect From 2e57599d5f3a3c09e1b9dba75a7cc921bf7978c8 Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:09:51 -0300 Subject: [PATCH 5/9] fix version to golang 1.21 actually, since gocron depends on 1.20 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 70a2209..0f26057 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,7 @@ jobs: path: ${{ env.__W_SRC_REL }} - uses: actions/setup-go@v3 with: - go-version: '=1.19.0' + go-version: '=1.21.0' cache: false - name: Build binary run: | From 3a34c7685a2f96c1058bc3e1f51ff110f0a0a6cb Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:12:51 -0300 Subject: [PATCH 6/9] do the same to linter's job 
--- .github/workflows/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 2a8d0da..18ed847 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/setup-go@v3 with: - go-version: 1.19 + go-version: 1.21 - uses: actions/checkout@v3 - name: Configure git for private modules env: From a14cdbafdf15d554e448e83a92925432d1068ab1 Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:14:50 -0300 Subject: [PATCH 7/9] now update the linter so that it also doesnt crash on newer versions...... --- .github/workflows/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 18ed847..b9a4799 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -27,7 +27,7 @@ jobs: - name: Run golangci-lint uses: golangci/golangci-lint-action@v3 with: - version: v1.48 + version: v1.62 skip-build-cache: true only-new-issues: true From 57fa8b8b155b2f48958ce315f4463f6643d66931 Mon Sep 17 00:00:00 2001 From: r0zbot Date: Thu, 23 Jan 2025 15:17:39 -0300 Subject: [PATCH 8/9] now removing gomnd which has been deprecated.... --- .golangci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.golangci.yml b/.golangci.yml index 22f2ddb..a141e82 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -26,7 +26,7 @@ linters: # - gocyclo - gofmt - goimports - - gomnd + # - gomnd - gosec - gosimple - govet From f405e8989260f57c76b3532055a16df8a5836eac Mon Sep 17 00:00:00 2001 From: r0zbot Date: Fri, 24 Jan 2025 15:01:33 -0300 Subject: [PATCH 9/9] Review fixes --- controllers/mongodbcluster_controller.go | 242 ++++++++++++----------- 1 file changed, 130 insertions(+), 112 deletions(-) diff --git a/controllers/mongodbcluster_controller.go b/controllers/mongodbcluster_controller.go index e0e0b97..55d434a 100644 --- a/controllers/mongodbcluster_controller.go +++ b/controllers/mongodbcluster_controller.go @@ -534,152 +534,170 @@ func (r *MongoDBClusterReconciler) reconcileAtlasScheduledAutoscaling(ctx contex scheduledAutoscaling := mongodbClusterCR.Spec.AtlasScheduledAutoscaling - if scheduledAutoscaling != nil && scheduledAutoscaling.Enabled { - var foundUp gocron.Job - - var foundDown gocron.Job - - jobs := r.Scheduler.Jobs() - if jobs != nil { - for _, job := range jobs { - if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "up" { - foundUp = job - } else if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "down" { - foundDown = job - } - } - } else { - return fmt.Errorf("list of jobs is nil, wtf? Did the scheduler not initialize?") + if scheduledAutoscaling == nil || !scheduledAutoscaling.Enabled { + r.Scheduler.RemoveByTags(mongodbClusterCR.Name) + return nil + } + + var foundUp gocron.Job + + var foundDown gocron.Job + + jobs := r.Scheduler.Jobs() + if jobs == nil { + return fmt.Errorf("list of jobs is nil, wtf? Did the scheduler not initialize?") + } + + for _, job := range jobs { + if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "up" { + foundUp = job + } else if job.Tags()[0] == mongodbClusterCR.Name && job.Tags()[2] == "down" { + foundDown = job } + } - // Is this client gonna expire on me? Or is it eternal? ChatGPT says it wont expire, but I don't trust it. 
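+ // (For what it's worth, digest auth carries no session token to expire;
+ // mongodb-forks/digest re-signs every request with the API key pair, so the
+ // client stays usable until the key itself is revoked in Atlas.)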
- client, atlasGroupID, err := getAtlasClientFromSecret(secret) - if err != nil { - logger.Error(err, "Couldn't get a client for Atlas") - return err + // Is this client gonna expire on me? Or is it eternal? ChatGPT says it wont expire, but I don't trust it. + client, atlasGroupID, err := getAtlasClientFromSecret(secret) + if err != nil { + logger.Error(err, "Couldn't get a client for Atlas") + return err + } + + clusterName, err := getClusterNameFromHostTemplate(ctx, client, atlasGroupID, mongodbClusterCR.Spec.HostTemplate) + if err != nil { + logger.Error(err, "Couldn't find cluster in Atlas") + return err + } + + clusterDetails, response, err := client.Clusters.Get(ctx, atlasGroupID, clusterName) + if err != nil || response.StatusCode != http.StatusOK { + if err == nil { + err = fmt.Errorf("HTTP status %d", response.StatusCode) } - clusterName, err := getClusterNameFromHostTemplate(ctx, client, atlasGroupID, mongodbClusterCR.Spec.HostTemplate) + logger.Error(err, "Couldn't get cluster details from Atlas") + + return err + } + + if foundDown != nil && foundDown.Tags()[1] != scheduledAutoscaling.ScaleDownCronExpression+scheduledAutoscaling.LowTier { + logger.Info("Removing outdated downscaling job for " + mongodbClusterCR.Name) + + err = r.Scheduler.RemoveJob(foundDown.ID()) if err != nil { - logger.Error(err, "Couldn't find cluster in Atlas") + logger.Error(err, "Error removing outdated downscaling job") return err } - clusterDetails, response, err := client.Clusters.Get(ctx, atlasGroupID, clusterName) - if err != nil || response.StatusCode != http.StatusOK { - logger.Error(err, "Couldn't get cluster details from Atlas") - return err - } + foundDown = nil + } - if foundDown == nil || foundDown.Tags()[1] != scheduledAutoscaling.ScaleDownCronExpression+scheduledAutoscaling.LowTier { - if foundDown != nil { - logger.Info("Removing outdated downscaling job for " + mongodbClusterCR.Name) + if foundDown == nil { + logger.Info("Creating scheduled downscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleDownCronExpression + " to " + scheduledAutoscaling.LowTier) - err = r.Scheduler.RemoveJob(foundDown.ID()) - if err != nil { - logger.Error(err, "Error removing outdated downscaling job") - return err - } - } + _, err = r.Scheduler.NewJob( + gocron.CronJob(scheduledAutoscaling.ScaleDownCronExpression, false), + gocron.NewTask( + func() error { + logger.Info("Scaling down " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.LowTier) - logger.Info("Creating scheduled downscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleDownCronExpression + " to " + scheduledAutoscaling.LowTier) + r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling down to "+scheduledAutoscaling.LowTier) - _, err = r.Scheduler.NewJob( - gocron.CronJob(scheduledAutoscaling.ScaleDownCronExpression, false), - gocron.NewTask( - func() error { - logger.Info("Scaling down " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.LowTier) + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ + ProviderSettings: &mongodbatlas.ProviderSettings{ + ProviderName: "AWS", + InstanceSizeName: scheduledAutoscaling.LowTier, + RegionName: clusterDetails.ProviderSettings.RegionName, + }, + }) - r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling down to "+scheduledAutoscaling.LowTier) + if err != nil || response.StatusCode != http.StatusOK { + if err == nil { 
+ err = fmt.Errorf("HTTP status %d", response.StatusCode) } logger.Error(err, "Couldn't scale down "+mongodbClusterCR.Name) r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeWarning, "Scaling", "Failed to scale down to "+scheduledAutoscaling.LowTier+". "+err.Error()) // Flip a metric so we can alert on this. This one is a warning. ScalingDownErrorGauge.WithLabelValues(mongodbClusterCR.Name).Inc() return err } // Reset metric if the reconcile then succeeded ScalingDownErrorGauge.WithLabelValues(mongodbClusterCR.Name).Set(0) return nil }, ), gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleDownCronExpression+scheduledAutoscaling.LowTier, "down"), ) if err != nil { logger.Error(err, "Error creating new downscaling job") return err } } if foundUp != nil && foundUp.Tags()[1] != scheduledAutoscaling.ScaleUpCronExpression+scheduledAutoscaling.HighTier { logger.Info("Removing outdated upscaling job for " + mongodbClusterCR.Name) err = r.Scheduler.RemoveJob(foundUp.ID()) if err != nil { logger.Error(err, "Error removing outdated upscaling job") return err } foundUp = nil } if foundUp == nil { logger.Info("Creating scheduled upscaling job for " + mongodbClusterCR.Name + " with expression " + scheduledAutoscaling.ScaleUpCronExpression + " to " + scheduledAutoscaling.HighTier) _, err = r.Scheduler.NewJob( gocron.CronJob(scheduledAutoscaling.ScaleUpCronExpression, false), gocron.NewTask( func() error { logger.Info("Scaling up " + mongodbClusterCR.Name + " to " + scheduledAutoscaling.HighTier) r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling up to
"+scheduledAutoscaling.HighTier) - r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeNormal, "Scaling", "Scaling up to "+scheduledAutoscaling.HighTier) + _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ + ProviderSettings: &mongodbatlas.ProviderSettings{ + ProviderName: "AWS", + InstanceSizeName: scheduledAutoscaling.HighTier, + RegionName: clusterDetails.ProviderSettings.RegionName, + }, + }) - _, response, err := client.Clusters.Update(ctx, atlasGroupID, clusterName, &mongodbatlas.Cluster{ - ProviderSettings: &mongodbatlas.ProviderSettings{ - ProviderName: "AWS", - InstanceSizeName: scheduledAutoscaling.HighTier, - RegionName: clusterDetails.ProviderSettings.RegionName, - }, - }) + if err != nil || response.StatusCode != http.StatusOK { + if err == nil { + err = fmt.Errorf("HTTP status %d", response.StatusCode) + } - if err != nil || response.StatusCode != http.StatusOK { - logger.Error(err, "Couldn't scale up "+mongodbClusterCR.Name) - r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeWarning, "Scaling", "Failed to scale up to "+scheduledAutoscaling.HighTier) + logger.Error(err, "Couldn't scale up "+mongodbClusterCR.Name) + r.EventRecorder.Event(mongodbClusterCR, corev1.EventTypeWarning, "Scaling", "Failed to scale up to "+scheduledAutoscaling.HighTier) - // Flip a metric so we can alert on this. If this fails, it's VERY CRITICAL - ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Inc() + // Flip a metric so we can alert on this. If this fails, it's VERY CRITICAL + ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Inc() - return err - } + return err + } - // Reset metric if the reconcile then succeeded - ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Set(0) + // Reset metric if the reconcile then succeeded + ScalingUpErrorGauge.WithLabelValues(mongodbClusterCR.Name).Set(0) - return nil - }, - ), - gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleUpCronExpression+scheduledAutoscaling.HighTier, "up"), - ) + return nil + }, + ), + gocron.WithTags(mongodbClusterCR.Name, scheduledAutoscaling.ScaleUpCronExpression+scheduledAutoscaling.HighTier, "up"), + ) - if err != nil { - logger.Error(err, "Error creating new upscaling job") - return err - } + if err != nil { + logger.Error(err, "Error creating new upscaling job") + return err } - } else { - r.Scheduler.RemoveByTags(mongodbClusterCR.Name) } return nil