Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: redis-cluster unexpected downscaling (#1167) #1171

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 20 additions & 22 deletions api/v1beta2/redisreplication_types.go
Original file line number Diff line number Diff line change
@@ -1,33 +1,31 @@
package v1beta2

import (
common "github.com/OT-CONTAINER-KIT/redis-operator/api"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// RedisReplicationSpec lists the user-configurable settings carried in the
// spec of a RedisReplication resource. Each field is (de)serialized under the
// key given in its json tag; fields tagged with omitempty are optional, while
// Size ("clusterSize") and KubernetesConfig ("kubernetesConfig") are not.
//
// NOTE(review): in this view the field list is declared twice — the first run
// includes PodDisruptionBudget ("pdb"), the second run omits it. This looks
// like the pre-change and post-change sides of a diff shown together; a Go
// struct cannot declare the same field name twice, so confirm which list is
// the intended one before relying on this definition.
type RedisReplicationSpec struct {
// First field list (includes PodDisruptionBudget).
Size *int32 `json:"clusterSize"`
KubernetesConfig KubernetesConfig `json:"kubernetesConfig"`
RedisExporter *RedisExporter `json:"redisExporter,omitempty"`
RedisConfig *RedisConfig `json:"redisConfig,omitempty"`
Storage *Storage `json:"storage,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`
SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
PriorityClassName string `json:"priorityClassName,omitempty"`
Affinity *corev1.Affinity `json:"affinity,omitempty"`
Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"`
TLS *TLSConfig `json:"TLS,omitempty"`
PodDisruptionBudget *common.RedisPodDisruptionBudget `json:"pdb,omitempty"`
ACL *ACLConfig `json:"acl,omitempty"`
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty" protobuf:"bytes,11,opt,name=readinessProbe"`
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty" protobuf:"bytes,12,opt,name=livenessProbe"`
InitContainer *InitContainer `json:"initContainer,omitempty"`
Sidecars *[]Sidecar `json:"sidecars,omitempty"`
ServiceAccountName *string `json:"serviceAccountName,omitempty"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" protobuf:"varint,4,opt,name=terminationGracePeriodSeconds"`
EnvVars *[]corev1.EnvVar `json:"env,omitempty"`
// Second field list (same fields, without PodDisruptionBudget).
Size *int32 `json:"clusterSize"`
KubernetesConfig KubernetesConfig `json:"kubernetesConfig"`
RedisExporter *RedisExporter `json:"redisExporter,omitempty"`
RedisConfig *RedisConfig `json:"redisConfig,omitempty"`
Storage *Storage `json:"storage,omitempty"`
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`
SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
PriorityClassName string `json:"priorityClassName,omitempty"`
Affinity *corev1.Affinity `json:"affinity,omitempty"`
Tolerations *[]corev1.Toleration `json:"tolerations,omitempty"`
TLS *TLSConfig `json:"TLS,omitempty"`
ACL *ACLConfig `json:"acl,omitempty"`
ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty" protobuf:"bytes,11,opt,name=readinessProbe"`
LivenessProbe *corev1.Probe `json:"livenessProbe,omitempty" protobuf:"bytes,12,opt,name=livenessProbe"`
InitContainer *InitContainer `json:"initContainer,omitempty"`
Sidecars *[]Sidecar `json:"sidecars,omitempty"`
ServiceAccountName *string `json:"serviceAccountName,omitempty"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" protobuf:"varint,4,opt,name=terminationGracePeriodSeconds"`
EnvVars *[]corev1.EnvVar `json:"env,omitempty"`
}

func (cr *RedisReplicationSpec) GetReplicationCounts(t string) int32 {
Expand Down
5 changes: 0 additions & 5 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -6240,19 +6240,6 @@ spec:
additionalProperties:
type: string
type: object
pdb:
description: RedisPodDisruptionBudget configure a PodDisruptionBudget
on the resource (leader/follower)
properties:
enabled:
type: boolean
maxUnavailable:
format: int32
type: integer
minAvailable:
format: int32
type: integer
type: object
podSecurityContext:
description: |-
PodSecurityContext holds pod-level security attributes and common container settings.
Expand Down
10 changes: 9 additions & 1 deletion pkg/controllers/rediscluster/rediscluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@

// Check if the cluster is downscaled
if leaderCount := k8sutils.CheckRedisNodeCount(ctx, r.K8sClient, instance, "leader"); leaderReplicas < leaderCount {
if !(r.IsStatefulSetReady(ctx, instance.Namespace, instance.Name+"-leader") && r.IsStatefulSetReady(ctx, instance.Namespace, instance.Name+"-follower")) {
return intctrlutil.Reconciled()
}

Check warning on line 78 in pkg/controllers/rediscluster/rediscluster_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controllers/rediscluster/rediscluster_controller.go#L76-L78

Added lines #L76 - L78 were not covered by tests

logger.Info("Redis cluster is downscaling...", "Current.LeaderReplicas", leaderCount, "Desired.LeaderReplicas", leaderReplicas)
for shardIdx := leaderCount - 1; shardIdx >= leaderReplicas; shardIdx-- {
logger.Info("Remove the shard", "Shard.Index", shardIdx)
Expand All @@ -83,7 +87,11 @@
// lastLeaderPod is currently a replica (slave); make it the master Pod.
// We have to trigger a manual failover here to make it a leader Pod.
// ClusterFailover should also include the clusterReplicate step, since we have to map the followers to the new leader.
k8sutils.ClusterFailover(ctx, r.K8sClient, instance)
logger.Info("Cluster Failover is initiated", "Shard.Index", shardIdx)
if err = k8sutils.ClusterFailover(ctx, r.K8sClient, instance); err != nil {
logger.Error(err, "Failed to initiate cluster failover")
return intctrlutil.RequeueWithError(ctx, err, "")
}

Check warning on line 94 in pkg/controllers/rediscluster/rediscluster_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controllers/rediscluster/rediscluster_controller.go#L90-L94

Added lines #L90 - L94 were not covered by tests
}
// Step 1 Remove the Follower Node
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, r.K8sClient, instance)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
{typ: "finalizer", rec: r.reconcileFinalizer},
{typ: "statefulset", rec: r.reconcileStatefulSet},
{typ: "service", rec: r.reconcileService},
{typ: "poddisruptionbudget", rec: r.reconcilePDB},
{typ: "redis", rec: r.reconcileRedis},
{typ: "status", rec: r.reconcileStatus},
}
Expand Down Expand Up @@ -132,13 +131,6 @@ func (r *Reconciler) reconcileAnnotation(ctx context.Context, instance *redisv1b
return intctrlutil.Reconciled()
}

// reconcilePDB is the "poddisruptionbudget" reconcile step for a
// RedisReplication. It hands the instance and its Spec.PodDisruptionBudget
// settings to k8sutils.ReconcileReplicationPodDisruptionBudget.
//
// On success it reports the step as reconciled. On failure the error itself is
// not returned to the caller; instead the request is requeued after 60 seconds.
func (r *Reconciler) reconcilePDB(ctx context.Context, instance *redisv1beta2.RedisReplication) (ctrl.Result, error) {
	err := k8sutils.ReconcileReplicationPodDisruptionBudget(ctx, instance, instance.Spec.PodDisruptionBudget, r.K8sClient)
	if err == nil {
		return intctrlutil.Reconciled()
	}
	// Retry later rather than surfacing the failure.
	return intctrlutil.RequeueAfter(ctx, time.Second*60, "")
}

func (r *Reconciler) reconcileStatefulSet(ctx context.Context, instance *redisv1beta2.RedisReplication) (ctrl.Result, error) {
if err := k8sutils.CreateReplicationRedis(ctx, instance, r.K8sClient); err != nil {
return intctrlutil.RequeueAfter(ctx, time.Second*60, "")
Expand Down
18 changes: 13 additions & 5 deletions pkg/k8sutils/cluster-scaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@
return false
}

func ClusterFailover(ctx context.Context, client kubernetes.Interface, cr *redisv1beta2.RedisCluster) {
func ClusterFailover(ctx context.Context, client kubernetes.Interface, cr *redisv1beta2.RedisCluster) error {

Check warning on line 394 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L394

Added line #L394 was not covered by tests
slavePodName := cr.Name + "-leader-" + strconv.Itoa(int(CheckRedisNodeCount(ctx, client, cr, "leader"))-1)
// cmd = redis-cli cluster failover -a <pass>
var cmd []string
Expand All @@ -400,13 +400,15 @@
Namespace: cr.Namespace,
}

cmd = []string{"redis-cli", "cluster", "failover"}
cmd = []string{"redis-cli", "-h"}

Check warning on line 403 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L403

Added line #L403 was not covered by tests

if *cr.Spec.ClusterVersion == "v7" {
cmd = append(cmd, getRedisHostname(pod, cr, "leader")+fmt.Sprintf(":%d", *cr.Spec.Port))
cmd = append(cmd, getRedisHostname(pod, cr, "leader"))

Check warning on line 406 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L406

Added line #L406 was not covered by tests
} else {
cmd = append(cmd, getRedisServerAddress(ctx, client, pod, *cr.Spec.Port))
cmd = append(cmd, getRedisServerIP(ctx, client, pod))

Check warning on line 408 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L408

Added line #L408 was not covered by tests
}
cmd = append(cmd, "-p")
cmd = append(cmd, strconv.Itoa(*cr.Spec.Port))

Check warning on line 411 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L410-L411

Added lines #L410 - L411 were not covered by tests

if cr.Spec.KubernetesConfig.ExistingPasswordSecret != nil {
pass, err := getRedisPassword(ctx, client, cr.Namespace, *cr.Spec.KubernetesConfig.ExistingPasswordSecret.Name, *cr.Spec.KubernetesConfig.ExistingPasswordSecret.Key)
Expand All @@ -418,7 +420,13 @@
}

cmd = append(cmd, getRedisTLSArgs(cr.Spec.TLS, slavePodName)...)
cmd = append(cmd, "cluster", "failover")

Check warning on line 423 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L423

Added line #L423 was not covered by tests

log.FromContext(ctx).V(1).Info("Redis cluster failover command is", "Command", cmd)
executeCommand(ctx, client, cr, cmd, slavePodName)
execOut, err := executeCommand1(ctx, client, cr, cmd, slavePodName)
if err != nil {
log.FromContext(ctx).Error(err, "Could not execute command", "Command", cmd, "Output", execOut)
return err
}
return nil

Check warning on line 431 in pkg/k8sutils/cluster-scaling.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/cluster-scaling.go#L426-L431

Added lines #L426 - L431 were not covered by tests
}
59 changes: 5 additions & 54 deletions pkg/k8sutils/poddisruption.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
return CreateOrUpdatePodDisruptionBudget(ctx, pdbDef, cl)
} else {
// Check if one exists, and delete it.
_, err := getPodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
_, err := GetPodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)

Check warning on line 29 in pkg/k8sutils/poddisruption.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/poddisruption.go#L29

Added line #L29 was not covered by tests
if err == nil {
return deletePodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
} else if err != nil && errors.IsNotFound(err) {
Expand All @@ -48,29 +48,7 @@
return CreateOrUpdatePodDisruptionBudget(ctx, pdbDef, cl)
} else {
// Check if one exists, and delete it.
_, err := getPodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
if err == nil {
return deletePodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
} else if err != nil && errors.IsNotFound(err) {
log.FromContext(ctx).V(1).Info("Reconciliation Successful, no PodDisruptionBudget Found.")
// It's OK if it's not found, as we're deleting anyway
return nil
}
return err
}
}

func ReconcileReplicationPodDisruptionBudget(ctx context.Context, cr *redisv1beta2.RedisReplication, pdbParams *commonapi.RedisPodDisruptionBudget, cl kubernetes.Interface) error {
pdbName := cr.ObjectMeta.Name + "-replication"
if pdbParams != nil && pdbParams.Enabled {
labels := getRedisLabels(cr.ObjectMeta.Name, replication, "replication", cr.GetObjectMeta().GetLabels())
annotations := generateStatefulSetsAnots(cr.ObjectMeta, cr.Spec.KubernetesConfig.IgnoreAnnotations)
pdbMeta := generateObjectMetaInformation(pdbName, cr.Namespace, labels, annotations)
pdbDef := generateReplicationPodDisruptionBudgetDef(ctx, cr, "replication", pdbMeta, pdbParams)
return CreateOrUpdatePodDisruptionBudget(ctx, pdbDef, cl)
} else {
// Check if one exists, and delete it.
_, err := getPodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
_, err := GetPodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)

Check warning on line 51 in pkg/k8sutils/poddisruption.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/poddisruption.go#L51

Added line #L51 was not covered by tests
if err == nil {
return deletePodDisruptionBudget(ctx, cr.Namespace, pdbName, cl)
} else if err != nil && errors.IsNotFound(err) {
Expand Down Expand Up @@ -109,33 +87,6 @@
return pdbTemplate
}

// generateReplicationPodDisruptionBudgetDef builds the policy/v1
// PodDisruptionBudget definition for a RedisReplication.
//
// The budget selects pods labelled app="<cr name>-<role>" and role=<role>, and
// uses pdbMeta as its object metadata. MinAvailable and MaxUnavailable are
// copied from pdbParams when set; when neither is set, MinAvailable falls back
// to a quorum of (Size/2)+1. The RedisReplication is added as owner of the
// returned object. pdbParams and cr.Spec.Size are dereferenced without a nil
// check, so callers must supply both.
func generateReplicationPodDisruptionBudgetDef(ctx context.Context, cr *redisv1beta2.RedisReplication, role string, pdbMeta metav1.ObjectMeta, pdbParams *commonapi.RedisPodDisruptionBudget) *policyv1.PodDisruptionBudget {
	selector := LabelSelectors(map[string]string{
		"app":  fmt.Sprintf("%s-%s", cr.ObjectMeta.Name, role),
		"role": role,
	})

	pdb := &policyv1.PodDisruptionBudget{
		TypeMeta:   generateMetaInformation("PodDisruptionBudget", "policy/v1"),
		ObjectMeta: pdbMeta,
	}
	pdb.Spec.Selector = selector

	if minAvail := pdbParams.MinAvailable; minAvail != nil {
		pdb.Spec.MinAvailable = &intstr.IntOrString{Type: intstr.Int, IntVal: *minAvail}
	}
	if maxUnavail := pdbParams.MaxUnavailable; maxUnavail != nil {
		pdb.Spec.MaxUnavailable = &intstr.IntOrString{Type: intstr.Int, IntVal: *maxUnavail}
	}

	// Neither bound was supplied: assume quorum, (N/2)+1.
	if pdb.Spec.MinAvailable == nil && pdb.Spec.MaxUnavailable == nil {
		quorum := (*cr.Spec.Size / 2) + 1
		pdb.Spec.MinAvailable = &intstr.IntOrString{Type: intstr.Int, IntVal: quorum}
	}

	AddOwnerRefToObject(pdb, redisReplicationAsOwner(cr))
	return pdb
}

// generateSentinelPodDisruptionBudgetDef will create a PodDisruptionBudget definition for a RedisSentinel
func generateSentinelPodDisruptionBudgetDef(ctx context.Context, cr *redisv1beta2.RedisSentinel, role string, pdbMeta metav1.ObjectMeta, pdbParams *commonapi.RedisPodDisruptionBudget) *policyv1.PodDisruptionBudget {
lblSelector := LabelSelectors(map[string]string{
Expand Down Expand Up @@ -165,7 +116,7 @@

// CreateOrUpdatePodDisruptionBudget method will create or update Redis PodDisruptionBudget
func CreateOrUpdatePodDisruptionBudget(ctx context.Context, pdbDef *policyv1.PodDisruptionBudget, cl kubernetes.Interface) error {
storedPDB, err := getPodDisruptionBudget(ctx, pdbDef.Namespace, pdbDef.Name, cl)
storedPDB, err := GetPodDisruptionBudget(ctx, pdbDef.Namespace, pdbDef.Name, cl)

Check warning on line 119 in pkg/k8sutils/poddisruption.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/poddisruption.go#L119

Added line #L119 was not covered by tests
if err != nil {
if err := patch.DefaultAnnotator.SetLastAppliedAnnotation(pdbDef); err != nil { //nolint
log.FromContext(ctx).Error(err, "Unable to patch redis PodDisruptionBudget with comparison object")
Expand Down Expand Up @@ -253,8 +204,8 @@
return nil
}

// getPodDisruptionBudget is a method to get PodDisruptionBudgets in Kubernetes
func getPodDisruptionBudget(ctx context.Context, namespace string, pdb string, cl kubernetes.Interface) (*policyv1.PodDisruptionBudget, error) {
// GetPodDisruptionBudget is a method to get PodDisruptionBudgets in Kubernetes
func GetPodDisruptionBudget(ctx context.Context, namespace string, pdb string, cl kubernetes.Interface) (*policyv1.PodDisruptionBudget, error) {

Check warning on line 208 in pkg/k8sutils/poddisruption.go

View check run for this annotation

Codecov / codecov/patch

pkg/k8sutils/poddisruption.go#L208

Added line #L208 was not covered by tests
getOpts := metav1.GetOptions{
TypeMeta: generateMetaInformation("PodDisruptionBudget", "policy/v1"),
}
Expand Down
Loading