diff --git a/pkg/controller/ignition/controller.go b/pkg/controller/ignition/controller.go
index c70539ca1de..932fbe4781d 100644
--- a/pkg/controller/ignition/controller.go
+++ b/pkg/controller/ignition/controller.go
@@ -2,6 +2,7 @@ package ignition
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"sync/atomic"
 	"time"
@@ -12,6 +13,7 @@ import (
 	"github.com/scylladb/scylla-operator/pkg/helpers"
 	"github.com/scylladb/scylla-operator/pkg/internalapi"
 	"github.com/scylladb/scylla-operator/pkg/naming"
+	"github.com/scylladb/scylla-operator/pkg/pointer"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	corev1informers "k8s.io/client-go/informers/core/v1"
@@ -89,18 +91,10 @@ func (c *Controller) IsIgnited() bool {
 	return c.ignited.Load()
 }
 
-func (c *Controller) Sync(ctx context.Context) error {
-	startTime := time.Now()
-	klog.V(4).InfoS("Started syncing observer", "Name", c.Observer.Name(), "startTime", startTime)
-	defer func() {
-		klog.V(4).InfoS("Finished syncing observer", "Name", c.Observer.Name(), "duration", time.Since(startTime))
-	}()
-
-	ignited := true
-
+func (c *Controller) evaluateIgnitionState() (bool, error) {
 	svc, err := c.serviceLister.Services(c.namespace).Get(c.serviceName)
 	if err != nil {
-		return fmt.Errorf("can't get service %q: %w", c.serviceName, err)
+		return false, fmt.Errorf("can't get service %q: %w", c.serviceName, err)
 	}
 
 	// TODO: This isn't bound to the lifecycle of the ScyllaDB Pod and should be evaluated in the controller for the resource.
@@ -112,41 +106,38 @@ func (c *Controller) Sync(ctx context.Context) error {
 				"Waiting for identity service to have at least one ingress point",
 				"Service", naming.ManualRef(c.namespace, c.serviceName),
 			)
-			ignited = false
-		} else {
-			klog.V(2).InfoS(
-				"Service is available and has an IP address",
-				"Service", naming.ManualRef(svc.Namespace, svc.Name),
-				"UID", svc.UID,
-			)
+			return false, nil
 		}
+		klog.V(2).InfoS(
+			"Service is available and has an IP address",
+			"Service", naming.ManualRef(svc.Namespace, svc.Name),
+			"UID", svc.UID,
+		)
 	}
 
 	pod, err := c.podLister.Pods(c.namespace).Get(c.serviceName)
 	if err != nil {
-		return fmt.Errorf("can't get pod %q: %w", c.serviceName, err)
+		return false, fmt.Errorf("can't get pod %q: %w", c.serviceName, err)
 	}
 
 	if len(pod.Status.PodIP) == 0 {
 		klog.V(2).InfoS("PodIP is not yet set", "Pod", klog.KObj(pod), "UID", pod.UID)
-		ignited = false
-	} else {
-		klog.V(2).InfoS("PodIP is present on the Pod", "Pod", klog.KObj(pod), "UID", pod.UID, "IP", pod.Status.PodIP)
+		return false, nil
 	}
+	klog.V(2).InfoS("PodIP is present on the Pod", "Pod", klog.KObj(pod), "UID", pod.UID, "IP", pod.Status.PodIP)
 
 	containerID, err := controllerhelpers.GetScyllaContainerID(pod)
 	if err != nil {
-		return controllertools.NonRetriable(
+		return false, controllertools.NonRetriable(
 			fmt.Errorf("can't get scylla container id in pod %q: %v", naming.ObjRef(pod), err),
 		)
 	}
 
 	if len(containerID) == 0 {
 		klog.V(2).InfoS("ScyllaDB ContainerID is not yet set", "Pod", klog.KObj(pod), "UID", pod.UID)
-		ignited = false
-	} else {
-		klog.V(2).InfoS("Pod has ScyllaDB ContainerID set", "Pod", klog.KObj(pod), "UID", pod.UID, "ContainerID", containerID)
+		return false, nil
 	}
+	klog.V(2).InfoS("Pod has ScyllaDB ContainerID set", "Pod", klog.KObj(pod), "UID", pod.UID, "ContainerID", containerID)
 
 	cmLabelSelector := labels.Set{
 		naming.OwnerUIDLabel: string(pod.UID),
@@ -154,20 +145,20 @@ func (c *Controller) Sync(ctx context.Context) error {
 	}.AsSelector()
 	configMaps, err := c.configMapLister.ConfigMaps(c.namespace).List(cmLabelSelector)
 	if err != nil {
-		return fmt.Errorf("can't list tuning configmap: %w", err)
+		return false, fmt.Errorf("can't list tuning configmap: %w", err)
 	}
 
 	switch l := len(configMaps); l {
 	case 0:
 		klog.V(2).InfoS("Tuning ConfigMap for pod is not yet available", "Pod", klog.KObj(pod), "UID", pod.UID)
-		ignited = false
+		return false, nil
 
 	case 1:
 		cm := configMaps[0]
 		src := &internalapi.SidecarRuntimeConfig{}
 		src, err = controllerhelpers.GetSidecarRuntimeConfigFromConfigMap(cm)
 		if err != nil {
-			return controllertools.NonRetriable(
+			return false, controllertools.NonRetriable(
 				fmt.Errorf("can't get sidecar runtime config from configmap %q: %w", naming.ObjRef(cm), err),
 			)
 		}
@@ -179,7 +170,7 @@ func (c *Controller) Sync(ctx context.Context) error {
 					"ContainerID", containerID,
 					"NodeConfig", src.BlockingNodeConfigs,
 				)
-				ignited = false
+				return false, nil
 			}
 		} else {
 			klog.V(2).InfoS("Scylla runtime config is not yet updated with our ContainerID",
@@ -187,11 +178,51 @@ func (c *Controller) Sync(ctx context.Context) error {
 				"ConfigContainerID", src.ContainerID,
 				"SidecarContainerID", containerID,
 			)
-			ignited = false
+			return false, nil
 		}
 
 	default:
-		return fmt.Errorf("mutiple tuning configmaps are present for pod %q with UID %q", naming.ObjRef(pod), pod.UID)
+		return false, fmt.Errorf("multiple tuning configmaps are present for pod %q with UID %q", naming.ObjRef(pod), pod.UID)
+	}
+
+	return true, nil
+}
+func (c *Controller) Sync(ctx context.Context) error {
+	startTime := time.Now()
+	klog.V(4).InfoS("Started syncing observer", "Name", c.Observer.Name(), "startTime", startTime)
+	defer func() {
+		klog.V(4).InfoS("Finished syncing observer", "Name", c.Observer.Name(), "duration", time.Since(startTime))
+	}()
+
+	svc, err := c.serviceLister.Services(c.namespace).Get(c.serviceName)
+	if err != nil {
+		return fmt.Errorf("can't get service %q: %w", c.serviceName, err)
+	}
+
+	var ignitionOverride *bool
+	if svc.Annotations != nil {
+		forceIgnitionString, hasForceIgnitionString := svc.Annotations[naming.ForceIgnitionValueAnnotation]
+		if hasForceIgnitionString {
+			switch forceIgnitionString {
+			case "true":
+				ignitionOverride = pointer.Ptr(true)
+			case "false":
+				ignitionOverride = pointer.Ptr(false)
+			default:
+				klog.ErrorS(errors.New("invalid ignition override value"), "Ignoring ignition override annotation", "Value", forceIgnitionString, "Key", naming.ForceIgnitionValueAnnotation)
+			}
+		}
+	}
+
+	var ignited bool
+	if ignitionOverride != nil {
+		ignited = *ignitionOverride
+		klog.InfoS("Forcing ignition state", "Ignited", ignited, "Annotation", naming.ForceIgnitionValueAnnotation)
+	} else {
+		ignited, err = c.evaluateIgnitionState()
+		if err != nil {
+			return fmt.Errorf("can't evaluate ignition state: %w", err)
+		}
 	}
 
 	if ignited {
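Note: the override introduced above reduces to a three-way decision: a valid "true"/"false" annotation wins, anything else falls back to evaluateIgnitionState. A minimal standalone sketch of that decision, runnable on its own (resolveIgnition is a hypothetical helper for illustration, not code from this patch; only the annotation key is taken from it):

// Sketch of the override semantics, not part of the patch.
package main

import "fmt"

const forceIgnitionValueAnnotation = "internal.scylla-operator.scylladb.com/force-ignition-value"

// resolveIgnition mirrors Sync: annotations is the Service's annotation map,
// evaluated is what evaluateIgnitionState would have returned.
func resolveIgnition(annotations map[string]string, evaluated bool) bool {
	switch annotations[forceIgnitionValueAnnotation] {
	case "true":
		return true
	case "false":
		return false
	default:
		// A missing or invalid value falls through to the evaluated state.
		return evaluated
	}
}

func main() {
	fmt.Println(resolveIgnition(nil, true)) // true: no override present
	fmt.Println(resolveIgnition(map[string]string{forceIgnitionValueAnnotation: "false"}, true)) // false: forced off
}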
diff --git a/pkg/controller/scylladbdatacenter/resource.go b/pkg/controller/scylladbdatacenter/resource.go
index aa8aa13afa5..7c5baba66c0 100644
--- a/pkg/controller/scylladbdatacenter/resource.go
+++ b/pkg/controller/scylladbdatacenter/resource.go
@@ -633,9 +633,12 @@ func StatefulSetForRack(rack scyllav1alpha1.RackSpec, sdc *scyllav1alpha1.Scylla
 						"inherit_errexit",
 						"-c",
 						strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
+
 printf 'INFO %s ignition - Waiting for /mnt/shared/ignition.done\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr
 until [[ -f "/mnt/shared/ignition.done" ]]; do
-	sleep 1;
+	sleep 1 &
+	wait
 done
 printf 'INFO %s ignition - Ignited. Starting ScyllaDB...\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr
 
@@ -833,7 +836,9 @@ exec /mnt/shared/scylla-operator sidecar \
 						"inherit_errexit",
 						"-c",
 						strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
 trap 'rm /mnt/shared/ignition.done' EXIT
+
 nodetool drain &
 sleep ` + strconv.Itoa(minTerminationGracePeriodSeconds) + ` &
 wait
@@ -1163,7 +1168,7 @@ func getScyllaDBManagerAgentContainer(r scyllav1alpha1.RackSpec, sdc *scyllav1al
 	}
 
 	cnt := &corev1.Container{
-		Name:            "scylla-manager-agent",
+		Name:            naming.ScyllaManagerAgentContainerName,
 		Image:           *sdc.Spec.ScyllaDBManagerAgent.Image,
 		ImagePullPolicy: corev1.PullIfNotPresent,
 		// There is no point in starting scylla-manager before ScyllaDB is tuned and ignited. The manager agent fails after 60 attempts and hits backoff unnecessarily.
@@ -1175,13 +1180,16 @@ func getScyllaDBManagerAgentContainer(r scyllav1alpha1.RackSpec, sdc *scyllav1al
 						"inherit_errexit",
 						"-c",
 						strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
+
 printf '{"L":"INFO","T":"%s","M":"Waiting for /mnt/shared/ignition.done"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr
 until [[ -f "/mnt/shared/ignition.done" ]]; do
-	sleep 1;
+	sleep 1 &
+	wait
 done
 printf '{"L":"INFO","T":"%s","M":"Ignited. Starting ScyllaDB Manager Agent"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr
 
-scylla-manager-agent \
+exec scylla-manager-agent \
 	-c ` + fmt.Sprintf("%q ", naming.ScyllaAgentConfigDefaultFile) + `\
 	-c ` + fmt.Sprintf("%q ", path.Join(naming.ScyllaAgentConfigDirName, naming.ScyllaAgentConfigFileName)) + `\
 	-c ` + fmt.Sprintf("%q ", path.Join(naming.ScyllaAgentConfigDirName, naming.ScyllaAgentAuthTokenFileName)) + `
diff --git a/pkg/controller/scylladbdatacenter/resource_test.go b/pkg/controller/scylladbdatacenter/resource_test.go
index afa71c0197b..a87525c1933 100644
--- a/pkg/controller/scylladbdatacenter/resource_test.go
+++ b/pkg/controller/scylladbdatacenter/resource_test.go
@@ -722,9 +722,12 @@ func TestStatefulSetForRack(t *testing.T) {
 							"inherit_errexit",
 							"-c",
 							strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
+
 printf 'INFO %s ignition - Waiting for /mnt/shared/ignition.done\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr
 until [[ -f "/mnt/shared/ignition.done" ]]; do
-	sleep 1;
+	sleep 1 &
+	wait
 done
 printf 'INFO %s ignition - Ignited. Starting ScyllaDB...\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr
@@ -876,10 +879,13 @@ exec /mnt/shared/scylla-operator sidecar \
 							"-O",
 							"inherit_errexit",
 							"-c",
-							`trap 'rm /mnt/shared/ignition.done' EXIT
+							strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
+trap 'rm /mnt/shared/ignition.done' EXIT
+
 nodetool drain &
 sleep 15 &
-wait`,
+wait`),
 						},
 					},
 				},
@@ -991,9 +997,12 @@ wait`,
 							"inherit_errexit",
 							"-c",
 							strings.TrimSpace(`
+trap 'kill $( jobs -p ); exit 0' TERM
+
 printf '{"L":"INFO","T":"%s","M":"Waiting for /mnt/shared/ignition.done"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr
 until [[ -f "/mnt/shared/ignition.done" ]]; do
-	sleep 1;
+	sleep 1 &
+	wait
 done
 printf '{"L":"INFO","T":"%s","M":"Ignited. Starting ScyllaDB Manager Agent"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr
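Note: the recurring `trap 'kill $( jobs -p ); exit 0' TERM` plus `sleep 1 &` / `wait` change makes these wait loops signal-responsive: a foreground `sleep 1` postpones trap delivery until the sleep returns, while `wait` on a background job is interrupted immediately. A rough Go analogue of the same loop, as an illustrative sketch only (the sentinel path comes from the scripts above, everything else is assumed):

// Sketch: a file-wait loop that stops promptly on SIGTERM, like the
// trap + background-sleep pattern in the shell scripts above.
package main

import (
	"context"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// Cancelled on SIGTERM, mirroring the shell trap.
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM)
	defer stop()

	for {
		if _, err := os.Stat("/mnt/shared/ignition.done"); err == nil {
			break // ignited; proceed to start the real workload
		}
		select {
		case <-ctx.Done():
			return // terminated while waiting; exit cleanly, like `exit 0` in the trap
		case <-time.After(time.Second):
		}
	}
	// ... exec the real workload here ...
}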
diff --git a/pkg/naming/constants.go b/pkg/naming/constants.go
index d9623133ec5..4914e33690d 100644
--- a/pkg/naming/constants.go
+++ b/pkg/naming/constants.go
@@ -26,6 +26,9 @@ const (
 	// Readiness check will always fail when this label is added to member service.
 	NodeMaintenanceLabel = "scylla/node-maintenance"
 
+	// ForceIgnitionValueAnnotation allows forcing the ignition state. The value can be either "true" or "false".
+	ForceIgnitionValueAnnotation = "internal.scylla-operator.scylladb.com/force-ignition-value"
+
 	LabelValueTrue  = "true"
 	LabelValueFalse = "false"
 )
@@ -110,12 +113,13 @@ const (
 
 // Configuration Values
 const (
-	ScyllaContainerName           = "scylla"
-	ScyllaDBIgnitionContainerName = "scylladb-ignition"
-	SidecarInjectorContainerName  = "sidecar-injection"
-	PerftuneContainerName         = "perftune"
-	CleanupContainerName          = "cleanup"
-	RLimitsContainerName          = "rlimits"
+	ScyllaContainerName             = "scylla"
+	ScyllaDBIgnitionContainerName   = "scylladb-ignition"
+	ScyllaManagerAgentContainerName = "scylla-manager-agent"
+	SidecarInjectorContainerName    = "sidecar-injection"
+	PerftuneContainerName           = "perftune"
+	CleanupContainerName            = "cleanup"
+	RLimitsContainerName            = "rlimits"
 
 	PVCTemplateName = "data"
diff --git a/pkg/naming/names.go b/pkg/naming/names.go
index 5e3fcb44410..568d36ada29 100644
--- a/pkg/naming/names.go
+++ b/pkg/naming/names.go
@@ -61,6 +61,10 @@ func MemberServiceNameForScyllaCluster(r scyllav1.RackSpec, sc *scyllav1.ScyllaC
 	return fmt.Sprintf("%s-%d", StatefulSetNameForRackForScyllaCluster(r, sc), idx)
 }
 
+func PodNameForScyllaCluster(r scyllav1.RackSpec, sc *scyllav1.ScyllaCluster, idx int) string {
+	return MemberServiceNameForScyllaCluster(r, sc, idx)
+}
+
 func IdentityServiceName(sdc *scyllav1alpha1.ScyllaDBDatacenter) string {
 	return fmt.Sprintf("%s-client", sdc.Name)
 }
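Note: the e2e test below sets the override at creation time through `ExposeOptions.NodeService.Annotations`, which propagates to the member Service watched by the ignition controller. Toggling the override on a live member Service could look like the following hypothetical client-go helper (only naming.ForceIgnitionValueAnnotation comes from this patch; the function and its placement are assumptions):

package ignitionutil

import (
	"context"
	"fmt"

	"github.com/scylladb/scylla-operator/pkg/naming"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// forceIgnition patches the member Service watched by the ignition
// controller, setting the override annotation to "true" or "false".
func forceIgnition(ctx context.Context, client kubernetes.Interface, namespace, svcName, value string) error {
	patch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q:%q}}}`, naming.ForceIgnitionValueAnnotation, value))
	_, err := client.CoreV1().Services(namespace).Patch(ctx, svcName, types.StrategicMergePatchType, patch, metav1.PatchOptions{})
	if err != nil {
		return fmt.Errorf("can't patch service %q: %w", svcName, err)
	}
	return nil
}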
diff --git a/test/e2e/set/scyllacluster/scyllacluster_restarts.go b/test/e2e/set/scyllacluster/scyllacluster_restarts.go
new file mode 100644
index 00000000000..4bf474c3d28
--- /dev/null
+++ b/test/e2e/set/scyllacluster/scyllacluster_restarts.go
@@ -0,0 +1,207 @@
+// Copyright (C) 2021 ScyllaDB
+
+package scyllacluster
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
+	v1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1"
+	"github.com/scylladb/scylla-operator/pkg/controllerhelpers"
+	"github.com/scylladb/scylla-operator/pkg/naming"
+	"github.com/scylladb/scylla-operator/pkg/pointer"
+	"github.com/scylladb/scylla-operator/test/e2e/framework"
+	"github.com/scylladb/scylla-operator/test/e2e/utils"
+	scyllaclusterverification "github.com/scylladb/scylla-operator/test/e2e/utils/verification/scyllacluster"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+const (
+	terminationTimeout = 1 * time.Minute
+)
+
+var _ = g.Describe("ScyllaCluster graceful termination", func() {
+	f := framework.NewFramework("scyllacluster")
+
+	g.It("should work while waiting for ignition", g.MustPassRepeatedly(3), func() {
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+		defer cancel()
+
+		sc := f.GetDefaultScyllaCluster()
+		o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1))
+		sc.Spec.Datacenter.Racks[0].Members = 1
+		if sc.Spec.ExposeOptions == nil {
+			sc.Spec.ExposeOptions = &v1.ExposeOptions{}
+		}
+		if sc.Spec.ExposeOptions.NodeService == nil {
+			sc.Spec.ExposeOptions.NodeService = &v1.NodeServiceTemplate{}
+		}
+		if sc.Spec.ExposeOptions.NodeService.Annotations == nil {
+			sc.Spec.ExposeOptions.NodeService.Annotations = map[string]string{}
+		}
+		sc.Spec.ExposeOptions.NodeService.Annotations[naming.ForceIgnitionValueAnnotation] = "false"
+
+		framework.By("Creating a ScyllaCluster with blocked ignition")
+		sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		framework.By("Waiting for the ScyllaCluster Pod to be running")
+		podName := naming.PodNameForScyllaCluster(sc.Spec.Datacenter.Racks[0], sc, 0)
+		waitRunningCtx, waitRunningCtxCancel := context.WithTimeoutCause(ctx, 10*time.Minute, fmt.Errorf("timed out waiting for ScyllaDB Pod to be running"))
+		defer waitRunningCtxCancel()
+		pod, err := controllerhelpers.WaitForPodState(waitRunningCtx, f.KubeClient().CoreV1().Pods(f.Namespace()), podName, controllerhelpers.WaitForStateOptions{}, utils.PodIsRunning)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		execCtx, execCtxCancel := context.WithTimeoutCause(ctx, 2*time.Minute, errors.New("ignition probe didn't return expected status in time"))
+		defer execCtxCancel()
+		framework.By("Executing into %q container and verifying it's not ignited using the probe", naming.ScyllaDBIgnitionContainerName)
+		stdout, stderr, err := utils.ExecWithOptions(execCtx, f.ClientConfig(), f.KubeClient().CoreV1(), utils.ExecOptions{
+			Command: []string{
+				"/usr/bin/timeout",
+				"1m",
+				"/usr/bin/bash",
+				"-euxEo",
+				"pipefail",
+				"-O",
+				"inherit_errexit",
+				"-c",
+				strings.TrimSpace(`
+while [[ "$( curl -s -o /dev/null -w '%{http_code}' -G http://localhost:42081/readyz )" != "503" ]]; do
+	sleep 1 &
+	wait
+done
+`),
+			},
+			Namespace:     pod.Namespace,
+			PodName:       pod.Name,
+			ContainerName: naming.ScyllaDBIgnitionContainerName,
+			CaptureStdout: true,
+			CaptureStderr: true,
+		})
+		o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("* stdout:\n%q\n* stderr:\n%s\n* Cause: %s", stdout, stderr, context.Cause(execCtx)))
+
+		sleepTime := 30 * time.Second
+		framework.By("Sleeping for %v to give kubelet time to update the probes", sleepTime)
+		// The sleep also waits for the nodeconfigpod controller to unblock tuning, in case the override is broken.
+		time.Sleep(sleepTime)
+
+		framework.By("Validating container readiness")
+		pod, err = f.KubeClient().CoreV1().Pods(f.Namespace()).Get(ctx, podName, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		crm := utils.GetContainerReadinessMap(pod)
+		o.Expect(crm).To(
+			o.And(
+				o.HaveKeyWithValue(naming.ScyllaContainerName, false),
+				o.HaveKeyWithValue(naming.ScyllaDBIgnitionContainerName, false),
+				o.HaveKeyWithValue(naming.ScyllaManagerAgentContainerName, false),
+			),
+			fmt.Sprintf("container(s) in Pod %q don't match the expected state", pod.Name),
+		)
+
+		framework.By("Deleting the ScyllaDB Pod")
+		err = f.KubeClient().CoreV1().Pods(f.Namespace()).Delete(
+			ctx,
+			pod.Name,
+			metav1.DeleteOptions{
+				Preconditions: &metav1.Preconditions{
+					UID: pointer.Ptr(pod.UID),
+				},
+				GracePeriodSeconds: pointer.Ptr[int64](7 * 24 * 60 * 60),
+			},
+		)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		framework.By("Waiting for the ScyllaDB Pod to be deleted")
+		deletionCtx, deletionCtxCancel := context.WithTimeoutCause(
+			ctx,
+			terminationTimeout,
+			fmt.Errorf("pod %q has not finished termination in time", naming.ObjRef(pod)),
+		)
+		defer deletionCtxCancel()
+		err = framework.WaitForObjectDeletion(
+			deletionCtx,
+			f.DynamicClient(),
+			corev1.SchemeGroupVersion.WithResource("pods"),
+			pod.Namespace,
+			pod.Name,
+			pointer.Ptr(pod.UID),
+		)
+		o.Expect(err).NotTo(o.HaveOccurred())
+	})
+
+	g.It("should work when a cluster is fully rolled out", func() {
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
+		defer cancel()
+
+		sc := f.GetDefaultScyllaCluster()
+		o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1))
+		sc.Spec.Datacenter.Racks[0].Members = 1
+
+		framework.By("Creating a ScyllaCluster")
+		sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion)
+		waitCtx1, waitCtx1Cancel := utils.ContextForRollout(ctx, sc)
+		defer waitCtx1Cancel()
+		sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx1, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		scyllaclusterverification.Verify(ctx, f.KubeClient(), f.ScyllaClient(), sc)
+
+		framework.By("Fetching the Pod and validating container readiness")
+		podName := naming.PodNameForScyllaCluster(sc.Spec.Datacenter.Racks[0], sc, 0)
+		pod, err := f.KubeClient().CoreV1().Pods(f.Namespace()).Get(ctx, podName, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		crm := utils.GetContainerReadinessMap(pod)
+		o.Expect(crm).To(
+			o.And(
+				o.HaveKeyWithValue(naming.ScyllaContainerName, true),
+				o.HaveKeyWithValue(naming.ScyllaDBIgnitionContainerName, true),
+				o.HaveKeyWithValue(naming.ScyllaManagerAgentContainerName, true),
+			),
+			fmt.Sprintf("container(s) in Pod %q don't match the expected state", pod.Name),
+		)
+
+		framework.By("Deleting the ScyllaDB Pod")
+		err = f.KubeClient().CoreV1().Pods(f.Namespace()).Delete(
+			ctx,
+			pod.Name,
+			metav1.DeleteOptions{
+				Preconditions: &metav1.Preconditions{
+					UID: pointer.Ptr(pod.UID),
+				},
+				GracePeriodSeconds: pointer.Ptr[int64](7 * 24 * 60 * 60),
+			},
+		)
+		o.Expect(err).NotTo(o.HaveOccurred())
+		framework.By("Waiting for the ScyllaDB Pod to be deleted")
+		deletionCtx, deletionCtxCancel := context.WithTimeoutCause(
+			ctx,
+			terminationTimeout,
+			fmt.Errorf("pod %q has not finished termination in time", naming.ObjRef(pod)),
+		)
+		defer deletionCtxCancel()
+		err = framework.WaitForObjectDeletion(
+			deletionCtx,
+			f.DynamicClient(),
+			corev1.SchemeGroupVersion.WithResource("pods"),
+			pod.Namespace,
+			pod.Name,
+			pointer.Ptr(pod.UID),
+		)
+		o.Expect(err).NotTo(o.HaveOccurred())
+	})
+})
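Note: both tests assert on utils.GetContainerReadinessMap, which is not part of this diff. A sketch of its expected shape, under the assumption that it simply projects the Pod's container statuses by name:

package utils

import corev1 "k8s.io/api/core/v1"

// GetContainerReadinessMap maps each container name in the Pod's status to
// its Ready flag, so tests can assert per-container readiness by name.
// Assumed implementation; the real helper may differ.
func GetContainerReadinessMap(pod *corev1.Pod) map[string]bool {
	readiness := map[string]bool{}
	for _, cs := range pod.Status.ContainerStatuses {
		readiness[cs.Name] = cs.Ready
	}
	return readiness
}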