diff --git a/pkg/controller/ignition/controller.go b/pkg/controller/ignition/controller.go index a177ffdc65f..9d6adf9e54f 100644 --- a/pkg/controller/ignition/controller.go +++ b/pkg/controller/ignition/controller.go @@ -2,6 +2,7 @@ package ignition import ( "context" + "errors" "fmt" "sync/atomic" "time" @@ -12,6 +13,7 @@ import ( "github.com/scylladb/scylla-operator/pkg/helpers" "github.com/scylladb/scylla-operator/pkg/internalapi" "github.com/scylladb/scylla-operator/pkg/naming" + "github.com/scylladb/scylla-operator/pkg/pointer" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" corev1informers "k8s.io/client-go/informers/core/v1" @@ -89,18 +91,10 @@ func (c *Controller) IsIgnited() bool { return c.ignited.Load() } -func (c *Controller) Sync(ctx context.Context) error { - startTime := time.Now() - klog.V(4).InfoS("Started syncing observer", "Name", c.Observer.Name(), "startTime", startTime) - defer func() { - klog.V(4).InfoS("Finished syncing observer", "Name", c.Observer.Name(), "duration", time.Since(startTime)) - }() - - ignited := true - +func (c *Controller) evaluateIgnitionState() (bool, error) { svc, err := c.serviceLister.Services(c.namespace).Get(c.serviceName) if err != nil { - return fmt.Errorf("can't get service %q: %w", c.serviceName, err) + return false, fmt.Errorf("can't get service %q: %w", c.serviceName, err) } // TODO: This isn't bound to the lifecycle of the ScyllaDB Pod and should be evaluated in the controller for the resource. @@ -112,41 +106,38 @@ func (c *Controller) Sync(ctx context.Context) error { "Waiting for identity service to have at least one ingress point", "Service", naming.ManualRef(c.namespace, c.serviceName), ) - ignited = false - } else { - klog.V(2).InfoS( - "Service is available and has an IP address", - "Service", naming.ManualRef(svc.Namespace, svc.Name), - "UID", svc.UID, - ) + return false, nil } + klog.V(2).InfoS( + "Service is available and has an IP address", + "Service", naming.ManualRef(svc.Namespace, svc.Name), + "UID", svc.UID, + ) } pod, err := c.podLister.Pods(c.namespace).Get(c.serviceName) if err != nil { - return fmt.Errorf("can't get pod %q: %w", c.serviceName, err) + return false, fmt.Errorf("can't get pod %q: %w", c.serviceName, err) } if len(pod.Status.PodIP) == 0 { klog.V(2).InfoS("PodIP is not yet set", "Pod", klog.KObj(pod), "UID", pod.UID) - ignited = false - } else { - klog.V(2).InfoS("PodIP is present on the Pod", "Pod", klog.KObj(pod), "UID", pod.UID, "IP", pod.Status.PodIP) + return false, nil } + klog.V(2).InfoS("PodIP is present on the Pod", "Pod", klog.KObj(pod), "UID", pod.UID, "IP", pod.Status.PodIP) containerID, err := controllerhelpers.GetScyllaContainerID(pod) if err != nil { - return controllertools.NonRetriable( + return false, controllertools.NonRetriable( fmt.Errorf("can't get scylla container id in pod %q: %v", naming.ObjRef(pod), err), ) } if len(containerID) == 0 { klog.V(2).InfoS("ScyllaDB ContainerID is not yet set", "Pod", klog.KObj(pod), "UID", pod.UID) - ignited = false - } else { - klog.V(2).InfoS("Pod has ScyllaDB ContainerID set", "Pod", klog.KObj(pod), "UID", pod.UID, "ContainerID", containerID) + return false, nil } + klog.V(2).InfoS("Pod has ScyllaDB ContainerID set", "Pod", klog.KObj(pod), "UID", pod.UID, "ContainerID", containerID) cmLabelSelector := labels.Set{ naming.OwnerUIDLabel: string(pod.UID), @@ -154,20 +145,20 @@ func (c *Controller) Sync(ctx context.Context) error { }.AsSelector() configMaps, err := c.configMapLister.ConfigMaps(c.namespace).List(cmLabelSelector) if err != 
nil { - return fmt.Errorf("can't list tuning configmap: %w", err) + return false, fmt.Errorf("can't list tuning configmap: %w", err) } switch l := len(configMaps); l { case 0: klog.V(2).InfoS("Tuning ConfigMap for pod is not yet available", "Pod", klog.KObj(pod), "UID", pod.UID) - ignited = false + return false, nil case 1: cm := configMaps[0] src := &internalapi.SidecarRuntimeConfig{} src, err = controllerhelpers.GetSidecarRuntimeConfigFromConfigMap(cm) if err != nil { - return controllertools.NonRetriable( + return false, controllertools.NonRetriable( fmt.Errorf("can't get sidecar runtime config from configmap %q: %w", naming.ObjRef(cm), err), ) } @@ -179,7 +170,7 @@ func (c *Controller) Sync(ctx context.Context) error { "ContainerID", containerID, "NodeConfig", src.BlockingNodeConfigs, ) - ignited = false + return false, nil } } else { klog.V(2).InfoS("Scylla runtime config is not yet updated with our ContainerID", @@ -187,20 +178,65 @@ func (c *Controller) Sync(ctx context.Context) error { "ConfigContainerID", src.ContainerID, "SidecarContainerID", containerID, ) - ignited = false + return false, nil } default: - return fmt.Errorf("mutiple tuning configmaps are present for pod %q with UID %q", naming.ObjRef(pod), pod.UID) + return false, fmt.Errorf("mutiple tuning configmaps are present for pod %q with UID %q", naming.ObjRef(pod), pod.UID) + } + + return true, nil +} +func (c *Controller) Sync(ctx context.Context) error { + startTime := time.Now() + klog.V(4).InfoS("Started syncing observer", "Name", c.Observer.Name(), "startTime", startTime) + defer func() { + klog.V(4).InfoS("Finished syncing observer", "Name", c.Observer.Name(), "duration", time.Since(startTime)) + }() + + svc, err := c.serviceLister.Services(c.namespace).Get(c.serviceName) + if err != nil { + return fmt.Errorf("can't get service %q: %w", c.serviceName, err) + } + + var ignitionOverride *bool + if svc.Annotations != nil { + forceIgnitionString, hasForceIgnitionString := svc.Annotations[naming.ForceIgnitionValueAnnotation] + if hasForceIgnitionString { + switch forceIgnitionString { + case "true": + ignitionOverride = pointer.Ptr(true) + case "false": + ignitionOverride = pointer.Ptr(false) + default: + klog.ErrorS(errors.New("invalid ignition override value"), "Value", forceIgnitionString, "Key", naming.ForceIgnitionValueAnnotation) + ignitionOverride = nil + } + } + } + + var ignited bool + if ignitionOverride != nil { + ignited = *ignitionOverride + klog.InfoS("Forcing ignition state", "Ignited", ignited, "Annotation", naming.ForceIgnitionValueAnnotation) + } else { + ignited, err = c.evaluateIgnitionState() + if err != nil { + return fmt.Errorf("can't evaluate ignition state: %w", err) + } } if ignited { klog.V(2).InfoS("Ignition successful", "SignalFile", naming.ScyllaDBIgnitionDonePath) + err = helpers.TouchFile(naming.ScyllaDBIgnitionDonePath) + if err != nil { + return fmt.Errorf("can't touch signal file %q: %w", naming.ScyllaDBIgnitionDonePath, err) + } } else { klog.V(2).InfoS("Waiting for ignition to complete.", "SignalFile", naming.ScyllaDBIgnitionDonePath) } - klog.V(2).InfoS("Updating ignition", "Ignited", ignited, "SignalFile", naming.ScyllaDBIgnitionDonePath) + klog.V(2).InfoS("Updating ignition", "Ignited", ignited) oldIgnited := c.ignited.Load() if ignited != oldIgnited { @@ -208,10 +244,5 @@ func (c *Controller) Sync(ctx context.Context) error { } c.ignited.Store(ignited) - err = helpers.TouchFile(naming.ScyllaDBIgnitionDonePath) - if err != nil { - return fmt.Errorf("can't touch signal file %q: 
%w", naming.ScyllaDBIgnitionDonePath, err) - } - return nil } diff --git a/pkg/controller/scylladbdatacenter/resource.go b/pkg/controller/scylladbdatacenter/resource.go index 3a0c7ce7151..e67f229ae1d 100644 --- a/pkg/controller/scylladbdatacenter/resource.go +++ b/pkg/controller/scylladbdatacenter/resource.go @@ -636,9 +636,12 @@ func StatefulSetForRack(rack scyllav1alpha1.RackSpec, sdc *scyllav1alpha1.Scylla "inherit_errexit", "-c", strings.TrimSpace(` +trap 'kill $( jobs -p ); exit 0' TERM + printf 'INFO %s ignition - Waiting for /mnt/shared/ignition.done\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr until [[ -f "/mnt/shared/ignition.done" ]]; do - sleep 1; + sleep 1 & + wait done printf 'INFO %s ignition - Ignited. Starting ScyllaDB...\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr @@ -836,7 +839,9 @@ exec /mnt/shared/scylla-operator sidecar \ "inherit_errexit", "-c", strings.TrimSpace(` -trap 'rm /mnt/shared/ignition.done' EXIT +trap 'kill $( jobs -p ); exit 0' TERM +trap 'rm -f /mnt/shared/ignition.done' EXIT + nodetool drain & sleep ` + strconv.Itoa(minTerminationGracePeriodSeconds) + ` & wait @@ -892,7 +897,7 @@ wait }, }, { - Name: "scylladb-ignition", + Name: naming.ScyllaDBIgnitionContainerName, Image: sidecarImage, ImagePullPolicy: corev1.PullIfNotPresent, Command: []string{ @@ -1166,7 +1171,7 @@ func getScyllaDBManagerAgentContainer(r scyllav1alpha1.RackSpec, sdc *scyllav1al } cnt := &corev1.Container{ - Name: "scylla-manager-agent", + Name: naming.ScyllaManagerAgentContainerName, Image: *sdc.Spec.ScyllaDBManagerAgent.Image, ImagePullPolicy: corev1.PullIfNotPresent, // There is no point in starting scylla-manager before ScyllaDB is tuned and ignited. The manager agent fails after 60 attempts and hits backoff unnecessarily. @@ -1178,13 +1183,16 @@ func getScyllaDBManagerAgentContainer(r scyllav1alpha1.RackSpec, sdc *scyllav1al "inherit_errexit", "-c", strings.TrimSpace(` +trap 'kill $( jobs -p ); exit 0' TERM + printf '{"L":"INFO","T":"%s","M":"Waiting for /mnt/shared/ignition.done"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr until [[ -f "/mnt/shared/ignition.done" ]]; do - sleep 1; + sleep 1 & + wait done printf '{"L":"INFO","T":"%s","M":"Ignited. Starting ScyllaDB Manager Agent"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr -scylla-manager-agent \ +exec scylla-manager-agent \ -c ` + fmt.Sprintf("%q ", naming.ScyllaAgentConfigDefaultFile) + `\ -c ` + fmt.Sprintf("%q ", path.Join(naming.ScyllaAgentConfigDirName, naming.ScyllaAgentConfigFileName)) + `\ -c ` + fmt.Sprintf("%q ", path.Join(naming.ScyllaAgentConfigDirName, naming.ScyllaAgentAuthTokenFileName)) + ` diff --git a/pkg/controller/scylladbdatacenter/resource_test.go b/pkg/controller/scylladbdatacenter/resource_test.go index 3b724c57b50..67235d235b3 100644 --- a/pkg/controller/scylladbdatacenter/resource_test.go +++ b/pkg/controller/scylladbdatacenter/resource_test.go @@ -724,9 +724,12 @@ func TestStatefulSetForRack(t *testing.T) { "inherit_errexit", "-c", strings.TrimSpace(` +trap 'kill $( jobs -p ); exit 0' TERM + printf 'INFO %s ignition - Waiting for /mnt/shared/ignition.done\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr until [[ -f "/mnt/shared/ignition.done" ]]; do - sleep 1; + sleep 1 & + wait done printf 'INFO %s ignition - Ignited. 
Starting ScyllaDB...\n' "$( date '+%Y-%m-%d %H:%M:%S,%3N' )" > /dev/stderr @@ -878,10 +881,13 @@ exec /mnt/shared/scylla-operator sidecar \ "-O", "inherit_errexit", "-c", - `trap 'rm /mnt/shared/ignition.done' EXIT + strings.TrimSpace(` +trap 'kill $( jobs -p ); exit 0' TERM +trap 'rm -f /mnt/shared/ignition.done' EXIT + nodetool drain & sleep 15 & -wait`, +wait`), }, }, }, @@ -993,13 +999,16 @@ wait`, "inherit_errexit", "-c", strings.TrimSpace(` +trap 'kill $( jobs -p ); exit 0' TERM + printf '{"L":"INFO","T":"%s","M":"Waiting for /mnt/shared/ignition.done"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr until [[ -f "/mnt/shared/ignition.done" ]]; do - sleep 1; + sleep 1 & + wait done printf '{"L":"INFO","T":"%s","M":"Ignited. Starting ScyllaDB Manager Agent"}\n' "$( date -u '+%Y-%m-%dT%H:%M:%S,%3NZ' )" > /dev/stderr -scylla-manager-agent \ +exec scylla-manager-agent \ -c "/etc/scylla-manager-agent/scylla-manager-agent.yaml" \ -c "/mnt/scylla-agent-config/scylla-manager-agent.yaml" \ -c "/mnt/scylla-agent-config/auth-token.yaml" diff --git a/pkg/naming/constants.go b/pkg/naming/constants.go index 80c544c7aa0..e771052c967 100644 --- a/pkg/naming/constants.go +++ b/pkg/naming/constants.go @@ -26,6 +26,9 @@ const ( // Readiness check will always fail when this label is added to member service. NodeMaintenanceLabel = "scylla/node-maintenance" + // ForceIgnitionValueAnnotation allows to force ignition state. The value can be either "true" or "false". + ForceIgnitionValueAnnotation = "internal.scylla-operator.scylladb.com/force-ignition-value" + LabelValueTrue = "true" LabelValueFalse = "false" ) @@ -110,11 +113,13 @@ const ( // Configuration Values const ( - ScyllaContainerName = "scylla" - SidecarInjectorContainerName = "sidecar-injection" - PerftuneContainerName = "perftune" - CleanupContainerName = "cleanup" - RLimitsContainerName = "rlimits" + ScyllaContainerName = "scylla" + ScyllaDBIgnitionContainerName = "scylladb-ignition" + ScyllaManagerAgentContainerName = "scylla-manager-agent" + SidecarInjectorContainerName = "sidecar-injection" + PerftuneContainerName = "perftune" + CleanupContainerName = "cleanup" + RLimitsContainerName = "rlimits" PVCTemplateName = "data" diff --git a/pkg/naming/names.go b/pkg/naming/names.go index d226b329c48..5ada1b2b896 100644 --- a/pkg/naming/names.go +++ b/pkg/naming/names.go @@ -61,6 +61,10 @@ func MemberServiceNameForScyllaCluster(r scyllav1.RackSpec, sc *scyllav1.ScyllaC return fmt.Sprintf("%s-%d", StatefulSetNameForRackForScyllaCluster(r, sc), idx) } +func PodNameForScyllaCluster(r scyllav1.RackSpec, sc *scyllav1.ScyllaCluster, idx int) string { + return MemberServiceNameForScyllaCluster(r, sc, idx) +} + func IdentityServiceName(sdc *scyllav1alpha1.ScyllaDBDatacenter) string { return fmt.Sprintf("%s-client", sdc.Name) } diff --git a/test/e2e/set/nodeconfig/nodeconfig_disksetup.go b/test/e2e/set/nodeconfig/nodeconfig_disksetup.go index e11b70b4ef9..77d66a54fe4 100644 --- a/test/e2e/set/nodeconfig/nodeconfig_disksetup.go +++ b/test/e2e/set/nodeconfig/nodeconfig_disksetup.go @@ -154,14 +154,14 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { o.Eventually(func(g o.Gomega) { for _, ldName := range loopDeviceNames { loopDevicePath := path.Join(hostLoopsDir, ldName) - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "stat", loopDevicePath) + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "stat", loopDevicePath) g.Expect(err).NotTo(o.HaveOccurred(), 
stdout, stderr) } }).WithPolling(1 * time.Second).WithTimeout(3 * time.Minute).Should(o.Succeed()) var findmntOutput string o.Eventually(func(g o.Gomega) { - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "findmnt", "--raw", "--output=SOURCE", "--noheadings", hostMountPath) + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "findmnt", "--raw", "--output=SOURCE", "--noheadings", hostMountPath) g.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) findmntOutput = stdout }).WithPolling(10 * time.Second).WithTimeout(3 * time.Minute).Should(o.Succeed()) @@ -173,15 +173,15 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { framework.By("Checking if RAID device has been created at %q", discoveredRaidDevice) o.Eventually(func(g o.Gomega) { - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "stat", discoveredRaidDeviceOnHost) + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "stat", discoveredRaidDeviceOnHost) g.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "readlink", "-f", discoveredRaidDeviceOnHost) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "readlink", "-f", discoveredRaidDeviceOnHost) g.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) raidDeviceName := path.Base(discoveredRaidDeviceOnHost) - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "cat", fmt.Sprintf("/sys/block/%s/md/level", raidDeviceName)) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "cat", fmt.Sprintf("/sys/block/%s/md/level", raidDeviceName)) g.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) raidLevel := strings.TrimSpace(stdout) @@ -190,7 +190,7 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { framework.By("Checking if RAID device has been formatted") o.Eventually(func(g o.Gomega) { - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "blkid", "--output=value", "--match-tag=TYPE", discoveredRaidDeviceOnHost) + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "blkid", "--output=value", "--match-tag=TYPE", discoveredRaidDeviceOnHost) g.Expect(err).NotTo(o.HaveOccurred(), stderr) g.Expect(strings.TrimSpace(stdout)).To(o.Equal("xfs")) @@ -198,7 +198,7 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { framework.By("Checking if RAID was mounted at the provided location with correct options") o.Eventually(func(g o.Gomega) { - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mount") + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mount") g.Expect(err).NotTo(o.HaveOccurred(), stderr) // mount output format @@ -437,22 +437,22 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { o.Expect(err).NotTo(o.HaveOccurred()) framework.By("Creating a temp directory on host") - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mktemp", "--tmpdir=/host/tmp/", "--directory") + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mktemp", "--tmpdir=/host/tmp/", "--directory") o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) 
hostTMPDir := strings.TrimSpace(stdout) framework.By("Creating the target mount point on host") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mkdir", hostMountPath) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mkdir", hostMountPath) o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) framework.By("Bind mounting temp directory on host to target mount point") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mount", "--bind", hostTMPDir, hostMountPath) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "mount", "--bind", hostTMPDir, hostMountPath) o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) cleanupFunc := func(ctx context.Context) { framework.By("Unmounting bind mounted target") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "umount", hostMountPath) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "umount", hostMountPath) o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) } @@ -509,15 +509,15 @@ var _ = g.Describe("Node Setup", framework.Serial, func() { o.Expect(err).NotTo(o.HaveOccurred()) framework.By("Verifying XFS filesystem integrity") - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_repair", "-o", "force_geometry", "-f", "-n", hostDevicePath) + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_repair", "-o", "force_geometry", "-f", "-n", hostDevicePath) o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) framework.By("Corrupting XFS filesystem") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_db", "-x", "-c", "blockget", "-c", "blocktrash -s 12345678 -n 1000", hostDevicePath) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_db", "-x", "-c", "blockget", "-c", "blocktrash -s 12345678 -n 1000", hostDevicePath) o.Expect(err).NotTo(o.HaveOccurred(), stdout, stderr) framework.By("Verifying that XFS filesystem is corrupted") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_repair", "-o", "force_geometry", "-f", "-n", hostDevicePath) + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), clientPod, "xfs_repair", "-o", "force_geometry", "-f", "-n", hostDevicePath) o.Expect(err).To(o.HaveOccurred()) framework.By("Patching NodeConfig's mount configuration with a mount over a corrupted filesystem") @@ -589,8 +589,8 @@ func newClientPod(nc *scyllav1alpha1.NodeConfig) *corev1.Pod { } } -func executeInPod(config *rest.Config, client corev1client.CoreV1Interface, pod *corev1.Pod, command string, args ...string) (string, string, error) { - return utils.ExecWithOptions(config, client, utils.ExecOptions{ +func executeInPod(ctx context.Context, config *rest.Config, client corev1client.CoreV1Interface, pod *corev1.Pod, command string, args ...string) (string, string, error) { + return utils.ExecWithOptions(ctx, config, client, utils.ExecOptions{ Command: append([]string{command}, args...), Namespace: pod.Namespace, PodName: pod.Name, diff --git a/test/e2e/set/nodeconfig/nodeconfig_optimizations.go b/test/e2e/set/nodeconfig/nodeconfig_optimizations.go index 90abff69559..6157d4fdc72 100644 --- a/test/e2e/set/nodeconfig/nodeconfig_optimizations.go +++ 
b/test/e2e/set/nodeconfig/nodeconfig_optimizations.go @@ -165,7 +165,7 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { scyllaPod, err := f.KubeClient().CoreV1().Pods(sc.Namespace).Get(ctx, podName, metav1.GetOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) - stdout, stderr, err := executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), scyllaPod, + stdout, stderr, err := executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), scyllaPod, "bash", "-euExo", "pipefail", @@ -180,7 +180,7 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { o.Expect(stdout).To(o.Equal(nrOpenLimit)) framework.By("Validating hard file limit of Scylla process") - stdout, stderr, err = executeInPod(f.ClientConfig(), f.KubeClient().CoreV1(), scyllaPod, + stdout, stderr, err = executeInPod(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), scyllaPod, "bash", "-euExo", "pipefail", @@ -263,6 +263,7 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { o.Expect(err).NotTo(o.HaveOccurred()) sc := f.GetDefaultScyllaCluster() + o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1)) sc.Spec.Datacenter.Racks[0].AgentResources = corev1.ResourceRequirements{ Requests: map[corev1.ResourceName]resource.Quantity{ corev1.ResourceCPU: resource.MustParse("50m"), @@ -332,6 +333,26 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(utils.IsScyllaClusterRolledOut(sc)).To(o.BeFalse()) + framework.By("Verifying the containers are blocked and not ready") + podSelector := labels.Set(naming.ClusterLabelsForScyllaCluster(sc)).AsSelector() + scPods, err := f.KubeClient().CoreV1().Pods(sc.Namespace).List(ctx, metav1.ListOptions{ + LabelSelector: podSelector.String(), + }) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(scPods.Items).NotTo(o.BeEmpty()) + + for _, pod := range scPods.Items { + crm := utils.GetContainerReadinessMap(&pod) + o.Expect(crm).To( + o.And( + o.HaveKeyWithValue(naming.ScyllaContainerName, false), + o.HaveKeyWithValue(naming.ScyllaDBIgnitionContainerName, false), + o.HaveKeyWithValue(naming.ScyllaManagerAgentContainerName, false), + ), + fmt.Sprintf("container(s) in Pod %q don't match the expected state", pod.Name), + ) + } + framework.By("Unblocking tuning") intermittentArtifactsDir := "" if len(f.GetDefaultArtifactsDir()) != 0 { @@ -400,7 +421,20 @@ var _ = g.Describe("NodeConfig Optimizations", framework.Serial, func() { sc, err = controllerhelpers.WaitForScyllaClusterState(ctx4, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) o.Expect(err).NotTo(o.HaveOccurred()) - scyllaclusterverification.Verify(ctx, f.KubeClient(), f.ScyllaClient(), sc) + scyllaclusterverification.VerifyWithOptions( + ctx, + f.KubeClient(), + f.ScyllaClient(), + sc, + scyllaclusterverification.VerifyOptions{ + VerifyStatefulSetOptions: scyllaclusterverification.VerifyStatefulSetOptions{ + PodRestartCountAssertion: func(a o.Assertion, containerName, podName string) { + // We expect restart(s) from the startup probe, usually 1. 
+ a.To(o.BeNumerically("<=", 2), fmt.Sprintf("container %q in pod %q should not be restarted by the startup probe more than twice", containerName, podName)) + }, + }, + }, + ) framework.By("Verifying ConfigMap content") ctx5, ctx5Cancel := context.WithTimeout(ctx, apiCallTimeout) diff --git a/test/e2e/set/scyllacluster/scyllacluster_restarts.go b/test/e2e/set/scyllacluster/scyllacluster_restarts.go new file mode 100644 index 00000000000..4b60c98c5ee --- /dev/null +++ b/test/e2e/set/scyllacluster/scyllacluster_restarts.go @@ -0,0 +1,214 @@ +// Copyright (C) 2024 ScyllaDB + +package scyllacluster + +import ( + "context" + "errors" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + scyllav1 "github.com/scylladb/scylla-operator/pkg/api/scylla/v1" + "github.com/scylladb/scylla-operator/pkg/controllerhelpers" + "github.com/scylladb/scylla-operator/pkg/naming" + "github.com/scylladb/scylla-operator/pkg/pointer" + "github.com/scylladb/scylla-operator/test/e2e/framework" + "github.com/scylladb/scylla-operator/test/e2e/utils" + scyllaclusterverification "github.com/scylladb/scylla-operator/test/e2e/utils/verification/scyllacluster" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + // terminationTimeout is the amount of time that the pod needs to terminate gracefully. + // In addition to process termination, this needs to account for kubelet sending signals, state propagation + // and generally being busy in the CI. + terminationTimeout = 5 * time.Minute + // A high enough grace period so it can never terminate gracefully in an e2e run. + gracePeriod = terminationTimeout + (7 * 24 * time.Hour) +) + +var _ = g.Describe("ScyllaCluster graceful termination", func() { + f := framework.NewFramework("scyllacluster") + + // This test verifies correct signal handling in the bash wait routine + // which oscillates between a sleep (external program) and a file check. + // The "problematic" external program (sleep) runs a majority of the time, but not all. + // To be reasonably sure, we should run this multiple times in a single test run + // and avoid an accidental merge that could bork the test / CI. 
+ g.It("should work while waiting for ignition", g.MustPassRepeatedly(3), func() { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + + sc := f.GetDefaultScyllaCluster() + o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1)) + sc.Spec.Datacenter.Racks[0].Members = 1 + if sc.Spec.ExposeOptions == nil { + sc.Spec.ExposeOptions = &scyllav1.ExposeOptions{} + } + if sc.Spec.ExposeOptions.NodeService == nil { + sc.Spec.ExposeOptions.NodeService = &scyllav1.NodeServiceTemplate{} + } + if sc.Spec.ExposeOptions.NodeService.Annotations == nil { + sc.Spec.ExposeOptions.NodeService.Annotations = map[string]string{} + } + sc.Spec.ExposeOptions.NodeService.Annotations[naming.ForceIgnitionValueAnnotation] = "false" + + framework.By("Creating a ScyllaCluster with blocked ignition") + sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Waiting for the ScyllaCluster Pod to be running") + podName := naming.PodNameForScyllaCluster(sc.Spec.Datacenter.Racks[0], sc, 0) + waitRunningCtx, waitRunningCtxCancel := context.WithTimeoutCause(ctx, 10*time.Minute, fmt.Errorf("timed out waiting for ScyllaDB Pod to be running")) + defer waitRunningCtxCancel() + pod, err := controllerhelpers.WaitForPodState(waitRunningCtx, f.KubeClient().CoreV1().Pods(f.Namespace()), podName, controllerhelpers.WaitForStateOptions{}, utils.PodIsRunning) + o.Expect(err).NotTo(o.HaveOccurred()) + + execCtx, execCtxCancel := context.WithTimeoutCause(ctx, 2*time.Minute, errors.New("ignition probe didn't return expected status in time")) + defer execCtxCancel() + framework.By("Executing into %q container and verifying it's not ignited using the probe", naming.ScyllaDBIgnitionContainerName) + stdout, stderr, err := utils.ExecWithOptions(execCtx, f.ClientConfig(), f.KubeClient().CoreV1(), utils.ExecOptions{ + Command: []string{ + "/usr/bin/timeout", + "1m", + "/usr/bin/bash", + "-euxEo", + "pipefail", + "-O", + "inherit_errexit", + "-c", + strings.TrimSpace(` +while [[ "$( curl -s -o /dev/null -w '%{http_code}' -G http://localhost:42081/readyz )" != "503" ]]; do + sleep 1 & + wait; +done + `), + }, + Namespace: pod.Namespace, + PodName: pod.Name, + ContainerName: naming.ScyllaDBIgnitionContainerName, + CaptureStdout: true, + CaptureStderr: true, + }) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("* stdout:\n%q\n* stderr:\n%s\n* Cause: %s", stdout, stderr, context.Cause(execCtx))) + + sleepTime := 30 * time.Second + framework.By("Sleeping for %v to give kubelet time to update the probes", sleepTime) + // The sleep also waits for nodeconfigpod controller to unblock tuning, in case the override would be broken. 
+ time.Sleep(sleepTime) + + framework.By("Validating container readiness") + pod, err = f.KubeClient().CoreV1().Pods(f.Namespace()).Get(ctx, podName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + crm := utils.GetContainerReadinessMap(pod) + o.Expect(crm).To( + o.And( + o.HaveKeyWithValue(naming.ScyllaContainerName, false), + o.HaveKeyWithValue(naming.ScyllaDBIgnitionContainerName, false), + o.HaveKeyWithValue(naming.ScyllaManagerAgentContainerName, false), + ), + fmt.Sprintf("container(s) in Pod %q don't match the expected state", pod.Name), + ) + + framework.By("Deleting the ScyllaDB Pod") + err = f.KubeClient().CoreV1().Pods(f.Namespace()).Delete( + ctx, + pod.Name, + metav1.DeleteOptions{ + Preconditions: &metav1.Preconditions{ + UID: pointer.Ptr(pod.UID), + }, + GracePeriodSeconds: pointer.Ptr(int64(gracePeriod.Seconds())), + }, + ) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Waiting for the ScyllaDB Pod to be deleted") + deletionCtx, deletionCtxCancel := context.WithTimeoutCause( + ctx, + terminationTimeout, + fmt.Errorf("pod %q has not finished termination in time", naming.ObjRef(pod)), + ) + defer deletionCtxCancel() + err = framework.WaitForObjectDeletion( + deletionCtx, + f.DynamicClient(), + corev1.SchemeGroupVersion.WithResource("pods"), + pod.Namespace, + pod.Name, + pointer.Ptr(pod.UID), + ) + o.Expect(err).NotTo(o.HaveOccurred()) + }) + + g.It("should work when a cluster is fully rolled out", func() { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + + sc := f.GetDefaultScyllaCluster() + o.Expect(sc.Spec.Datacenter.Racks).To(o.HaveLen(1)) + sc.Spec.Datacenter.Racks[0].Members = 1 + + framework.By("Creating a ScyllaCluster") + sc, err := f.ScyllaClient().ScyllaV1().ScyllaClusters(f.Namespace()).Create(ctx, sc, metav1.CreateOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Waiting for the ScyllaCluster to roll out (RV=%s)", sc.ResourceVersion) + waitCtx1, waitCtx1Cancel := utils.ContextForRollout(ctx, sc) + defer waitCtx1Cancel() + sc, err = controllerhelpers.WaitForScyllaClusterState(waitCtx1, f.ScyllaClient().ScyllaV1().ScyllaClusters(sc.Namespace), sc.Name, controllerhelpers.WaitForStateOptions{}, utils.IsScyllaClusterRolledOut) + o.Expect(err).NotTo(o.HaveOccurred()) + + scyllaclusterverification.Verify(ctx, f.KubeClient(), f.ScyllaClient(), sc) + + framework.By("Fetching the Pod and validating container readiness") + podName := naming.PodNameForScyllaCluster(sc.Spec.Datacenter.Racks[0], sc, 0) + pod, err := f.KubeClient().CoreV1().Pods(f.Namespace()).Get(ctx, podName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred()) + + crm := utils.GetContainerReadinessMap(pod) + o.Expect(crm).To( + o.And( + o.HaveKeyWithValue(naming.ScyllaContainerName, true), + o.HaveKeyWithValue(naming.ScyllaDBIgnitionContainerName, true), + o.HaveKeyWithValue(naming.ScyllaManagerAgentContainerName, true), + ), + fmt.Sprintf("container(s) in Pod %q don't match the expected state", pod.Name), + ) + + framework.By("Deleting the ScyllaDB Pod") + err = f.KubeClient().CoreV1().Pods(f.Namespace()).Delete( + ctx, + pod.Name, + metav1.DeleteOptions{ + Preconditions: &metav1.Preconditions{ + UID: pointer.Ptr(pod.UID), + }, + GracePeriodSeconds: pointer.Ptr(int64(gracePeriod.Seconds())), + }, + ) + o.Expect(err).NotTo(o.HaveOccurred()) + + framework.By("Waiting for the ScyllaDB Pod to be deleted") + deletionCtx, deletionCtxCancel := context.WithTimeoutCause( + ctx, + terminationTimeout, + fmt.Errorf("pod %q 
has not finished termination in time", naming.ObjRef(pod)), + ) + defer deletionCtxCancel() + err = framework.WaitForObjectDeletion( + deletionCtx, + f.DynamicClient(), + corev1.SchemeGroupVersion.WithResource("pods"), + pod.Namespace, + pod.Name, + pointer.Ptr(pod.UID), + ) + o.Expect(err).NotTo(o.HaveOccurred()) + }) +}) diff --git a/test/e2e/set/scyllacluster/scyllacluster_shardawareness.go b/test/e2e/set/scyllacluster/scyllacluster_shardawareness.go index cd230ef2a5d..053c56df5ec 100644 --- a/test/e2e/set/scyllacluster/scyllacluster_shardawareness.go +++ b/test/e2e/set/scyllacluster/scyllacluster_shardawareness.go @@ -104,7 +104,7 @@ var _ = g.Describe("ScyllaCluster", func() { for i := range connectionAttempts { framework.By("Establishing connection number %d to shard number %d", i, shard) - stdout, stderr, err := utils.ExecWithOptions(f.ClientConfig(), f.KubeClient().CoreV1(), utils.ExecOptions{ + stdout, stderr, err := utils.ExecWithOptions(ctx, f.ClientConfig(), f.KubeClient().CoreV1(), utils.ExecOptions{ Command: []string{ "/usr/bin/bash", "-euEo", diff --git a/test/e2e/set/scyllacluster/scyllamanager_object_storage.go b/test/e2e/set/scyllacluster/scyllamanager_object_storage.go index 43a404f84ac..5bec0a321a2 100644 --- a/test/e2e/set/scyllacluster/scyllamanager_object_storage.go +++ b/test/e2e/set/scyllacluster/scyllamanager_object_storage.go @@ -354,7 +354,7 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage scyllaManagerPod := scyllaManagerPods.Items[0] framework.By("Creating a schema restore task") - stdout, stderr, err := utils.ExecWithOptions(f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{ + stdout, stderr, err := utils.ExecWithOptions(ctx, f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{ Command: []string{ "sctool", "restore", @@ -419,7 +419,7 @@ var _ = g.Describe("Scylla Manager integration", framework.RequiresObjectStorage } framework.By("Creating a tables restore task") - stdout, stderr, err = utils.ExecWithOptions(f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{ + stdout, stderr, err = utils.ExecWithOptions(ctx, f.AdminClientConfig(), f.KubeAdminClient().CoreV1(), utils.ExecOptions{ Command: []string{ "sctool", "restore", diff --git a/test/e2e/utils/exec.go b/test/e2e/utils/exec.go index 2b26df0c049..d5c213a3ed6 100644 --- a/test/e2e/utils/exec.go +++ b/test/e2e/utils/exec.go @@ -4,6 +4,7 @@ package utils import ( "bytes" + "context" "io" "net/http" @@ -28,7 +29,7 @@ type ExecOptions struct { // ExecWithOptions executes a command in the specified container, // returning stdout, stderr and error. `options` allowed for // additional parameters to be passed. -func ExecWithOptions(config *rest.Config, client corev1client.CoreV1Interface, options ExecOptions) (string, string, error) { +func ExecWithOptions(ctx context.Context, config *rest.Config, client corev1client.CoreV1Interface, options ExecOptions) (string, string, error) { const tty = false req := client.RESTClient().Post(). 
@@ -52,12 +53,14 @@ func ExecWithOptions(config *rest.Config, client corev1client.CoreV1Interface, o if err != nil { return "", "", err } - err = exec.Stream(remotecommand.StreamOptions{ - Stdin: options.Stdin, - Stdout: &stdout, - Stderr: &stderr, - Tty: tty, - }) + err = exec.StreamWithContext( + ctx, + remotecommand.StreamOptions{ + Stdin: options.Stdin, + Stdout: &stdout, + Stderr: &stderr, + Tty: tty, + }) return stdout.String(), stderr.String(), err } diff --git a/test/e2e/utils/helpers.go b/test/e2e/utils/helpers.go index 1e4be12226c..dac3a3ea140 100644 --- a/test/e2e/utils/helpers.go +++ b/test/e2e/utils/helpers.go @@ -847,3 +847,19 @@ func WaitForScyllaOperatorConfigStatus(ctx context.Context, client scyllav1alpha func IsScyllaClusterRegisteredWithManager(sc *scyllav1.ScyllaCluster) (bool, error) { return sc.Status.ManagerID != nil && len(*sc.Status.ManagerID) > 0, nil } + +func GetContainerReadinessMap(pod *corev1.Pod) map[string]bool { + res := map[string]bool{} + + for _, statusSet := range [][]corev1.ContainerStatus{ + pod.Status.InitContainerStatuses, + pod.Status.ContainerStatuses, + pod.Status.EphemeralContainerStatuses, + } { + for _, cs := range statusSet { + res[cs.Name] = cs.Ready + } + } + + return res +} diff --git a/test/e2e/utils/verification/scyllacluster/verify.go b/test/e2e/utils/verification/scyllacluster/verify.go index b5fc166d01d..854dc6da07a 100644 --- a/test/e2e/utils/verification/scyllacluster/verify.go +++ b/test/e2e/utils/verification/scyllacluster/verify.go @@ -60,7 +60,11 @@ func verifyPersistentVolumeClaims(ctx context.Context, coreClient corev1client.C o.Expect(scPVCNames).To(o.BeEquivalentTo(expectedPvcNames)) } -func verifyStatefulset(ctx context.Context, client corev1client.CoreV1Interface, sts *appsv1.StatefulSet, sdc *scyllav1alpha1.ScyllaDBDatacenter) { +type VerifyStatefulSetOptions struct { + PodRestartCountAssertion func(a o.Assertion, containerName, podName string) +} + +func verifyStatefulset(ctx context.Context, client corev1client.CoreV1Interface, sts *appsv1.StatefulSet, sdc *scyllav1alpha1.ScyllaDBDatacenter, options VerifyStatefulSetOptions) { o.Expect(sts.ObjectMeta.OwnerReferences).To(o.BeEquivalentTo( []metav1.OwnerReference{ { @@ -89,7 +93,7 @@ func verifyStatefulset(ctx context.Context, client corev1client.CoreV1Interface, for _, pod := range podMap { o.Expect(pod.Status.ContainerStatuses).NotTo(o.BeEmpty()) for _, cs := range pod.Status.ContainerStatuses { - o.Expect(cs.RestartCount).To(o.BeZero(), fmt.Sprintf("container %q in pod %q should not be restarted", cs.Name, pod.Name)) + options.PodRestartCountAssertion(o.ExpectWithOffset(1, cs.RestartCount), cs.Name, pod.Name) } } } @@ -118,7 +122,11 @@ func verifyPodDisruptionBudget(sc *scyllav1.ScyllaCluster, pdb *policyv1.PodDisr })) } -func Verify(ctx context.Context, kubeClient kubernetes.Interface, scyllaClient scyllaclient.Interface, sc *scyllav1.ScyllaCluster) { +type VerifyOptions struct { + VerifyStatefulSetOptions +} + +func VerifyWithOptions(ctx context.Context, kubeClient kubernetes.Interface, scyllaClient scyllaclient.Interface, sc *scyllav1.ScyllaCluster, options VerifyOptions) { framework.By("Verifying the ScyllaCluster") sc = sc.DeepCopy() @@ -294,7 +302,7 @@ func Verify(ctx context.Context, kubeClient kubernetes.Interface, scyllaClient s s := statefulsets[r.Name] - verifyStatefulset(ctx, kubeClient.CoreV1(), s, sdc) + verifyStatefulset(ctx, kubeClient.CoreV1(), s, sdc, options.VerifyStatefulSetOptions) o.Expect(sc.Status.Racks[r.Name].Stale).NotTo(o.BeNil()) 
o.Expect(*sc.Status.Racks[r.Name].Stale).To(o.BeFalse()) @@ -320,3 +328,19 @@ func Verify(ctx context.Context, kubeClient kubernetes.Interface, scyllaClient s o.Expect(hosts).To(o.HaveLen(memberCount)) } + +func Verify(ctx context.Context, kubeClient kubernetes.Interface, scyllaClient scyllaclient.Interface, sc *scyllav1.ScyllaCluster) { + VerifyWithOptions( + ctx, + kubeClient, + scyllaClient, + sc, + VerifyOptions{ + VerifyStatefulSetOptions: VerifyStatefulSetOptions{ + PodRestartCountAssertion: func(a o.Assertion, containerName, podName string) { + a.To(o.BeZero(), fmt.Sprintf("container %q in pod %q should not be restarted", containerName, podName)) + }, + }, + }, + ) +}
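A note on the `trap 'kill $( jobs -p ); exit 0' TERM` plus `sleep 1 & wait` rewrite used in the ignition wait loops and the ScyllaDB Manager Agent entrypoint above: bash defers running a trap while a foreground child such as `sleep` is executing and only acts once that child exits, whereas the `wait` builtin returns immediately when a trapped signal arrives. Backgrounding the sleep and blocking in `wait` therefore lets the TERM trap run right away, reap the background job, and exit cleanly instead of riding out the termination grace period. A minimal sketch of the pattern, reusing the signal file path from the manifests above:

```bash
#!/usr/bin/env bash
set -euEo pipefail

# On TERM, reap the background sleep (if any) and exit cleanly.
trap 'kill $( jobs -p ); exit 0' TERM

# Old form (foreground sleep): bash postpones the trap until the foreground
# child finishes, so a TERM arriving mid-sleep is handled late.
#
#   until [[ -f /mnt/shared/ignition.done ]]; do
#       sleep 1
#   done

# New form: background the sleep and block in the `wait` builtin, which returns
# as soon as a trapped signal is received, so the TERM trap runs immediately.
until [[ -f /mnt/shared/ignition.done ]]; do
    sleep 1 &
    wait
done
```

The related switch to `exec scylla-manager-agent` serves the same goal: after ignition the agent replaces the wrapper shell, so the kubelet's TERM is delivered straight to the agent process instead of to a shell that would not forward it. Changing the cleanup trap to `rm -f /mnt/shared/ignition.done` simply makes the EXIT handler tolerate the signal file being absent.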
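For context on the new `ForceIgnitionValueAnnotation`: the ignition controller reads it from the member Service it already fetches (the one named after the ScyllaDB Pod) and, when the value is exactly `"true"` or `"false"`, uses it in place of the evaluated ignition state; any other value is logged and ignored. The annotation lives under the `internal.` prefix and the e2e test sets it through `ExposeOptions.NodeService.Annotations`, so treat it as a testing knob rather than a user-facing API. A hypothetical way to apply the same override from the command line, assuming a cluster named `my-cluster` in namespace `scylla` and JSON field names matching the Go types above:

```bash
# Block ignition on all members by propagating the annotation to the node
# Services via ExposeOptions (same mechanism as the e2e test).
kubectl -n scylla patch scyllacluster/my-cluster --type=merge \
    -p '{"spec":{"exposeOptions":{"nodeService":{"annotations":{"internal.scylla-operator.scylladb.com/force-ignition-value":"false"}}}}}'

# Drop the override again; a JSON merge patch removes keys that are set to null.
kubectl -n scylla patch scyllacluster/my-cluster --type=merge \
    -p '{"spec":{"exposeOptions":{"nodeService":{"annotations":{"internal.scylla-operator.scylladb.com/force-ignition-value":null}}}}}'
```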
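The graceful-termination test above effectively checks one thing: a ScyllaDB Pod deleted with an (in practice) unbounded grace period must still disappear within `terminationTimeout`, because its containers now react to TERM. A rough manual equivalent, with a placeholder namespace and Pod name (the e2e test additionally pins the Pod UID through delete preconditions):

```bash
# With the trap/wait fix the Pod should terminate within minutes rather than
# waiting for the week-long grace period to expire and be SIGKILLed.
time kubectl -n scylla delete pod/my-cluster-us-east-1-us-east-1a-0 \
    --grace-period=604800 --wait=true
```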