diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b0349f47c211..bfa7e6676518 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -22,7 +22,7 @@ jobs: fetch-depth: 0 - name: Get changed files id: changed-files - uses: tj-actions/changed-files@c3a1bb2c992d77180ae65be6ae6c166cf40f857c # tag=v45.0.3 + uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # tag=v45.0.4 - name: Get release version id: release-version run: | @@ -105,7 +105,7 @@ jobs: curl -L "https://raw.githubusercontent.com/${{ github.repository }}/main/CHANGELOG/${{ env.RELEASE_TAG }}.md" \ -o "${{ env.RELEASE_TAG }}.md" - name: Release - uses: softprops/action-gh-release@e7a8f85e1c67a31e6ed99a94b41bd0b71bbee6b8 # tag=v2.0.9 + uses: softprops/action-gh-release@01570a1f39cb168c169c802c3bceb9e93fb10974 # tag=v2.1.0 with: draft: true files: out/* diff --git a/Makefile b/Makefile index 56a9a60e2c50..7fc0a277e3ef 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ SHELL:=/usr/bin/env bash # # Go. # -GO_VERSION ?= 1.22.8 +GO_VERSION ?= 1.22.9 GO_DIRECTIVE_VERSION ?= 1.22.0 GO_CONTAINER_IMAGE ?= docker.io/library/golang:$(GO_VERSION) diff --git a/Tiltfile b/Tiltfile index f39f31c7d98a..b6d692baf537 100644 --- a/Tiltfile +++ b/Tiltfile @@ -184,7 +184,7 @@ def load_provider_tiltfiles(): tilt_helper_dockerfile_header = """ # Tilt image -FROM golang:1.22.8 as tilt-helper +FROM golang:1.22.9 as tilt-helper # Install delve. Note this should be kept in step with the Go release minor version. RUN go install github.com/go-delve/delve/cmd/dlv@v1.22 # Support live reloading with Tilt @@ -195,7 +195,7 @@ RUN wget --output-document /restart.sh --quiet https://raw.githubusercontent.com """ tilt_dockerfile_header = """ -FROM golang:1.22.8 as tilt +FROM golang:1.22.9 as tilt WORKDIR / COPY --from=tilt-helper /process.txt . COPY --from=tilt-helper /start.sh . 
diff --git a/api/v1beta1/cluster_types.go b/api/v1beta1/cluster_types.go index c07db3b89592..3abe40d7e975 100644 --- a/api/v1beta1/cluster_types.go +++ b/api/v1beta1/cluster_types.go @@ -56,6 +56,59 @@ const ( // ClusterTopologyReconciledV1Beta2Condition is true if the topology controller is working properly. // Note: This condition is added only if the Cluster is referencing a ClusterClass / defining a managed Topology. ClusterTopologyReconciledV1Beta2Condition = "TopologyReconciled" + + // ClusterTopologyReconcileSucceededV1Beta2Reason documents the reconciliation of a Cluster topology succeeded. + ClusterTopologyReconcileSucceededV1Beta2Reason = "TopologyReconcileSucceeded" + + // ClusterTopologyReconciledFailedV1Beta2Reason documents the reconciliation of a Cluster topology + // failing due to an error. + ClusterTopologyReconciledFailedV1Beta2Reason = "TopologyReconcileFailed" + + // ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because Control Plane is not yet updated to match the desired topology spec. + ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason = "ControlPlaneUpgradePending" + + // ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachineDeployments is yet to be created. + // This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable. + ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason = "MachineDeploymentsCreatePending" + + // ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec. 
+ ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason = "MachineDeploymentsUpgradePending" + + // ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because the upgrade for at least one of the MachineDeployments has been deferred. + ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason = "MachineDeploymentsUpgradeDeferred" + + // ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec. + ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason = "MachinePoolsUpgradePending" + + // ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachinePools is yet to be created. + // This generally happens because new MachinePool creations are held off while the ControlPlane is not stable. + ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason = "MachinePoolsCreatePending" + + // ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because the upgrade for at least one of the MachinePools has been deferred. + ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason = "MachinePoolsUpgradeDeferred" + + // ClusterTopologyReconciledHookBlockingV1Beta2Reason documents reconciliation of a Cluster topology + // not yet completed because at least one of the lifecycle hooks is blocking. + ClusterTopologyReconciledHookBlockingV1Beta2Reason = "LifecycleHookBlocking" + + // ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason documents reconciliation of a Cluster topology not + // yet completed because the ClusterClass has not reconciled yet. 
If this condition persists there may be an issue + // with the ClusterClass surfaced in the ClusterClass status or controller logs. + ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason = "ClusterClassNotReconciled" + + // ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason surfaces when the Cluster is deleting because the + // DeletionTimestamp is set. + ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason + + // ClusterTopologyReconcilePausedV1Beta2Reason surfaces when the Cluster is paused. + ClusterTopologyReconcilePausedV1Beta2Reason = PausedV1Beta2Reason ) // Cluster's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version. diff --git a/api/v1beta1/condition_consts.go b/api/v1beta1/condition_consts.go index 059b4ccc74a9..19fe608b71d1 100644 --- a/api/v1beta1/condition_consts.go +++ b/api/v1beta1/condition_consts.go @@ -337,6 +337,9 @@ const ( // yet completed because the ClusterClass has not reconciled yet. If this condition persists there may be an issue // with the ClusterClass surfaced in the ClusterClass status or controller logs. TopologyReconciledClusterClassNotReconciledReason = "ClusterClassNotReconciled" + + // TopologyReconciledPausedReason (Severity=Info) surfaces when the Cluster is paused. + TopologyReconciledPausedReason = "Paused" ) // Conditions and condition reasons for ClusterClass. diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index c6827e6cd6af..3a3038370e5e 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -123,7 +123,15 @@ const ( const ( // MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment. // The Machine's owner (e.g. MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec. 
+ // NOTE: The Machine's owner might use this condition to also surface other use cases in which the Machine is considered not up to date, e.g. when MachineDeployment spec.rolloutAfter + // is expired and the Machine needs to be rolled out. MachineUpToDateV1Beta2Condition = "UpToDate" + + // MachineUpToDateV1Beta2Reason surfaces when a Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment. + MachineUpToDateV1Beta2Reason = "UpToDate" + + // MachineNotUpToDateV1Beta2Reason surfaces when a Machine spec does not match the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment. + MachineNotUpToDateV1Beta2Reason = "NotUpToDate" ) // Machine's BootstrapConfigReady condition and corresponding reasons that will be used in v1Beta2 API version. diff --git a/api/v1beta1/machineset_types.go b/api/v1beta1/machineset_types.go index 8b9fc282d9ee..03c8edd2aa09 100644 --- a/api/v1beta1/machineset_types.go +++ b/api/v1beta1/machineset_types.go @@ -153,10 +153,37 @@ const ( MachineSetMachinesUpToDateInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason ) -// Conditions that will be used for the MachineSet object in v1Beta2 API version. +// MachineSet's Remediating condition and corresponding reasons that will be used in v1Beta2 API version. const ( // MachineSetRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any. MachineSetRemediatingV1Beta2Condition = RemediatingV1Beta2Condition + + // MachineSetRemediatingV1Beta2Reason surfaces when the MachineSet has at least one machine with HealthCheckSucceeded set to false + // and with the OwnerRemediated condition set to false. + MachineSetRemediatingV1Beta2Reason = RemediatingV1Beta2Reason + + // MachineSetNotRemediatingV1Beta2Reason surfaces when the MachineSet does not have any machine with HealthCheckSucceeded set to false + // and with the OwnerRemediated condition set to false. 
+ MachineSetNotRemediatingV1Beta2Reason = NotRemediatingV1Beta2Reason + + // MachineSetRemediatingInternalErrorV1Beta2Reason surfaces unexpected failures when computing the Remediating condition. + MachineSetRemediatingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason +) + +// Reasons that will be used for the OwnerRemediated condition set by MachineHealthCheck on MachineSet controlled machines +// being remediated in v1Beta2 API version. +const ( + // MachineSetMachineCannotBeRemediatedV1Beta2Reason surfaces when remediation of a MachineSet machine can't be started. + MachineSetMachineCannotBeRemediatedV1Beta2Reason = "CannotBeRemediated" + + // MachineSetMachineRemediationDeferredV1Beta2Reason surfaces when remediation of a MachineSet machine must be deferred. + MachineSetMachineRemediationDeferredV1Beta2Reason = "RemediationDeferred" + + // MachineSetMachineRemediationMachineDeletedV1Beta2Reason surfaces when remediation of a MachineSet machine + // has been completed by deleting the unhealthy machine. + // Note: After an unhealthy machine is deleted, a new one is created by the MachineSet as part of the + // regular reconcile loop that ensures the correct number of replicas exist. + MachineSetMachineRemediationMachineDeletedV1Beta2Reason = "MachineDeleted" ) // MachineSet's Deleting condition and corresponding reasons that will be used in v1Beta2 API version. diff --git a/cmd/clusterctl/client/config/providers_client.go b/cmd/clusterctl/client/config/providers_client.go index fa74ff6df9d4..f3e9ed6ffb08 100644 --- a/cmd/clusterctl/client/config/providers_client.go +++ b/cmd/clusterctl/client/config/providers_client.go @@ -101,6 +101,7 @@ const ( // IPAM providers. const ( InClusterIPAMProviderName = "in-cluster" + NutanixIPAMProviderName = "nutanix" ) // Add-on providers. @@ -108,6 +109,11 @@ const ( HelmAddonProviderName = "helm" ) +// Runtime extensions providers. +const ( + NutanixRuntimeExtensionsProviderName = "nutanix" +) + // Other. 
const ( // ProvidersConfigKey is a constant for finding provider configurations with the ProvidersClient. @@ -422,6 +428,11 @@ func (p *providersClient) defaults() []Provider { url: "https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster/releases/latest/ipam-components.yaml", providerType: clusterctlv1.IPAMProviderType, }, + &provider{ + name: NutanixIPAMProviderName, + url: "https://github.com/nutanix-cloud-native/cluster-api-ipam-provider-nutanix/releases/latest/ipam-components.yaml", + providerType: clusterctlv1.IPAMProviderType, + }, // Add-on providers &provider{ @@ -429,6 +440,13 @@ func (p *providersClient) defaults() []Provider { url: "https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/releases/latest/addon-components.yaml", providerType: clusterctlv1.AddonProviderType, }, + + // Runtime extensions providers + &provider{ + name: NutanixRuntimeExtensionsProviderName, + url: "https://github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/releases/latest/runtime-extensions-components.yaml", + providerType: clusterctlv1.RuntimeExtensionProviderType, + }, } return defaults diff --git a/cmd/clusterctl/client/config_test.go b/cmd/clusterctl/client/config_test.go index b08105653f11..6497a1c8443c 100644 --- a/cmd/clusterctl/client/config_test.go +++ b/cmd/clusterctl/client/config_test.go @@ -109,6 +109,8 @@ func Test_clusterctlClient_GetProvidersConfig(t *testing.T) { config.VSphereProviderName, config.VultrProviderName, config.InClusterIPAMProviderName, + config.NutanixIPAMProviderName, + config.NutanixRuntimeExtensionsProviderName, config.HelmAddonProviderName, }, wantErr: false, @@ -174,6 +176,8 @@ func Test_clusterctlClient_GetProvidersConfig(t *testing.T) { config.VSphereProviderName, config.VultrProviderName, config.InClusterIPAMProviderName, + config.NutanixIPAMProviderName, + config.NutanixRuntimeExtensionsProviderName, config.HelmAddonProviderName, }, wantErr: false, diff --git 
a/cmd/clusterctl/cmd/config_repositories_test.go b/cmd/clusterctl/cmd/config_repositories_test.go index 708f3584a4dd..8b107980188c 100644 --- a/cmd/clusterctl/cmd/config_repositories_test.go +++ b/cmd/clusterctl/cmd/config_repositories_test.go @@ -23,7 +23,6 @@ import ( "path/filepath" "testing" - "github.com/google/go-cmp/cmp" . "github.com/onsi/gomega" ) @@ -46,13 +45,13 @@ func Test_runGetRepositories(t *testing.T) { out, err := io.ReadAll(buf) g.Expect(err).ToNot(HaveOccurred()) - var diff string + // Use gomega's BeComparableTo as opposed to Equals to compare output which uses gocmp under + // the hood and correctly prints any differences between the two strings. if val == RepositoriesOutputText { - diff = cmp.Diff(expectedOutputText, string(out)) + g.Expect(string(out)).To(BeComparableTo(expectedOutputText)) } else if val == RepositoriesOutputYaml { - diff = cmp.Diff(expectedOutputYaml, string(out)) + g.Expect(string(out)).To(BeComparableTo(expectedOutputYaml)) } - g.Expect(diff).To(BeEmpty()) // Use diff to compare as Gomega output does not actually print the string values on failure } }) @@ -102,63 +101,65 @@ providers: type: "CoreProvider" ` -var expectedOutputText = `NAME TYPE URL FILE -cluster-api CoreProvider https://github.com/myorg/myforkofclusterapi/releases/latest/ core_components.yaml -another-provider BootstrapProvider ./ bootstrap-components.yaml -canonical-kubernetes BootstrapProvider https://github.com/canonical/cluster-api-k8s/releases/latest/ bootstrap-components.yaml -k0sproject-k0smotron BootstrapProvider https://github.com/k0sproject/k0smotron/releases/latest/ bootstrap-components.yaml -kubeadm BootstrapProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ bootstrap-components.yaml -kubekey-k3s BootstrapProvider https://github.com/kubesphere/kubekey/releases/latest/ bootstrap-components.yaml -microk8s BootstrapProvider https://github.com/canonical/cluster-api-bootstrap-provider-microk8s/releases/latest/ 
bootstrap-components.yaml -ocne BootstrapProvider https://github.com/verrazzano/cluster-api-provider-ocne/releases/latest/ bootstrap-components.yaml -rke2 BootstrapProvider https://github.com/rancher/cluster-api-provider-rke2/releases/latest/ bootstrap-components.yaml -talos BootstrapProvider https://github.com/siderolabs/cluster-api-bootstrap-provider-talos/releases/latest/ bootstrap-components.yaml -canonical-kubernetes ControlPlaneProvider https://github.com/canonical/cluster-api-k8s/releases/latest/ control-plane-components.yaml -k0sproject-k0smotron ControlPlaneProvider https://github.com/k0sproject/k0smotron/releases/latest/ control-plane-components.yaml -kamaji ControlPlaneProvider https://github.com/clastix/cluster-api-control-plane-provider-kamaji/releases/latest/ control-plane-components.yaml -kubeadm ControlPlaneProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ control-plane-components.yaml -kubekey-k3s ControlPlaneProvider https://github.com/kubesphere/kubekey/releases/latest/ control-plane-components.yaml -microk8s ControlPlaneProvider https://github.com/canonical/cluster-api-control-plane-provider-microk8s/releases/latest/ control-plane-components.yaml -nested ControlPlaneProvider https://github.com/kubernetes-sigs/cluster-api-provider-nested/releases/latest/ control-plane-components.yaml -ocne ControlPlaneProvider https://github.com/verrazzano/cluster-api-provider-ocne/releases/latest/ control-plane-components.yaml -rke2 ControlPlaneProvider https://github.com/rancher/cluster-api-provider-rke2/releases/latest/ control-plane-components.yaml -talos ControlPlaneProvider https://github.com/siderolabs/cluster-api-control-plane-provider-talos/releases/latest/ control-plane-components.yaml -aws InfrastructureProvider my-aws-infrastructure-components.yaml -azure InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-azure/releases/latest/ infrastructure-components.yaml -byoh InfrastructureProvider 
https://github.com/vmware-tanzu/cluster-api-provider-bringyourownhost/releases/latest/ infrastructure-components.yaml -cloudstack InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-cloudstack/releases/latest/ infrastructure-components.yaml -coxedge InfrastructureProvider https://github.com/coxedge/cluster-api-provider-coxedge/releases/latest/ infrastructure-components.yaml -digitalocean InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-digitalocean/releases/latest/ infrastructure-components.yaml -docker InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ infrastructure-components-development.yaml -gcp InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-gcp/releases/latest/ infrastructure-components.yaml -hetzner InfrastructureProvider https://github.com/syself/cluster-api-provider-hetzner/releases/latest/ infrastructure-components.yaml -hivelocity-hivelocity InfrastructureProvider https://github.com/hivelocity/cluster-api-provider-hivelocity/releases/latest/ infrastructure-components.yaml -ibmcloud InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-ibmcloud/releases/latest/ infrastructure-components.yaml -in-memory InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ infrastructure-components-in-memory-development.yaml -ionoscloud-ionoscloud InfrastructureProvider https://github.com/ionos-cloud/cluster-api-provider-ionoscloud/releases/latest/ infrastructure-components.yaml -k0sproject-k0smotron InfrastructureProvider https://github.com/k0sproject/k0smotron/releases/latest/ infrastructure-components.yaml -kubekey InfrastructureProvider https://github.com/kubesphere/kubekey/releases/latest/ infrastructure-components.yaml -kubevirt InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-kubevirt/releases/latest/ infrastructure-components.yaml -linode-linode 
InfrastructureProvider https://github.com/linode/cluster-api-provider-linode/releases/latest/ infrastructure-components.yaml -maas InfrastructureProvider https://github.com/spectrocloud/cluster-api-provider-maas/releases/latest/ infrastructure-components.yaml -metal3 InfrastructureProvider https://github.com/metal3-io/cluster-api-provider-metal3/releases/latest/ infrastructure-components.yaml -my-infra-provider InfrastructureProvider /home/.config/cluster-api/overrides/infrastructure-docker/latest/ infrastructure-components.yaml -nested InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-nested/releases/latest/ infrastructure-components.yaml -nutanix InfrastructureProvider https://github.com/nutanix-cloud-native/cluster-api-provider-nutanix/releases/latest/ infrastructure-components.yaml -oci InfrastructureProvider https://github.com/oracle/cluster-api-provider-oci/releases/latest/ infrastructure-components.yaml -openstack InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-openstack/releases/latest/ infrastructure-components.yaml -outscale InfrastructureProvider https://github.com/outscale/cluster-api-provider-outscale/releases/latest/ infrastructure-components.yaml -packet InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-packet/releases/latest/ infrastructure-components.yaml -proxmox InfrastructureProvider https://github.com/ionos-cloud/cluster-api-provider-proxmox/releases/latest/ infrastructure-components.yaml -sidero InfrastructureProvider https://github.com/siderolabs/sidero/releases/latest/ infrastructure-components.yaml -tinkerbell-tinkerbell InfrastructureProvider https://github.com/tinkerbell/cluster-api-provider-tinkerbell/releases/latest/ infrastructure-components.yaml -vcd InfrastructureProvider https://github.com/vmware/cluster-api-provider-cloud-director/releases/latest/ infrastructure-components.yaml -vcluster InfrastructureProvider 
https://github.com/loft-sh/cluster-api-provider-vcluster/releases/latest/ infrastructure-components.yaml -virtink InfrastructureProvider https://github.com/smartxworks/cluster-api-provider-virtink/releases/latest/ infrastructure-components.yaml -vsphere InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/releases/latest/ infrastructure-components.yaml -vultr-vultr InfrastructureProvider https://github.com/vultr/cluster-api-provider-vultr/releases/latest/ infrastructure-components.yaml -in-cluster IPAMProvider https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster/releases/latest/ ipam-components.yaml -helm AddonProvider https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/releases/latest/ addon-components.yaml +var expectedOutputText = `NAME TYPE URL FILE +cluster-api CoreProvider https://github.com/myorg/myforkofclusterapi/releases/latest/ core_components.yaml +another-provider BootstrapProvider ./ bootstrap-components.yaml +canonical-kubernetes BootstrapProvider https://github.com/canonical/cluster-api-k8s/releases/latest/ bootstrap-components.yaml +k0sproject-k0smotron BootstrapProvider https://github.com/k0sproject/k0smotron/releases/latest/ bootstrap-components.yaml +kubeadm BootstrapProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ bootstrap-components.yaml +kubekey-k3s BootstrapProvider https://github.com/kubesphere/kubekey/releases/latest/ bootstrap-components.yaml +microk8s BootstrapProvider https://github.com/canonical/cluster-api-bootstrap-provider-microk8s/releases/latest/ bootstrap-components.yaml +ocne BootstrapProvider https://github.com/verrazzano/cluster-api-provider-ocne/releases/latest/ bootstrap-components.yaml +rke2 BootstrapProvider https://github.com/rancher/cluster-api-provider-rke2/releases/latest/ bootstrap-components.yaml +talos BootstrapProvider https://github.com/siderolabs/cluster-api-bootstrap-provider-talos/releases/latest/ 
bootstrap-components.yaml +canonical-kubernetes ControlPlaneProvider https://github.com/canonical/cluster-api-k8s/releases/latest/ control-plane-components.yaml +k0sproject-k0smotron ControlPlaneProvider https://github.com/k0sproject/k0smotron/releases/latest/ control-plane-components.yaml +kamaji ControlPlaneProvider https://github.com/clastix/cluster-api-control-plane-provider-kamaji/releases/latest/ control-plane-components.yaml +kubeadm ControlPlaneProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ control-plane-components.yaml +kubekey-k3s ControlPlaneProvider https://github.com/kubesphere/kubekey/releases/latest/ control-plane-components.yaml +microk8s ControlPlaneProvider https://github.com/canonical/cluster-api-control-plane-provider-microk8s/releases/latest/ control-plane-components.yaml +nested ControlPlaneProvider https://github.com/kubernetes-sigs/cluster-api-provider-nested/releases/latest/ control-plane-components.yaml +ocne ControlPlaneProvider https://github.com/verrazzano/cluster-api-provider-ocne/releases/latest/ control-plane-components.yaml +rke2 ControlPlaneProvider https://github.com/rancher/cluster-api-provider-rke2/releases/latest/ control-plane-components.yaml +talos ControlPlaneProvider https://github.com/siderolabs/cluster-api-control-plane-provider-talos/releases/latest/ control-plane-components.yaml +aws InfrastructureProvider my-aws-infrastructure-components.yaml +azure InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-azure/releases/latest/ infrastructure-components.yaml +byoh InfrastructureProvider https://github.com/vmware-tanzu/cluster-api-provider-bringyourownhost/releases/latest/ infrastructure-components.yaml +cloudstack InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-cloudstack/releases/latest/ infrastructure-components.yaml +coxedge InfrastructureProvider https://github.com/coxedge/cluster-api-provider-coxedge/releases/latest/ 
infrastructure-components.yaml +digitalocean InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-digitalocean/releases/latest/ infrastructure-components.yaml +docker InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ infrastructure-components-development.yaml +gcp InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-gcp/releases/latest/ infrastructure-components.yaml +hetzner InfrastructureProvider https://github.com/syself/cluster-api-provider-hetzner/releases/latest/ infrastructure-components.yaml +hivelocity-hivelocity InfrastructureProvider https://github.com/hivelocity/cluster-api-provider-hivelocity/releases/latest/ infrastructure-components.yaml +ibmcloud InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-ibmcloud/releases/latest/ infrastructure-components.yaml +in-memory InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api/releases/latest/ infrastructure-components-in-memory-development.yaml +ionoscloud-ionoscloud InfrastructureProvider https://github.com/ionos-cloud/cluster-api-provider-ionoscloud/releases/latest/ infrastructure-components.yaml +k0sproject-k0smotron InfrastructureProvider https://github.com/k0sproject/k0smotron/releases/latest/ infrastructure-components.yaml +kubekey InfrastructureProvider https://github.com/kubesphere/kubekey/releases/latest/ infrastructure-components.yaml +kubevirt InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-kubevirt/releases/latest/ infrastructure-components.yaml +linode-linode InfrastructureProvider https://github.com/linode/cluster-api-provider-linode/releases/latest/ infrastructure-components.yaml +maas InfrastructureProvider https://github.com/spectrocloud/cluster-api-provider-maas/releases/latest/ infrastructure-components.yaml +metal3 InfrastructureProvider https://github.com/metal3-io/cluster-api-provider-metal3/releases/latest/ 
infrastructure-components.yaml +my-infra-provider InfrastructureProvider /home/.config/cluster-api/overrides/infrastructure-docker/latest/ infrastructure-components.yaml +nested InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-nested/releases/latest/ infrastructure-components.yaml +nutanix InfrastructureProvider https://github.com/nutanix-cloud-native/cluster-api-provider-nutanix/releases/latest/ infrastructure-components.yaml +oci InfrastructureProvider https://github.com/oracle/cluster-api-provider-oci/releases/latest/ infrastructure-components.yaml +openstack InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-openstack/releases/latest/ infrastructure-components.yaml +outscale InfrastructureProvider https://github.com/outscale/cluster-api-provider-outscale/releases/latest/ infrastructure-components.yaml +packet InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-packet/releases/latest/ infrastructure-components.yaml +proxmox InfrastructureProvider https://github.com/ionos-cloud/cluster-api-provider-proxmox/releases/latest/ infrastructure-components.yaml +sidero InfrastructureProvider https://github.com/siderolabs/sidero/releases/latest/ infrastructure-components.yaml +tinkerbell-tinkerbell InfrastructureProvider https://github.com/tinkerbell/cluster-api-provider-tinkerbell/releases/latest/ infrastructure-components.yaml +vcd InfrastructureProvider https://github.com/vmware/cluster-api-provider-cloud-director/releases/latest/ infrastructure-components.yaml +vcluster InfrastructureProvider https://github.com/loft-sh/cluster-api-provider-vcluster/releases/latest/ infrastructure-components.yaml +virtink InfrastructureProvider https://github.com/smartxworks/cluster-api-provider-virtink/releases/latest/ infrastructure-components.yaml +vsphere InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/releases/latest/ infrastructure-components.yaml 
+vultr-vultr InfrastructureProvider https://github.com/vultr/cluster-api-provider-vultr/releases/latest/ infrastructure-components.yaml +in-cluster IPAMProvider https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster/releases/latest/ ipam-components.yaml +nutanix IPAMProvider https://github.com/nutanix-cloud-native/cluster-api-ipam-provider-nutanix/releases/latest/ ipam-components.yaml +nutanix RuntimeExtensionProvider https://github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/releases/latest/ runtime-extensions-components.yaml +helm AddonProvider https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/releases/latest/ addon-components.yaml ` var expectedOutputYaml = `- File: core_components.yaml @@ -381,6 +382,14 @@ var expectedOutputYaml = `- File: core_components.yaml Name: in-cluster ProviderType: IPAMProvider URL: https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster/releases/latest/ +- File: ipam-components.yaml + Name: nutanix + ProviderType: IPAMProvider + URL: https://github.com/nutanix-cloud-native/cluster-api-ipam-provider-nutanix/releases/latest/ +- File: runtime-extensions-components.yaml + Name: nutanix + ProviderType: RuntimeExtensionProvider + URL: https://github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/releases/latest/ - File: addon-components.yaml Name: helm ProviderType: AddonProvider diff --git a/controllers/clustercache/cluster_accessor.go b/controllers/clustercache/cluster_accessor.go index 1e304d7c28be..ff24b0d6e1e2 100644 --- a/controllers/clustercache/cluster_accessor.go +++ b/controllers/clustercache/cluster_accessor.go @@ -32,7 +32,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/source" "sigs.k8s.io/cluster-api/util/certs" ) @@ -407,16 +406,16 @@ func (ca *clusterAccessor) GetClientCertificatePrivateKey(ctx context.Context) * 
} // Watch watches a workload cluster for events. -// Each unique watch (by input.Name) is only added once after a Connect (otherwise we return early). +// Each unique watch (by watcher.Name()) is only added once after a Connect (otherwise we return early). // During a disconnect existing watches (i.e. informers) are shutdown when stopping the cache. // After a re-connect watches will be re-added (assuming the Watch method is called again). -func (ca *clusterAccessor) Watch(ctx context.Context, input WatchInput) error { - if input.Name == "" { - return errors.New("input.Name is required") +func (ca *clusterAccessor) Watch(ctx context.Context, watcher Watcher) error { + if watcher.Name() == "" { + return errors.New("watcher.Name() cannot be empty") } if !ca.Connected(ctx) { - return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", input.Name, input.Kind) + return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", watcher.Name(), watcher.Object()) } log := ctrl.LoggerFrom(ctx) @@ -429,21 +428,21 @@ func (ca *clusterAccessor) Watch(ctx context.Context, input WatchInput) error { // Checking connection again while holding the lock, because maybe Disconnect was called since checking above. if ca.lockedState.connection == nil { - return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", input.Name, input.Kind) + return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", watcher.Name(), watcher.Object()) } // Return early if the watch was already added. 
- if ca.lockedState.connection.watches.Has(input.Name) { - log.V(6).Info(fmt.Sprintf("Skip creation of watch %s for %T because it already exists", input.Name, input.Kind)) + if ca.lockedState.connection.watches.Has(watcher.Name()) { + log.V(6).Info(fmt.Sprintf("Skip creation of watch %s for %T because it already exists", watcher.Name(), watcher.Object())) return nil } - log.Info(fmt.Sprintf("Creating watch %s for %T", input.Name, input.Kind)) - if err := input.Watcher.Watch(source.Kind(ca.lockedState.connection.cache, input.Kind, input.EventHandler, input.Predicates...)); err != nil { - return errors.Wrapf(err, "error creating watch %s for %T", input.Name, input.Kind) + log.Info(fmt.Sprintf("Creating watch %s for %T", watcher.Name(), watcher.Object())) + if err := watcher.Watch(ca.lockedState.connection.cache); err != nil { + return errors.Wrapf(err, "error creating watch %s for %T", watcher.Name(), watcher.Object()) } - ca.lockedState.connection.watches.Insert(input.Name) + ca.lockedState.connection.watches.Insert(watcher.Name()) return nil } diff --git a/controllers/clustercache/cluster_accessor_test.go b/controllers/clustercache/cluster_accessor_test.go index 0a8cef6d9754..4e48df37cd0d 100644 --- a/controllers/clustercache/cluster_accessor_test.go +++ b/controllers/clustercache/cluster_accessor_test.go @@ -327,7 +327,7 @@ func TestWatch(t *testing.T) { accessor := newClusterAccessor(clusterKey, config) tw := &testWatcher{} - wi := WatchInput{ + wi := WatcherOptions{ Name: "test-watch", Watcher: tw, Kind: &corev1.Node{}, @@ -335,7 +335,7 @@ func TestWatch(t *testing.T) { } // Add watch when not connected (fails) - err := accessor.Watch(ctx, wi) + err := accessor.Watch(ctx, NewWatcher(wi)) g.Expect(err).To(HaveOccurred()) g.Expect(errors.Is(err, ErrClusterNotConnected)).To(BeTrue()) @@ -346,12 +346,12 @@ func TestWatch(t *testing.T) { g.Expect(accessor.lockedState.connection.watches).To(BeEmpty()) // Add watch - g.Expect(accessor.Watch(ctx, wi)).To(Succeed()) + 
g.Expect(accessor.Watch(ctx, NewWatcher(wi))).To(Succeed()) g.Expect(accessor.lockedState.connection.watches.Has("test-watch")).To(BeTrue()) g.Expect(accessor.lockedState.connection.watches.Len()).To(Equal(1)) // Add watch again (no-op as watch already exists) - g.Expect(accessor.Watch(ctx, wi)).To(Succeed()) + g.Expect(accessor.Watch(ctx, NewWatcher(wi))).To(Succeed()) g.Expect(accessor.lockedState.connection.watches.Has("test-watch")).To(BeTrue()) g.Expect(accessor.lockedState.connection.watches.Len()).To(Equal(1)) diff --git a/controllers/clustercache/cluster_cache.go b/controllers/clustercache/cluster_cache.go index ddb59532ee67..f907e252569d 100644 --- a/controllers/clustercache/cluster_cache.go +++ b/controllers/clustercache/cluster_cache.go @@ -149,7 +149,7 @@ type ClusterCache interface { // During a disconnect existing watches (i.e. informers) are shutdown when stopping the cache. // After a re-connect watches will be re-added (assuming the Watch method is called again). // If there is no connection to the workload cluster ErrClusterNotConnected will be returned. - Watch(ctx context.Context, cluster client.ObjectKey, input WatchInput) error + Watch(ctx context.Context, cluster client.ObjectKey, watcher Watcher) error // GetLastProbeSuccessTimestamp returns the time when the health probe was successfully executed last. GetLastProbeSuccessTimestamp(ctx context.Context, cluster client.ObjectKey) time.Time @@ -169,17 +169,29 @@ type ClusterCache interface { // because there is no connection to the workload cluster. var ErrClusterNotConnected = errors.New("connection to the workload cluster is down") -// Watcher is a scoped-down interface from Controller that only has the Watch func. +// Watcher is an interface that can start a Watch. type Watcher interface { - // Watch watches the provided Source. 
- Watch(src source.Source) error + Name() string + Object() client.Object + Watch(cache cache.Cache) error } -// WatchInput specifies the parameters used to establish a new watch for a workload cluster. -// A source.Kind source (configured with Kind, EventHandler and Predicates) will be added to the Watcher. -// To watch for events, the source.Kind will create an informer on the Cache that we have created and cached +// SourceWatcher is a scoped-down interface from Controller that only has the Watch func. +type SourceWatcher[request comparable] interface { + Watch(src source.TypedSource[request]) error +} + +// WatcherOptions specifies the parameters used to establish a new watch for a workload cluster. +// A source.TypedKind source (configured with Kind, TypedEventHandler and Predicates) will be added to the Watcher. +// To watch for events, the source.TypedKind will create an informer on the Cache that we have created and cached +// for the given Cluster. +type WatcherOptions = TypedWatcherOptions[client.Object, ctrl.Request] + +// TypedWatcherOptions specifies the parameters used to establish a new watch for a workload cluster. +// A source.TypedKind source (configured with Kind, TypedEventHandler and Predicates) will be added to the Watcher. +// To watch for events, the source.TypedKind will create an informer on the Cache that we have created and cached // for the given Cluster. -type WatchInput struct { +type TypedWatcherOptions[object client.Object, request comparable] struct { // Name represents a unique Watch request for the specified Cluster. // The name is used to track that a specific watch is only added once to a cache. // After a connection (and thus also the cache) has been re-created, watches have to be added @@ -187,16 +199,44 @@ type WatchInput struct { Name string // Watcher is the watcher (controller) whose Reconcile() function will be called for events. - Watcher Watcher + Watcher SourceWatcher[request] // Kind is the type of resource to watch. 
- Kind client.Object + Kind object // EventHandler contains the event handlers to invoke for resource events. - EventHandler handler.EventHandler + EventHandler handler.TypedEventHandler[object, request] // Predicates is used to filter resource events. - Predicates []predicate.Predicate + Predicates []predicate.TypedPredicate[object] +} + +// NewWatcher creates a Watcher for the workload cluster. +// A source.TypedKind source (configured with Kind, TypedEventHandler and Predicates) will be added to the SourceWatcher. +// To watch for events, the source.TypedKind will create an informer on the Cache that we have created and cached +// for the given Cluster. +func NewWatcher[object client.Object, request comparable](options TypedWatcherOptions[object, request]) Watcher { + return &watcher[object, request]{ + name: options.Name, + kind: options.Kind, + eventHandler: options.EventHandler, + predicates: options.Predicates, + watcher: options.Watcher, + } +} + +type watcher[object client.Object, request comparable] struct { + name string + kind object + eventHandler handler.TypedEventHandler[object, request] + predicates []predicate.TypedPredicate[object] + watcher SourceWatcher[request] +} + +func (tw *watcher[object, request]) Name() string { return tw.name } +func (tw *watcher[object, request]) Object() client.Object { return tw.kind } +func (tw *watcher[object, request]) Watch(cache cache.Cache) error { + return tw.watcher.Watch(source.TypedKind[object, request](cache, tw.kind, tw.eventHandler, tw.predicates...)) } // GetClusterSourceOption is an option that modifies GetClusterSourceOptions for a GetClusterSource call. 
@@ -342,12 +382,12 @@ func (cc *clusterCache) GetClientCertificatePrivateKey(ctx context.Context, clus return accessor.GetClientCertificatePrivateKey(ctx), nil } -func (cc *clusterCache) Watch(ctx context.Context, cluster client.ObjectKey, input WatchInput) error { +func (cc *clusterCache) Watch(ctx context.Context, cluster client.ObjectKey, watcher Watcher) error { accessor := cc.getClusterAccessor(cluster) if accessor == nil { - return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", input.Name, input.Kind) + return errors.Wrapf(ErrClusterNotConnected, "error creating watch %s for %T", watcher.Name(), watcher.Object()) } - return accessor.Watch(ctx, input) + return accessor.Watch(ctx, watcher) } func (cc *clusterCache) GetLastProbeSuccessTimestamp(ctx context.Context, cluster client.ObjectKey) time.Time { diff --git a/controllers/external/tracker.go b/controllers/external/tracker.go index 364e5c48293e..edca28d190f1 100644 --- a/controllers/external/tracker.go +++ b/controllers/external/tracker.go @@ -37,15 +37,16 @@ import ( type ObjectTracker struct { m sync.Map - Controller controller.Controller - Cache cache.Cache - Scheme *runtime.Scheme + Controller controller.Controller + Cache cache.Cache + Scheme *runtime.Scheme + PredicateLogger *logr.Logger } // Watch uses the controller to issue a Watch only if the object hasn't been seen before. 
func (o *ObjectTracker) Watch(log logr.Logger, obj client.Object, handler handler.EventHandler, p ...predicate.Predicate) error { - if o.Controller == nil || o.Cache == nil || o.Scheme == nil { - return errors.New("all of controller, cache and scheme must be set for object tracker") + if o.Controller == nil || o.Cache == nil || o.Scheme == nil || o.PredicateLogger == nil { + return errors.New("all of Controller, Cache, Scheme and PredicateLogger must be set for object tracker") } gvk := obj.GetObjectKind().GroupVersionKind() @@ -59,7 +60,7 @@ func (o *ObjectTracker) Watch(log logr.Logger, obj client.Object, handler handle o.Cache, obj.DeepCopyObject().(client.Object), handler, - append(p, predicates.ResourceNotPaused(o.Scheme, log))..., + append(p, predicates.ResourceNotPaused(o.Scheme, *o.PredicateLogger))..., )) if err != nil { o.m.Delete(key) diff --git a/controllers/external/tracker_test.go b/controllers/external/tracker_test.go index 6c89db695de6..913b125b40b6 100644 --- a/controllers/external/tracker_test.go +++ b/controllers/external/tracker_test.go @@ -24,6 +24,7 @@ import ( "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/cache/informertest" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/log" @@ -66,7 +67,7 @@ func (c *watchCountController) Watch(_ source.Source) error { func TestRetryWatch(t *testing.T) { g := NewWithT(t) ctrl := newWatchCountController(true) - tracker := ObjectTracker{Controller: ctrl, Scheme: runtime.NewScheme(), Cache: &informertest.FakeInformers{}} + tracker := ObjectTracker{Controller: ctrl, Scheme: runtime.NewScheme(), Cache: &informertest.FakeInformers{}, PredicateLogger: ptr.To(logr.New(log.NullLogSink{}))} err := tracker.Watch(logger, &clusterv1.Cluster{}, nil) g.Expect(err).To(HaveOccurred()) @@ -80,7 +81,7 @@ func TestRetryWatch(t *testing.T) { func TestWatchMultipleTimes(t 
*testing.T) { g := NewWithT(t) ctrl := &watchCountController{} - tracker := ObjectTracker{Controller: ctrl, Scheme: runtime.NewScheme(), Cache: &informertest.FakeInformers{}} + tracker := ObjectTracker{Controller: ctrl, Scheme: runtime.NewScheme(), Cache: &informertest.FakeInformers{}, PredicateLogger: ptr.To(logr.New(log.NullLogSink{}))} obj := &clusterv1.Cluster{ TypeMeta: metav1.TypeMeta{ diff --git a/controllers/external/types.go b/controllers/external/types.go index 36d2965d3000..0d9462682611 100644 --- a/controllers/external/types.go +++ b/controllers/external/types.go @@ -35,7 +35,4 @@ type ReconcileOutput struct { // Details of the referenced external object. // +optional Result *unstructured.Unstructured - // Indicates if the external object is paused. - // +optional - Paused bool } diff --git a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go index d3049763ae92..3565e31b3dde 100644 --- a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go +++ b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go @@ -20,14 +20,32 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" // KubeadmControlPlane's Available condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // KubeadmControlPlaneAvailableV1Beta2Condition is True if the control plane can be reached, EtcdClusterHealthy is true, - // and CertificatesAvailable is true. + // KubeadmControlPlaneAvailableV1Beta2Condition is true if KubeadmControlPlane is not deleted, `CertificatesAvailable` is true, + // at least one Machine with healthy control plane components, and etcd has enough operational members to meet quorum requirements. + // More specifically, considering how kubeadm layouts components: + // - Kubernetes API server, scheduler and controller manager health is inferred by the status of + // the corresponding Pods hosted on each machine. 
+ // - In case of managed etcd, also a healthy etcd Pod and a healthy etcd member must exist on the same + // machine with the healthy Kubernetes API server, scheduler and controller manager, otherwise the k8s control + // plane cannot be considered operational (if etcd is not operational on a machine, most likely also API server, + // scheduler and controller manager on the same machine will be impacted). + // - In case of external etcd, KCP cannot make any assumption on etcd status, so all the etcd checks are skipped. KubeadmControlPlaneAvailableV1Beta2Condition = clusterv1.AvailableV1Beta2Condition + + // KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason documents a failure when inspecting the status of the + // etcd cluster hosted on KubeadmControlPlane controlled machines. + KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason + + // KubeadmControlPlaneAvailableV1Beta2Reason surfaces when the KubeadmControlPlane is available. + KubeadmControlPlaneAvailableV1Beta2Reason = clusterv1.AvailableV1Beta2Reason + + // KubeadmControlPlaneNotAvailableV1Beta2Reason surfaces when the KubeadmControlPlane is not available. + KubeadmControlPlaneNotAvailableV1Beta2Reason = clusterv1.NotAvailableV1Beta2Reason ) // KubeadmControlPlane's Initialized condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // KubeadmControlPlaneInitializedV1Beta2Condition is True when the control plane is functional enough to accept + // KubeadmControlPlaneInitializedV1Beta2Condition is true when the control plane is functional enough to accept // requests. This information is usually used as a signal for starting all the provisioning operations that // depend on a functional API server, but do not require a full HA control plane to exist. 
KubeadmControlPlaneInitializedV1Beta2Condition = "Initialized" @@ -191,7 +209,7 @@ const ( // Reasons that will be used for the OwnerRemediated condition set by MachineHealthCheck on KubeadmControlPlane controlled machines // being remediated in v1Beta2 API version. const ( - // KubeadmControlPlaneMachineRemediationInternalErrorV1Beta2Reason surfaces unexpected failures while remediation a control plane machine. + // KubeadmControlPlaneMachineRemediationInternalErrorV1Beta2Reason surfaces unexpected failures while remediating a control plane machine. KubeadmControlPlaneMachineRemediationInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason // KubeadmControlPlaneMachineCannotBeRemediatedV1Beta2Reason surfaces when remediation of a control plane machine can't be started. diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go index 7d624645305a..618bdceeca80 100644 --- a/controlplane/kubeadm/internal/control_plane.go +++ b/controlplane/kubeadm/internal/control_plane.go @@ -30,6 +30,7 @@ import ( bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/failuredomains" "sigs.k8s.io/cluster-api/util/patch" @@ -44,6 +45,10 @@ type ControlPlane struct { Machines collections.Machines machinesPatchHelpers map[string]*patch.Helper + machinesNotUptoDate collections.Machines + machinesNotUptoDateLogMessages map[string][]string + machinesNotUptoDateConditionMessages map[string][]string + // reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations reconciliationTime metav1.Time @@ -58,6 +63,16 @@ type ControlPlane struct { KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig InfraResources 
map[string]*unstructured.Unstructured + // EtcdMembers is the list of members read while computing reconcileControlPlaneConditions; also additional info below + // comes from the same func. + // NOTE: Those info are computed based on the info KCP was able to collect during inspection (e.g. if on a 3 CP + // control plane one etcd member is down, those info are based on the answer collected from two members only). + // NOTE: Those info are specifically designed for computing KCP's Available condition. + EtcdMembers []*etcd.Member + EtcdMembersAgreeOnMemberList bool + EtcdMembersAgreeOnClusterID bool + EtcdMembersAndMachinesAreMatching bool + managementCluster ManagementCluster workloadCluster WorkloadCluster @@ -97,15 +112,35 @@ func NewControlPlane(ctx context.Context, managementCluster ManagementCluster, c patchHelpers[machine.Name] = patchHelper } + // Select machines that should be rolled out because of an outdated configuration or because rolloutAfter/Before expired. + reconciliationTime := metav1.Now() + machinesNotUptoDate := make(collections.Machines, len(ownedMachines)) + machinesNotUptoDateLogMessages := map[string][]string{} + machinesNotUptoDateConditionMessages := map[string][]string{} + for _, m := range ownedMachines { + upToDate, logMessages, conditionMessages, err := UpToDate(m, kcp, &reconciliationTime, infraObjects, kubeadmConfigs) + if err != nil { + return nil, err + } + if !upToDate { + machinesNotUptoDate.Insert(m) + machinesNotUptoDateLogMessages[m.Name] = logMessages + machinesNotUptoDateConditionMessages[m.Name] = conditionMessages + } + } + return &ControlPlane{ - KCP: kcp, - Cluster: cluster, - Machines: ownedMachines, - machinesPatchHelpers: patchHelpers, - KubeadmConfigs: kubeadmConfigs, - InfraResources: infraObjects, - reconciliationTime: metav1.Now(), - managementCluster: managementCluster, + KCP: kcp, + Cluster: cluster, + Machines: ownedMachines, + machinesPatchHelpers: patchHelpers, + machinesNotUptoDate: machinesNotUptoDate, + 
machinesNotUptoDateLogMessages: machinesNotUptoDateLogMessages, + machinesNotUptoDateConditionMessages: machinesNotUptoDateConditionMessages, + KubeadmConfigs: kubeadmConfigs, + InfraResources: infraObjects, + reconciliationTime: reconciliationTime, + managementCluster: managementCluster, }, nil } @@ -152,16 +187,12 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines) } -// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date machines. +// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines. func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) { if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 { return nil, nil } - upToDateMachines, err := c.UpToDateMachines() - if err != nil { - return nil, errors.Wrapf(err, "failed to determine next failure domain for scale up") - } - return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), upToDateMachines), nil + return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil } // InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane. @@ -198,40 +229,21 @@ func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.Kubead } // MachinesNeedingRollout return a list of machines that need to be rolled out. -func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string, error) { - // Ignore machines to be deleted. 
- machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)) +func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string][]string) { + // Note: Machines already deleted are dropped because they will be replaced by new machines after deletion completes. + return c.machinesNotUptoDate.Filter(collections.Not(collections.HasDeletionTimestamp)), c.machinesNotUptoDateLogMessages +} - // Return machines if they are scheduled for rollout or if with an outdated configuration. - machinesNeedingRollout := make(collections.Machines, len(machines)) - rolloutReasons := map[string]string{} - for _, m := range machines { - reason, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m) - if err != nil { - return nil, nil, err - } - if needsRollout { - machinesNeedingRollout.Insert(m) - rolloutReasons[m.Name] = reason - } - } - return machinesNeedingRollout, rolloutReasons, nil +// NotUpToDateMachines return a list of machines that are not up to date with the control +// plane's configuration. +func (c *ControlPlane) NotUpToDateMachines() (collections.Machines, map[string][]string) { + return c.machinesNotUptoDate, c.machinesNotUptoDateConditionMessages } // UpToDateMachines returns the machines that are up to date with the control -// plane's configuration and therefore do not require rollout. -func (c *ControlPlane) UpToDateMachines() (collections.Machines, error) { - upToDateMachines := make(collections.Machines, len(c.Machines)) - for _, m := range c.Machines { - _, needsRollout, err := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m) - if err != nil { - return nil, err - } - if !needsRollout { - upToDateMachines.Insert(m) - } - } - return upToDateMachines, nil +// plane's configuration. 
+func (c *ControlPlane) UpToDateMachines() collections.Machines { + return c.Machines.Difference(c.machinesNotUptoDate) } // getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource. @@ -316,6 +328,7 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error { controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.MachineEtcdMemberHealthyCondition, }}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineUpToDateV1Beta2Condition, controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, diff --git a/controlplane/kubeadm/internal/control_plane_test.go b/controlplane/kubeadm/internal/control_plane_test.go index 792bdd360c89..5d02724de87d 100644 --- a/controlplane/kubeadm/internal/control_plane_test.go +++ b/controlplane/kubeadm/internal/control_plane_test.go @@ -22,6 +22,7 @@ import ( . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" @@ -30,8 +31,6 @@ import ( ) func TestControlPlane(t *testing.T) { - g := NewWithT(t) - t.Run("Failure domains", func(t *testing.T) { controlPlane := &ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{}, @@ -53,14 +52,95 @@ func TestControlPlane(t *testing.T) { } t.Run("With all machines in known failure domain, should return the FD with most number of machines", func(*testing.T) { + g := NewWithT(t) g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("two")) }) t.Run("With some machines in non defined failure domains", func(*testing.T) { + g := NewWithT(t) controlPlane.Machines.Insert(machine("machine-5", withFailureDomain("unknown"))) g.Expect(*controlPlane.FailureDomainWithMostMachines(ctx, controlPlane.Machines)).To(Equal("unknown")) }) }) + + t.Run("MachinesUpToDate", func(t *testing.T) { + g := NewWithT(t) + cluster := &clusterv1.Cluster{ + Status: clusterv1.ClusterStatus{ + FailureDomains: clusterv1.FailureDomains{ + "one": failureDomain(true), + "two": failureDomain(true), + "three": failureDomain(true), + }, + }, + } + kcp := &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Version: "v1.31.0", + }, + } + machines := collections.Machines{ + "machine-1": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m1"}, + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), // up-to-date + FailureDomain: ptr.To("one"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"}, + }}, + "machine-2": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m2"}, + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.29.0"), // not up-to-date + 
FailureDomain: ptr.To("two"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m2"}, + }}, + "machine-3": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m3", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.29.3"), // not up-to-date + FailureDomain: ptr.To("three"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m3"}, + }}, + "machine-4": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m4", DeletionTimestamp: ptr.To(metav1.Now())}, // deleted + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), // up-to-date + FailureDomain: ptr.To("two"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m4"}, + }}, + "machine-5": &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Name: "m5"}, + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), // up-to-date + FailureDomain: ptr.To("three"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m5"}, + }}, + } + controlPlane, err := NewControlPlane(ctx, nil, env.GetClient(), cluster, kcp, machines) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(controlPlane.Machines).To(HaveLen(5)) + + machinesNotUptoDate, machinesNotUptoDateConditionMessages := controlPlane.NotUpToDateMachines() + g.Expect(machinesNotUptoDate.Names()).To(ConsistOf("m2", "m3")) + g.Expect(machinesNotUptoDateConditionMessages).To(HaveLen(2)) + g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m2", []string{"Version v1.29.0, v1.31.0 required"})) + g.Expect(machinesNotUptoDateConditionMessages).To(HaveKeyWithValue("m3", []string{"Version v1.29.3, v1.31.0 required"})) + + 
machinesNeedingRollout, machinesNotUptoDateLogMessages := controlPlane.MachinesNeedingRollout() + g.Expect(machinesNeedingRollout.Names()).To(ConsistOf("m2")) + g.Expect(machinesNotUptoDateLogMessages).To(HaveLen(2)) + g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m2", []string{"Machine version \"v1.29.0\" is not equal to KCP version \"v1.31.0\""})) + g.Expect(machinesNotUptoDateLogMessages).To(HaveKeyWithValue("m3", []string{"Machine version \"v1.29.3\" is not equal to KCP version \"v1.31.0\""})) + + upToDateMachines := controlPlane.UpToDateMachines() + g.Expect(upToDateMachines).To(HaveLen(3)) + g.Expect(upToDateMachines.Names()).To(ConsistOf("m1", "m4", "m5")) + + fd, err := controlPlane.NextFailureDomainForScaleUp(ctx) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(fd).To(Equal(ptr.To("two"))) // deleted up-to-date machines (m4) should not be counted when picking the next failure domain for scale up + }) } func TestHasMachinesToBeRemediated(t *testing.T) { diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index f1ece2c75a90..61705ba9aee2 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -29,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -374,7 +375,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl // Wait for the cluster infrastructure to be ready before creating machines if !controlPlane.Cluster.Status.InfrastructureReady { - // Note: in future we might want to move this inside reconcileControlPlaneConditions. + // Note: in future we might want to move this inside reconcileControlPlaneAndMachinesConditions. 
v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -400,7 +401,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl // If ControlPlaneEndpoint is not set, return early if !controlPlane.Cluster.Spec.ControlPlaneEndpoint.IsValid() { - // Note: in future we might want to move this inside reconcileControlPlaneConditions. + // Note: in future we might want to move this inside reconcileControlPlaneAndMachinesConditions. v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -437,7 +438,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl // Updates conditions reporting the status of static pods and the status of the etcd cluster. // NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution. - if err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil { + if err := r.reconcileControlPlaneAndMachinesConditions(ctx, controlPlane); err != nil { return ctrl.Result{}, err } @@ -460,17 +461,14 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl } // Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations. 
- machinesNeedingRollout, rolloutReasons, err := controlPlane.MachinesNeedingRollout() - if err != nil { - return ctrl.Result{}, err - } + machinesNeedingRollout, machinesNeedingRolloutLogMessages := controlPlane.MachinesNeedingRollout() switch { case len(machinesNeedingRollout) > 0: - var reasons []string - for _, rolloutReason := range rolloutReasons { - reasons = append(reasons, rolloutReason) + var allMessages []string + for machine, messages := range machinesNeedingRolloutLogMessages { + allMessages = append(allMessages, fmt.Sprintf("Machine %s needs rollout: %s", machine, strings.Join(messages, ","))) } - log.Info(fmt.Sprintf("Rolling out Control Plane machines: %s", strings.Join(reasons, ",")), "machinesNeedingRollout", machinesNeedingRollout.Names()) + log.Info(fmt.Sprintf("Rolling out Control Plane machines: %s", strings.Join(allMessages, ",")), "machinesNeedingRollout", machinesNeedingRollout.Names()) conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(machinesNeedingRollout), len(controlPlane.Machines)-len(machinesNeedingRollout)) return r.upgradeControlPlane(ctx, controlPlane, machinesNeedingRollout) default: @@ -607,7 +605,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con // Updates conditions reporting the status of static pods and the status of the etcd cluster. 
// NOTE: Ignoring failures given that we are deleting - if err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil { + if err := r.reconcileControlPlaneAndMachinesConditions(ctx, controlPlane); err != nil { log.Error(err, "Failed to reconcile conditions") } @@ -641,7 +639,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first") controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason - controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster)) + names := objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster) + for i := range names { + names[i] = "* " + names[i] + } + controlPlane.DeletingMessage = fmt.Sprintf("KubeadmControlPlane deletion blocked because following objects still exist:\n%s", strings.Join(names, "\n")) return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil } @@ -705,7 +707,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con } // objectsPendingDeleteNames return the names of worker Machines and MachinePools pending delete. 
-func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string { +func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) []string { controlPlaneMachines := allMachines.Filter(collections.ControlPlaneMachines(cluster.Name)) workerMachines := allMachines.Difference(controlPlaneMachines) @@ -727,9 +729,9 @@ func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools } if len(workerMachineNames) > 0 { sort.Strings(workerMachineNames) - descendants = append(descendants, "worker Machines: "+clog.StringListToString(workerMachineNames)) + descendants = append(descendants, "Machines: "+clog.StringListToString(workerMachineNames)) } - return strings.Join(descendants, "; ") + return descendants } func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error { @@ -817,7 +819,7 @@ func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, contro controlPlane.Machines[machineName] = updatedMachine // Since the machine is updated, re-create the patch helper so that any subsequent // Patch calls use the correct base machine object to calculate the diffs. - // Example: reconcileControlPlaneConditions patches the machine objects in a subsequent call + // Example: reconcileControlPlaneAndMachinesConditions patches the machine objects in a subsequent call // and, it should use the updated machine to calculate the diff. 
// Note: If the patchHelpers are not re-computed based on the new updated machines, subsequent // Patch calls will fail because the patch will be calculated based on an outdated machine and will error @@ -874,9 +876,18 @@ func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, contro return nil } -// reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and -// the status of the etcd cluster. -func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) (reterr error) { +// reconcileControlPlaneAndMachinesConditions is responsible of reconciling conditions reporting the status of static pods and +// the status of the etcd cluster both on the KubeadmControlPlane and on machines. +// It also reconciles the UpToDate condition on Machines, so we can update them with a single patch operation. +func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneAndMachinesConditions(ctx context.Context, controlPlane *internal.ControlPlane) (reterr error) { + defer func() { + // Patch machines with the updated conditions. + reterr = kerrors.NewAggregate([]error{reterr, controlPlane.PatchMachines(ctx)}) + }() + + // Always reconcile machine's UpToDate condition + reconcileMachineUpToDateCondition(ctx, controlPlane) + // If the cluster is not yet initialized, there is no way to connect to the workload cluster and fetch information // for updating conditions. Return early. // We additionally check for the Available condition. 
The Available condition is set at the same time @@ -889,28 +900,19 @@ func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx cont controlPlaneInitialized := conditions.Get(controlPlane.KCP, controlplanev1.AvailableCondition) if !controlPlane.KCP.Status.Initialized || controlPlaneInitialized == nil || controlPlaneInitialized.Status != corev1.ConditionTrue { - v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason, - Message: "Waiting for Cluster control plane to be initialized", - }) - - v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason, - Message: "Waiting for Cluster control plane to be initialized", + // Overwrite conditions to InspectionFailed. + setConditionsToUnknown(setConditionsToUnknownInput{ + ControlPlane: controlPlane, + Overwrite: true, + EtcdClusterHealthyReason: controlplanev1.KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason, + ControlPlaneComponentsHealthyReason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason, + StaticPodReason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + EtcdMemberHealthyReason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", }) - return nil } - defer func() { - // Patch machines with the updated conditions. - reterr = kerrors.NewAggregate([]error{reterr, controlPlane.PatchMachines(ctx)}) - }() - // Remote conditions grace period is counted from the later of last probe success and control plane initialized. 
lastProbeSuccessTime := r.ClusterCache.GetLastProbeSuccessTimestamp(ctx, client.ObjectKeyFromObject(controlPlane.Cluster)) if time.Since(maxTime(lastProbeSuccessTime, controlPlaneInitialized.LastTransitionTime.Time)) > r.RemoteConditionsGracePeriod { @@ -968,6 +970,34 @@ func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx cont return nil } +func reconcileMachineUpToDateCondition(_ context.Context, controlPlane *internal.ControlPlane) { + machinesNotUptoDate, machinesNotUptoDateConditionMessages := controlPlane.NotUpToDateMachines() + machinesNotUptoDateNames := sets.New(machinesNotUptoDate.Names()...) + + for _, machine := range controlPlane.Machines { + if machinesNotUptoDateNames.Has(machine.Name) { + message := "" + if reasons, ok := machinesNotUptoDateConditionMessages[machine.Name]; ok { + message = strings.Join(reasons, "; ") + } + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpToDateV1Beta2Reason, + Message: message, + }) + + continue + } + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }) + } +} + type setConditionsToUnknownInput struct { ControlPlane *internal.ControlPlane Overwrite bool @@ -1050,7 +1080,7 @@ func maxTime(t1, t2 time.Time) time.Time { // reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes. // This is usually required after a machine deletion. // -// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneConditions before this. +// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneAndMachinesConditions before this. 
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) error { log := ctrl.LoggerFrom(ctx) diff --git a/controlplane/kubeadm/internal/controllers/controller_test.go b/controlplane/kubeadm/internal/controllers/controller_test.go index fe1e893e84fb..38739dabaecd 100644 --- a/controlplane/kubeadm/internal/controllers/controller_test.go +++ b/controlplane/kubeadm/internal/controllers/controller_test.go @@ -1842,7 +1842,7 @@ func TestKubeadmControlPlaneReconciler_syncMachines(t *testing.T) { g.Expect(updatedDeletingMachine.Spec).Should(BeComparableTo(deletingMachine.Spec)) } -func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testing.T) { +func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesConditions(t *testing.T) { now := time.Now() defaultMachine1 := clusterv1.Machine{ @@ -1850,7 +1850,14 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Name: "machine1-test", Namespace: metav1.NamespaceDefault, }, + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + InfrastructureRef: corev1.ObjectReference{Kind: "GenericInfrastructureMachine", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Name: "m1"}, + }, } + defaultMachine1NotUpToDate := defaultMachine1.DeepCopy() + defaultMachine1NotUpToDate.Spec.Version = ptr.To("v1.30.0") + defaultMachine2 := clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "machine2-test", @@ -1870,6 +1877,9 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Name: "kcp-test", Namespace: metav1.NamespaceDefault, }, + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Version: "v1.31.0", + }, Status: controlplanev1.KubeadmControlPlaneStatus{ Initialized: true, Conditions: clusterv1.Conditions{ @@ -1912,6 +1922,9 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin }) return kcp }(), + Machines: map[string]*clusterv1.Machine{ + 
defaultMachine1.Name: defaultMachine1.DeepCopy(), + }, }, expectKCPConditions: []metav1.Condition{ { @@ -1932,6 +1945,204 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Message: "Waiting for Cluster control plane to be initialized", }, }, + expectMachineConditions: []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: "Waiting for Cluster control plane to be initialized", + }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, + }, + }, + { + name: "Machines up to date", + controlPlane: func() *internal.ControlPlane { + controlPlane, err := 
internal.NewControlPlane(ctx, nil, env.GetClient(), defaultCluster, defaultKCP, collections.FromMachines( + defaultMachine1.DeepCopy(), + )) + if err != nil { + panic(err) + } + return controlPlane + }(), + managementCluster: &fakeManagementCluster{ + Workload: &fakeWorkloadCluster{ + Workload: &internal.Workload{ + Client: fake.NewClientBuilder().Build(), + }, + }, + }, + expectKCPConditions: []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason, + }, + { + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "* Machine machine1-test:\n" + + " * EtcdMemberHealthy: Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine machine1-test:\n" + + " * APIServerPodHealthy: Node does not exist\n" + + " * ControllerManagerPodHealthy: Node does not exist\n" + + " * SchedulerPodHealthy: Node does not exist\n" + + " * EtcdPodHealthy: Node does not exist", + }, + }, + expectMachineConditions: []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: 
controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, + }, + }, + { + name: "Machines not up to date", + controlPlane: func() *internal.ControlPlane { + controlPlane, err := internal.NewControlPlane(ctx, nil, env.GetClient(), defaultCluster, defaultKCP, collections.FromMachines( + defaultMachine1NotUpToDate.DeepCopy(), + )) + if err != nil { + panic(err) + } + return controlPlane + }(), + managementCluster: &fakeManagementCluster{ + Workload: &fakeWorkloadCluster{ + Workload: &internal.Workload{ + Client: fake.NewClientBuilder().Build(), + }, + }, + }, + expectKCPConditions: []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason, + }, + { + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "* Machine machine1-test:\n" + + " * EtcdMemberHealthy: Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + 
Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine machine1-test:\n" + + " * APIServerPodHealthy: Node does not exist\n" + + " * ControllerManagerPodHealthy: Node does not exist\n" + + " * SchedulerPodHealthy: Node does not exist\n" + + " * EtcdPodHealthy: Node does not exist", + }, + }, + expectMachineConditions: []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpToDateV1Beta2Reason, + Message: "Version v1.30.0, v1.31.0 required", + }, + }, }, { name: "connection down, preserve conditions as they have been set before (remote conditions grace period not 
passed yet)", @@ -2014,6 +2225,11 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, }, }, { @@ -2098,6 +2314,11 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason, Message: fmt.Sprintf("Last successful probe at %s", now.Add(-3*time.Minute).Format(time.RFC3339)), }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, }, }, { @@ -2204,6 +2425,11 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberConnectionDownV1Beta2Reason, Message: fmt.Sprintf("Last successful probe at %s", now.Add(-6*time.Minute).Format(time.RFC3339)), }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, }, }, { @@ -2271,6 +2497,11 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Please check controller logs for errors", }, + { + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, }, }, { @@ -2338,7 +2569,7 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneConditions(t *testin } tc.controlPlane.SetPatchHelpers(patchHelpers) - err := r.reconcileControlPlaneConditions(ctx, tc.controlPlane) + err := r.reconcileControlPlaneAndMachinesConditions(ctx, tc.controlPlane) if tc.expectErr != "" { 
g.Expect(err).To(HaveOccurred()) g.Expect(err.Error()).To(Equal(tc.expectErr)) @@ -3123,7 +3354,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason)) - g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist")) + g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)")) controlPlaneMachines := clusterv1.MachineList{} labels := map[string]string{ @@ -3184,7 +3415,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason)) - g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist")) + g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more)")) controlPlaneMachines := clusterv1.MachineList{} labels := map[string]string{ @@ -3268,7 +3499,7 @@ func TestObjectsPendingDelete(t *testing.T) { g := NewWithT(t) - g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... 
(3 more)")) + g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal([]string{"MachinePools: mp1", "Machines: w1, w2, w3, w4, w5, ... (3 more)"})) } // test utils. diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index 409f946c5b47..48253222768d 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -315,9 +315,10 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") v1beta2conditions.Set(machineToBeRemediated, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", }) // Prepare the info for tracking the remediation progress into the RemediationInProgressAnnotation. @@ -462,7 +463,7 @@ func (r *KubeadmControlPlaneReconciler) checkRetryLimits(log logr.Logger, machin // - etc. // // NOTE: this func assumes the list of members in sync with the list of machines/nodes, it is required to call reconcileEtcdMembers -// as well as reconcileControlPlaneConditions before this. +// as well as reconcileControlPlaneAndMachinesConditions before this. 
func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) { log := ctrl.LoggerFrom(ctx) diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index 086f43611638..89203037b759 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -198,7 +198,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m.Namespace, Name: m.Name}, m) g.Expect(err).ToNot(HaveOccurred()) @@ -322,7 +322,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, 
controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -381,7 +381,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -707,7 +707,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -758,7 +758,7 @@ func TestReconcileUnhealthyMachines(t 
*testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -794,7 +794,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(i - 1)) assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) g.Expect(err).ToNot(HaveOccurred()) @@ -850,7 +850,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, 
clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -903,7 +903,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -956,7 +956,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, 
controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1010,7 +1010,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1064,7 +1064,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1163,7 +1163,7 @@ func TestReconcileUnhealthyMachines(t 
*testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1199,7 +1199,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(i - 4)) assertMachineCondition(ctx, g, mi, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, mi, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: mi.Namespace, Name: mi.Name}, mi) g.Expect(err).ToNot(HaveOccurred()) @@ -1270,7 +1270,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m1, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - 
assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m1, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m1.Namespace, Name: m1.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1306,7 +1306,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(1)) assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m1) g.Expect(err).ToNot(HaveOccurred()) @@ -1382,7 +1382,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(0)) assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m2, 
clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) @@ -1419,7 +1419,7 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { g.Expect(remediationData.RetryCount).To(Equal(1)) assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") + assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") err = env.Get(ctx, client.ObjectKey{Namespace: m3.Namespace, Name: m3.Name}, m3) g.Expect(err).ToNot(HaveOccurred()) @@ -1498,8 +1498,8 @@ func TestReconcileUnhealthyMachinesSequences(t *testing.T) { assertMachineCondition(ctx, g, m2, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "") assertMachineCondition(ctx, g, m3, clusterv1.MachineOwnerRemediatedCondition, corev1.ConditionFalse, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "") - assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "") - assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, 
"") + assertMachineV1beta2Condition(ctx, g, m2, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, "Machine deletionTimestamp set") + assertMachineV1beta2Condition(ctx, g, m3, clusterv1.MachineOwnerRemediatedV1Beta2Condition, metav1.ConditionFalse, clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, "Waiting for remediation") err = env.Get(ctx, client.ObjectKey{Namespace: m2.Namespace, Name: m2.Name}, m2) g.Expect(err).ToNot(HaveOccurred()) @@ -1934,9 +1934,10 @@ func withMachineHealthCheckFailed() machineOption { Reason: clusterv1.MachineHealthCheckNodeDeletedV1Beta2Reason, }) v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } @@ -1952,9 +1953,10 @@ func withStuckRemediation() machineOption { Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, }) v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } diff --git a/controlplane/kubeadm/internal/controllers/scale.go b/controlplane/kubeadm/internal/controllers/scale.go index 7f366e19c8b5..aa3a8190408b 100644 --- a/controlplane/kubeadm/internal/controllers/scale.go +++ b/controlplane/kubeadm/internal/controllers/scale.go @@ -157,7 +157,7 @@ func (r 
*KubeadmControlPlaneReconciler) scaleDownControlPlane( // - All the health conditions on the control plane machines are true. // If the control plane is not passing preflight checks, it requeue. // -// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneConditions before this. +// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneAndMachinesConditions before this. func (r *KubeadmControlPlaneReconciler) preflightChecks(ctx context.Context, controlPlane *internal.ControlPlane, excludeFor ...*clusterv1.Machine) (ctrl.Result, error) { //nolint:unparam logger := ctrl.LoggerFrom(ctx) diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 2327d19f2cc5..485351b7e5e4 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -31,6 +31,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" @@ -45,10 +46,7 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, contro // This is necessary for CRDs including scale subresources. 
controlPlane.KCP.Status.Selector = selector.String() - upToDateMachines, err := controlPlane.UpToDateMachines() - if err != nil { - return errors.Wrapf(err, "failed to update status") - } + upToDateMachines := controlPlane.UpToDateMachines() controlPlane.KCP.Status.UpdatedReplicas = int32(len(upToDateMachines)) replicas := int32(len(controlPlane.Machines)) @@ -153,11 +151,11 @@ func (r *KubeadmControlPlaneReconciler) updateV1Beta2Status(ctx context.Context, return } - // Note: some of the status is set on reconcileControlPlaneConditions (EtcdClusterHealthy, ControlPlaneComponentsHealthy conditions), + // Note: some of the status is set on reconcileControlPlaneAndMachinesConditions (EtcdClusterHealthy, ControlPlaneComponentsHealthy conditions), // reconcileClusterCertificates (CertificatesAvailable condition), and also in the defer patch at the end of // the main reconcile loop (status.ObservedGeneration) etc - // Note: KCP also sets status on machines in reconcileUnhealthyMachines and reconcileControlPlaneConditions; if for + // Note: KCP also sets status on machines in reconcileUnhealthyMachines and reconcileControlPlaneAndMachinesConditions; if for // any reason those functions are not called before, e.g. an error, this func relies on existing Machine's condition. 
setReplicas(ctx, controlPlane.KCP, controlPlane.Machines) @@ -168,7 +166,7 @@ func (r *KubeadmControlPlaneReconciler) updateV1Beta2Status(ctx context.Context, setMachinesUpToDateCondition(ctx, controlPlane.KCP, controlPlane.Machines) setRemediatingCondition(ctx, controlPlane.KCP, controlPlane.MachinesToBeRemediatedByKCP(), controlPlane.UnhealthyMachines()) setDeletingCondition(ctx, controlPlane.KCP, controlPlane.DeletingReason, controlPlane.DeletingMessage) - // TODO: Available + setAvailableCondition(ctx, controlPlane.KCP, controlPlane.IsEtcdManaged(), controlPlane.EtcdMembers, controlPlane.EtcdMembersAgreeOnMemberList, controlPlane.EtcdMembersAgreeOnClusterID, controlPlane.EtcdMembersAndMachinesAreMatching, controlPlane.Machines) } func setReplicas(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines) { @@ -441,6 +439,141 @@ func setDeletingCondition(_ context.Context, kcp *controlplanev1.KubeadmControlP }) } +func setAvailableCondition(_ context.Context, kcp *controlplanev1.KubeadmControlPlane, etcdIsManaged bool, etcdMembers []*etcd.Member, etcdMembersAgreeOnMemberList, etcdMembersAgreeOnClusterID, etcdMembersAndMachinesAreMatching bool, machines collections.Machines) { + if !kcp.Status.Initialized { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "Control plane not yet initialized", + }) + return + } + + if etcdIsManaged { + if etcdMembers == nil { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason, + Message: "Failed to get etcd members", + }) + return + } + + if !etcdMembersAgreeOnMemberList { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: 
controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "At least one etcd member reports a list of etcd members different than the list reported by other members", + }) + return + } + + if !etcdMembersAgreeOnClusterID { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members", + }) + return + } + + if !etcdMembersAndMachinesAreMatching { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "The list of etcd members does not match the list of Machines and Nodes", + }) + return + } + } + + // Determine control plane availability looking at machines conditions, which at this stage are + // already surfacing status from etcd member and all control plane pods hosted on every machine. + // Note: we intentionally use the number of etcd members to determine the etcd quorum because + // etcd members might not match with machines, e.g. while provisioning a new machine. + etcdQuorum := (len(etcdMembers) / 2.0) + 1 + k8sControlPlaneHealthy := 0 + etcdMembersHealthy := 0 + for _, machine := range machines { + // if external etcd, only look at the status of the K8s control plane components on this machine. 
+ if !etcdIsManaged { + if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition) { + k8sControlPlaneHealthy++ + } + continue + } + + // Otherwise, etcd is managed. + // In this case, when looking at the k8s control plane we should consider how kubeadm layouts control plane components, + // and more specifically: + // - API server on one machine only connect to the local etcd member + // - ControllerManager and scheduler on a machine connect to the local API server (not to the control plane endpoint) + // As a consequence, we consider the K8s control plane on this machine healthy only if everything is healthy. + + if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) { + etcdMembersHealthy++ + } + + if v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition) && + v1beta2conditions.IsTrue(machine, controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition) { + k8sControlPlaneHealthy++ + } + } + + if kcp.DeletionTimestamp.IsZero() && + (!etcdIsManaged || etcdMembersHealthy >= etcdQuorum) && + k8sControlPlaneHealthy >= 1 && + v1beta2conditions.IsTrue(kcp, controlplanev1.KubeadmControlPlaneCertificatesAvailableV1Beta2Condition) { + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: 
controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }) + return + } + + messages := []string{} + if !kcp.DeletionTimestamp.IsZero() { + messages = append(messages, "Control plane metadata.deletionTimestamp is set") + } + + if !v1beta2conditions.IsTrue(kcp, controlplanev1.KubeadmControlPlaneCertificatesAvailableV1Beta2Condition) { + messages = append(messages, "Control plane certificates are not available") + } + + if etcdIsManaged && etcdMembersHealthy < etcdQuorum { + switch etcdMembersHealthy { + case 0: + messages = append(messages, fmt.Sprintf("There are no healthy etcd member, at least %d required for etcd quorum", etcdQuorum)) + case 1: + messages = append(messages, fmt.Sprintf("There is 1 healthy etcd member, at least %d required for etcd quorum", etcdQuorum)) + default: + messages = append(messages, fmt.Sprintf("There are %d healthy etcd members, at least %d required for etcd quorum", etcdMembersHealthy, etcdQuorum)) + } + } + + if k8sControlPlaneHealthy < 1 { + messages = append(messages, "There are no Machines with healthy control plane components, at least 1 required") + } + + v1beta2conditions.Set(kcp, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: strings.Join(messages, ";"), + }) +} + func aggregateStaleMachines(machines collections.Machines) string { if len(machines) == 0 { return "" @@ -480,10 +613,7 @@ func aggregateUnhealthyMachines(machines collections.Machines) string { return "" } - machineNames := []string{} - for _, machine := range machines { - machineNames = append(machineNames, machine.GetName()) - } + machineNames := machines.Names() if len(machineNames) == 0 { return "" @@ -502,7 +632,7 @@ func aggregateUnhealthyMachines(machines collections.Machines) string { } else { message 
+= " are " } - message += "not healthy (not to be remediated by KCP)" + message += "not healthy (not to be remediated by KubeadmControlPlane)" return message } diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index aee7b88a53d4..33f14d3f2874 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -29,8 +29,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" controlplanev1webhooks "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/webhooks" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" @@ -436,7 +438,7 @@ func Test_setScalingDownCondition(t *testing.T) { } } -func Test_setMachinesReadyAndMachinesUpToDate(t *testing.T) { +func Test_setMachinesReadyAndMachinesUpToDateConditions(t *testing.T) { readyTrue := metav1.Condition{Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionTrue} readyFalse := metav1.Condition{Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", Message: "NotReady"} @@ -480,13 +482,13 @@ func Test_setMachinesReadyAndMachinesUpToDate(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", // There is only one machine reporting issues, using the reason from that machine. 
- Message: "NotReady from Machine m3", + Message: "* Machine m3: NotReady", }, expectMachinesUpToDateCondition: metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionFalse, Reason: v1beta2conditions.MultipleIssuesReportedReason, // There are many machines reporting issues, using a generic reason. - Message: "NotUpToDate from Machines m2, m3", + Message: "* Machines m2, m3: NotUpToDate", }, }, } @@ -512,7 +514,7 @@ func Test_setRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -548,7 +550,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "* Machine m3: Machine deletionTimestamp set", }, }, { @@ -565,7 +567,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneNotRemediatingV1Beta2Reason, - Message: 
"Machine m2 is not healthy (not to be remediated by KCP)", + Message: "Machine m2 is not healthy (not to be remediated by KubeadmControlPlane)", }, }, { @@ -582,7 +584,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneNotRemediatingV1Beta2Reason, - Message: "Machines m1, m2 are not healthy (not to be remediated by KCP)", + Message: "Machines m1, m2 are not healthy (not to be remediated by KubeadmControlPlane)", }, }, } @@ -656,6 +658,413 @@ func TestDeletingCondition(t *testing.T) { } } +func Test_setAvailableCondition(t *testing.T) { + certificatesReady := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneCertificatesAvailableV1Beta2Condition, Status: metav1.ConditionTrue} + certificatesNotReady := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneCertificatesAvailableV1Beta2Condition, Status: metav1.ConditionFalse} + + apiServerPodHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue} + apiServerPodNotHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionFalse} + controllerManagerPodHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue} + schedulerPodHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue} + etcdPodHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue} + + etcdMemberHealthy := metav1.Condition{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue} + etcdMemberNotHealthy := metav1.Condition{Type: 
controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse} + + tests := []struct { + name string + controlPlane *internal.ControlPlane + expectCondition metav1.Condition + }{ + { + name: "Kcp not yet initialized", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}}, + }, + }, + }, + }, + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "Control plane not yet initialized", + }, + }, + { + name: "Failed to get etcd members", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true}, + }, + EtcdMembers: nil, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneAvailableInspectionFailedV1Beta2Reason, + Message: "Failed to get etcd members", + }, + }, + { + name: "Etcd members do not agree on member list", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{Local: 
&bootstrapv1.LocalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true}, + }, + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "At least one etcd member reports a list of etcd members different than the list reported by other members", + }, + }, + { + name: "Etcd members do not agree on cluster ID", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true}, + }, + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "At least one etcd member reports a cluster ID different than the cluster ID reported by other members", + }, + }, + { + name: "Etcd members and machines list do not match", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{Local: &bootstrapv1.LocalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{Initialized: true}, + }, + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + 
EtcdMembersAndMachinesAreMatching: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "The list of etcd members does not match the list of Machines and Nodes", + }, + }, + { + name: "KCP is available", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + }, + { + name: "One not healthy etcd members, but within quorum", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, 
etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + }, + { + name: "Two not healthy k8s control plane, but one working", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, 
Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + }, + { + name: "KCP is deleting", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + DeletionTimestamp: ptr.To(metav1.Now()), + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "Control plane metadata.deletionTimestamp is set", + }, + }, + { + name: "Certificates are not available", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: 
&controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesNotReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "Control plane certificates are not available", + }, + }, + { + name: "Not enough healthy etcd members", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, 
controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberNotHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "There is 1 healthy etcd member, at least 2 required for etcd quorum", + }, + }, + { + name: "Not enough healthy K8s control planes", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy, etcdPodHealthy, etcdMemberHealthy}}}}, + ), + EtcdMembers: []*etcd.Member{{}, {}, {}}, + EtcdMembersAgreeOnMemberList: true, + EtcdMembersAgreeOnClusterID: true, + EtcdMembersAndMachinesAreMatching: true, + }, + expectCondition: metav1.Condition{ + 
Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "There are no Machines with healthy control plane components, at least 1 required", + }, + }, + { + name: "External etcd, at least one K8s control plane", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{External: &bootstrapv1.ExternalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + ), + EtcdMembers: nil, + EtcdMembersAgreeOnMemberList: false, + EtcdMembersAgreeOnClusterID: false, + EtcdMembersAndMachinesAreMatching: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Reason, + }, + 
}, + { + name: "External etcd, not enough healthy K8s control planes", + controlPlane: &internal.ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + Etcd: bootstrapv1.Etcd{External: &bootstrapv1.ExternalEtcd{}}, + }, + }, + }, + Status: controlplanev1.KubeadmControlPlaneStatus{ + Initialized: true, + V1Beta2: &controlplanev1.KubeadmControlPlaneV1Beta2Status{ + Conditions: []metav1.Condition{certificatesReady}, + }, + }, + }, + Machines: collections.FromMachines( + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}, Status: clusterv1.MachineStatus{V1Beta2: &clusterv1.MachineV1Beta2Status{Conditions: []metav1.Condition{apiServerPodNotHealthy, controllerManagerPodHealthy, schedulerPodHealthy}}}}, + ), + EtcdMembers: nil, + EtcdMembersAgreeOnMemberList: false, + EtcdMembersAgreeOnClusterID: false, + EtcdMembersAndMachinesAreMatching: false, + }, + expectCondition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneNotAvailableV1Beta2Reason, + Message: "There are no Machines with healthy control plane components, at least 1 required", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + setAvailableCondition(ctx, tt.controlPlane.KCP, 
tt.controlPlane.IsEtcdManaged(), tt.controlPlane.EtcdMembers, tt.controlPlane.EtcdMembersAgreeOnMemberList, tt.controlPlane.EtcdMembersAgreeOnClusterID, tt.controlPlane.EtcdMembersAndMachinesAreMatching, tt.controlPlane.Machines) + + availableCondition := v1beta2conditions.Get(tt.controlPlane.KCP, controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition) + g.Expect(availableCondition).ToNot(BeNil()) + g.Expect(*availableCondition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + func TestKubeadmControlPlaneReconciler_updateStatusNoMachines(t *testing.T) { g := NewWithT(t) diff --git a/controlplane/kubeadm/internal/filters.go b/controlplane/kubeadm/internal/filters.go index acb54d51e545..65a788e62dbb 100644 --- a/controlplane/kubeadm/internal/filters.go +++ b/controlplane/kubeadm/internal/filters.go @@ -20,7 +20,6 @@ import ( "encoding/json" "fmt" "reflect" - "strings" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -41,65 +40,74 @@ import ( // - mutated in-place (ex: NodeDrainTimeout) // - are not dictated by KCP (ex: ProviderID) // - are not relevant for the rollout decision (ex: failureDomain). 
-func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool, error) { - mismatchReasons := []string{} +func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (bool, []string, []string, error) { + logMessages := []string{} + conditionMessages := []string{} if !collections.MatchesKubernetesVersion(kcp.Spec.Version)(machine) { machineVersion := "" if machine != nil && machine.Spec.Version != nil { machineVersion = *machine.Spec.Version } - mismatchReasons = append(mismatchReasons, fmt.Sprintf("Machine version %q is not equal to KCP version %q", machineVersion, kcp.Spec.Version)) + logMessages = append(logMessages, fmt.Sprintf("Machine version %q is not equal to KCP version %q", machineVersion, kcp.Spec.Version)) + conditionMessages = append(conditionMessages, fmt.Sprintf("Version %s, %s required", machineVersion, kcp.Spec.Version)) } reason, matches, err := matchesKubeadmBootstrapConfig(machineConfigs, kcp, machine) if err != nil { - return "", false, errors.Wrapf(err, "failed to match Machine spec") + return false, nil, nil, errors.Wrapf(err, "failed to match Machine spec") } if !matches { - mismatchReasons = append(mismatchReasons, reason) + logMessages = append(logMessages, reason) + conditionMessages = append(conditionMessages, "KubeadmConfig is not up-to-date") } if reason, matches := matchesTemplateClonedFrom(infraConfigs, kcp, machine); !matches { - mismatchReasons = append(mismatchReasons, reason) + logMessages = append(logMessages, reason) + conditionMessages = append(conditionMessages, fmt.Sprintf("%s is not up-to-date", machine.Spec.InfrastructureRef.Kind)) } - if len(mismatchReasons) > 0 { - return strings.Join(mismatchReasons, ","), false, nil + if 
len(logMessages) > 0 || len(conditionMessages) > 0 { + return false, logMessages, conditionMessages, nil } - return "", true, nil + return true, nil, nil, nil } -// NeedsRollout checks if a Machine needs to be rolled out and returns the reason why. -func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *controlplanev1.RolloutBefore, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool, error) { - rolloutReasons := []string{} +// UpToDate checks if a Machine is up to date with the control plane's configuration. +// If not, messages explaining why are provided with different level of detail for logs and conditions. +func UpToDate(machine *clusterv1.Machine, kcp *controlplanev1.KubeadmControlPlane, reconciliationTime *metav1.Time, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig) (bool, []string, []string, error) { + logMessages := []string{} + conditionMessages := []string{} // Machines whose certificates are about to expire. - if collections.ShouldRolloutBefore(reconciliationTime, rolloutBefore)(machine) { - rolloutReasons = append(rolloutReasons, "certificates will expire soon, rolloutBefore expired") + if collections.ShouldRolloutBefore(reconciliationTime, kcp.Spec.RolloutBefore)(machine) { + logMessages = append(logMessages, "certificates will expire soon, rolloutBefore expired") + conditionMessages = append(conditionMessages, "Certificates will expire soon") } // Machines that are scheduled for rollout (KCP.Spec.RolloutAfter set, // the RolloutAfter deadline is expired, and the machine was created before the deadline). 
- if collections.ShouldRolloutAfter(reconciliationTime, rolloutAfter)(machine) { - rolloutReasons = append(rolloutReasons, "rolloutAfter expired") + if collections.ShouldRolloutAfter(reconciliationTime, kcp.Spec.RolloutAfter)(machine) { + logMessages = append(logMessages, "rolloutAfter expired") + conditionMessages = append(conditionMessages, "KubeadmControlPlane spec.rolloutAfter expired") } // Machines that do not match with KCP config. - mismatchReason, matches, err := matchesMachineSpec(infraConfigs, machineConfigs, kcp, machine) + matches, specLogMessages, specConditionMessages, err := matchesMachineSpec(infraConfigs, machineConfigs, kcp, machine) if err != nil { - return "", false, errors.Wrapf(err, "failed to determine if Machine %s needs rollout", machine.Name) + return false, nil, nil, errors.Wrapf(err, "failed to determine if Machine %s is up-to-date", machine.Name) } if !matches { - rolloutReasons = append(rolloutReasons, mismatchReason) + logMessages = append(logMessages, specLogMessages...) + conditionMessages = append(conditionMessages, specConditionMessages...) } - if len(rolloutReasons) > 0 { - return fmt.Sprintf("Machine %s needs rollout: %s", machine.Name, strings.Join(rolloutReasons, ",")), true, nil + if len(logMessages) > 0 || len(conditionMessages) > 0 { + return false, logMessages, conditionMessages, nil } - return "", false, nil + return true, nil, nil, nil } // matchesTemplateClonedFrom checks if a Machine has a corresponding infrastructure machine that diff --git a/controlplane/kubeadm/internal/filters_test.go b/controlplane/kubeadm/internal/filters_test.go index db78d4b06a4f..1d6ffde3c029 100644 --- a/controlplane/kubeadm/internal/filters_test.go +++ b/controlplane/kubeadm/internal/filters_test.go @@ -19,11 +19,13 @@ package internal import ( "encoding/json" "testing" + "time" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" @@ -1420,3 +1422,173 @@ func TestMatchesTemplateClonedFrom_WithClonedFromAnnotations(t *testing.T) { }) } } + +func TestUpToDate(t *testing.T) { + reconciliationTime := metav1.Now() + + defaultKcp := &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Replicas: nil, + Version: "v1.31.0", + MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ + InfrastructureRef: corev1.ObjectReference{APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Kind: "AWSMachineTemplate", Name: "template1"}, + }, + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + ClusterName: "foo", + }, + }, + RolloutBefore: &controlplanev1.RolloutBefore{ + CertificatesExpiryDays: ptr.To(int32(60)), // rollout if certificates will expire in less then 60 days. + }, + RolloutAfter: ptr.To(metav1.Time{Time: reconciliationTime.Add(10 * 24 * time.Hour)}), // rollout 10 days from now. + }, + } + defaultMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.Time{Time: reconciliationTime.Add(-2 * 24 * time.Hour)}, // two days ago. + Annotations: map[string]string{ + controlplanev1.KubeadmClusterConfigurationAnnotation: "{\n \"clusterName\": \"foo\"\n}", + }, + }, + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + InfrastructureRef: corev1.ObjectReference{APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Kind: "AWSMachine", Name: "infra-machine1"}, + }, + Status: clusterv1.MachineStatus{ + CertificatesExpiryDate: &metav1.Time{Time: reconciliationTime.Add(100 * 24 * time.Hour)}, // certificates will expire in 100 days from now. 
+ }, + } + + defaultInfraConfigs := map[string]*unstructured.Unstructured{ + defaultMachine.Name: { + Object: map[string]interface{}{ + "kind": "AWSMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": "default", + "annotations": map[string]interface{}{ + "cluster.x-k8s.io/cloned-from-name": "template1", + "cluster.x-k8s.io/cloned-from-groupkind": "AWSMachineTemplate.infrastructure.cluster.x-k8s.io", + }, + }, + }, + }, + } + + defaultMachineConfigs := map[string]*bootstrapv1.KubeadmConfig{ + defaultMachine.Name: { + Spec: bootstrapv1.KubeadmConfigSpec{ + InitConfiguration: &bootstrapv1.InitConfiguration{}, // first control-plane + }, + }, + } + + tests := []struct { + name string + kcp *controlplanev1.KubeadmControlPlane + machine *clusterv1.Machine + infraConfigs map[string]*unstructured.Unstructured + machineConfigs map[string]*bootstrapv1.KubeadmConfig + expectUptoDate bool + expectLogMessages []string + expectConditionMessages []string + }{ + { + name: "machine up-to-date", + kcp: defaultKcp, + machine: defaultMachine, + infraConfigs: defaultInfraConfigs, + machineConfigs: defaultMachineConfigs, + expectUptoDate: true, + expectLogMessages: nil, + expectConditionMessages: nil, + }, + { + name: "certificate are expiring soon", + kcp: func() *controlplanev1.KubeadmControlPlane { + kcp := defaultKcp.DeepCopy() + kcp.Spec.RolloutBefore = &controlplanev1.RolloutBefore{ + CertificatesExpiryDays: ptr.To(int32(150)), // rollout if certificates will expire in less then 150 days. + } + return kcp + }(), + machine: defaultMachine, // certificates will expire in 100 days from now. 
+ infraConfigs: defaultInfraConfigs, + machineConfigs: defaultMachineConfigs, + expectUptoDate: false, + expectLogMessages: []string{"certificates will expire soon, rolloutBefore expired"}, + expectConditionMessages: []string{"Certificates will expire soon"}, + }, + { + name: "rollout after expired", + kcp: func() *controlplanev1.KubeadmControlPlane { + kcp := defaultKcp.DeepCopy() + kcp.Spec.RolloutAfter = ptr.To(metav1.Time{Time: reconciliationTime.Add(-1 * 24 * time.Hour)}) // one day ago + return kcp + }(), + machine: defaultMachine, // created two days ago + infraConfigs: defaultInfraConfigs, + machineConfigs: defaultMachineConfigs, + expectUptoDate: false, + expectLogMessages: []string{"rolloutAfter expired"}, + expectConditionMessages: []string{"KubeadmControlPlane spec.rolloutAfter expired"}, + }, + { + name: "kubernetes version does not match", + kcp: func() *controlplanev1.KubeadmControlPlane { + kcp := defaultKcp.DeepCopy() + kcp.Spec.Version = "v1.31.2" + return kcp + }(), + machine: defaultMachine, // defaultMachine has "v1.31.0" + infraConfigs: defaultInfraConfigs, + machineConfigs: defaultMachineConfigs, + expectUptoDate: false, + expectLogMessages: []string{"Machine version \"v1.31.0\" is not equal to KCP version \"v1.31.2\""}, + expectConditionMessages: []string{"Version v1.31.0, v1.31.2 required"}, + }, + { + name: "KubeadmConfig is not up-to-date", + kcp: func() *controlplanev1.KubeadmControlPlane { + kcp := defaultKcp.DeepCopy() + kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ClusterName = "bar" + return kcp + }(), + machine: defaultMachine, // was created with cluster name "foo" + infraConfigs: defaultInfraConfigs, + machineConfigs: defaultMachineConfigs, + expectUptoDate: false, + expectLogMessages: []string{"Machine KubeadmConfig ClusterConfiguration is outdated: diff: &v1beta1.ClusterConfiguration{\n ... 
// 10 identical fields\n ImageRepository: \"\",\n FeatureGates: nil,\n- ClusterName: \"foo\",\n+ ClusterName: \"bar\",\n }"}, + expectConditionMessages: []string{"KubeadmConfig is not up-to-date"}, + }, + { + name: "AWSMachine is not up-to-date", + kcp: func() *controlplanev1.KubeadmControlPlane { + kcp := defaultKcp.DeepCopy() + kcp.Spec.MachineTemplate.InfrastructureRef = corev1.ObjectReference{APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Kind: "AWSMachineTemplate", Name: "template2"} // kcp moving to template 2 + return kcp + }(), + machine: defaultMachine, + infraConfigs: defaultInfraConfigs, // infra config cloned from template1 + machineConfigs: defaultMachineConfigs, + expectUptoDate: false, + expectLogMessages: []string{"Infrastructure template on KCP rotated from AWSMachineTemplate.infrastructure.cluster.x-k8s.io template1 to AWSMachineTemplate.infrastructure.cluster.x-k8s.io template2"}, + expectConditionMessages: []string{"AWSMachine is not up-to-date"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + upToDate, logMessages, conditionMessages, err := UpToDate(tt.machine, tt.kcp, &reconciliationTime, tt.infraConfigs, tt.machineConfigs) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(upToDate).To(Equal(tt.expectUptoDate)) + g.Expect(logMessages).To(Equal(tt.expectLogMessages)) + g.Expect(conditionMessages).To(Equal(tt.expectConditionMessages)) + }) + } +} diff --git a/controlplane/kubeadm/internal/workload_cluster_conditions.go b/controlplane/kubeadm/internal/workload_cluster_conditions.go index fabc32f0a35d..7c650a8198cb 100644 --- a/controlplane/kubeadm/internal/workload_cluster_conditions.go +++ b/controlplane/kubeadm/internal/workload_cluster_conditions.go @@ -19,7 +19,9 @@ package internal import ( "context" "fmt" + "sort" "strings" + "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -37,6 +39,7 @@ import ( "sigs.k8s.io/cluster-api/util/collections" 
"sigs.k8s.io/cluster-api/util/conditions" v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" + clog "sigs.k8s.io/cluster-api/util/log" ) // UpdateEtcdConditions is responsible for updating machine conditions reflecting the status of all the etcd members. @@ -63,7 +66,6 @@ func (w *Workload) updateExternalEtcdConditions(_ context.Context, controlPlane func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane *ControlPlane) { // NOTE: This methods uses control plane nodes only to get in contact with etcd but then it relies on etcd // as ultimate source of truth for the list of members and for their health. - // TODO: Integrate this with clustercache / handle the grace period controlPlaneNodes, err := w.getControlPlaneNodes(ctx) if err != nil { for _, m := range controlPlane.Machines { @@ -94,8 +96,6 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane kcpErrors []string // clusterID is used to store and compare the etcd's cluster id. clusterID *uint64 - // members is used to store the list of etcd members and compare with all the other nodes in the cluster. - members []*etcd.Member ) provisioningMachines := controlPlane.Machines.Filter(collections.Not(collections.HasNode())) @@ -141,22 +141,29 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane currentMembers, err := w.getCurrentEtcdMembers(ctx, machine, node.Name) if err != nil { + // Note. even if we fail reading the member list from one node/etcd members we do not set EtcdMembersAgreeOnMemberList and EtcdMembersAgreeOnClusterID to false + // (those info are computed on what we can collect during inspection, so we can reason about availability even if there is a certain degree of problems in the cluster). continue } // Check if the list of members IDs reported is the same as all other members. // NOTE: the first member reporting this information is the baseline for this information. 
- if members == nil { - members = currentMembers + // Also, if this is the first node we are reading from let's + // assume all the members agree on member list and cluster id. + if controlPlane.EtcdMembers == nil { + controlPlane.EtcdMembers = currentMembers + controlPlane.EtcdMembersAgreeOnMemberList = true + controlPlane.EtcdMembersAgreeOnClusterID = true } - if !etcdutil.MemberEqual(members, currentMembers) { - conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports the cluster is composed by members %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(members)) + if !etcdutil.MemberEqual(controlPlane.EtcdMembers, currentMembers) { + controlPlane.EtcdMembersAgreeOnMemberList = false + conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports the cluster is composed by members %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(controlPlane.EtcdMembers)) v1beta2conditions.Set(machine, metav1.Condition{ Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, - Message: fmt.Sprintf("The etcd member hosted on this Machine reports the cluster is composed by %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(members)), + Message: fmt.Sprintf("The etcd member hosted on this Machine reports the cluster is composed by %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(controlPlane.EtcdMembers)), }) continue } @@ -204,6 +211,7 @@ 
func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane clusterID = &member.ClusterID } if *clusterID != member.ClusterID { + controlPlane.EtcdMembersAgreeOnClusterID = false conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", member.ClusterID, *clusterID) v1beta2conditions.Set(machine, metav1.Condition{ @@ -225,7 +233,17 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane } // Make sure that the list of etcd members and machines is consistent. - kcpErrors = compareMachinesAndMembers(controlPlane, members, kcpErrors) + // NOTE: Members/Machines consistency is computed based on the info KCP was able to collect during inspection (e.g. if on a 3 CP + // control plane one etcd member is down, the comparison is based on the answer collected from two members only). + // NOTE: We surface the result of compareMachinesAndMembers for the Available condition only if all the etcd members agree + // on member list and cluster id (if not, we consider the list of members not reliable). + membersAndMachinesAreMatching, membersAndMachinesCompareErrors := compareMachinesAndMembers(controlPlane, controlPlaneNodes, controlPlane.EtcdMembers) + if controlPlane.EtcdMembersAgreeOnMemberList && controlPlane.EtcdMembersAgreeOnClusterID { + controlPlane.EtcdMembersAndMachinesAreMatching = membersAndMachinesAreMatching + } else { + controlPlane.EtcdMembersAndMachinesAreMatching = false + } + kcpErrors = append(kcpErrors, membersAndMachinesCompareErrors...) 
// Aggregate components error from machines at KCP level aggregateConditionsFromMachinesToKCP(aggregateConditionsFromMachinesToKCPInput{ @@ -298,11 +316,17 @@ func (w *Workload) getCurrentEtcdMembers(ctx context.Context, machine *clusterv1 return currentMembers, nil } -func compareMachinesAndMembers(controlPlane *ControlPlane, members []*etcd.Member, kcpErrors []string) []string { - // NOTE: We run this check only if we actually know the list of members, otherwise the first for loop - // could generate a false negative when reporting missing etcd members. +func compareMachinesAndMembers(controlPlane *ControlPlane, nodes *corev1.NodeList, members []*etcd.Member) (bool, []string) { + membersAndMachinesAreMatching := true + var kcpErrors []string + + // If it failed to get members, consider the check failed in case there is at least a machine already provisioned + // (tolerate if we fail getting members when the cluster is provisioning the first machine). if members == nil { - return kcpErrors + if len(controlPlane.Machines.Filter(collections.HasNode())) > 0 { + membersAndMachinesAreMatching = false + } + return membersAndMachinesAreMatching, nil } // Check Machine -> Etcd member. @@ -318,6 +342,8 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, members []*etcd.Membe } } if !found { + // Surface there is a machine without etcd member on machine's EtcdMemberHealthy condition. + // The same info will also surface into the EtcdClusterHealthy condition on kcp. 
conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Missing etcd member") v1beta2conditions.Set(machine, metav1.Condition{ @@ -326,27 +352,57 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, members []*etcd.Membe Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: fmt.Sprintf("Etcd doesn't have an etcd member for Node %s", machine.Status.NodeRef.Name), }) + + // Instead, surface there is a machine without etcd member on kcp's' Available condition + // only if the machine is not deleting and the node exists by more than two minutes + // (this prevents the condition to flick during scale up operations). + // Note: Two minutes is the time after which we expect the system to detect the new etcd member on the machine. + if machine.DeletionTimestamp.IsZero() { + oldNode := false + if nodes != nil { + for _, node := range nodes.Items { + if machine.Status.NodeRef.Name == node.Name && time.Since(node.CreationTimestamp.Time) > 2*time.Minute { + oldNode = true + } + } + } + if oldNode { + membersAndMachinesAreMatching = false + } + } } } // Check Etcd member -> Machine. for _, member := range members { found := false + hasProvisioningMachine := false for _, machine := range controlPlane.Machines { - if machine.Status.NodeRef != nil && machine.Status.NodeRef.Name == member.Name { + if machine.Status.NodeRef == nil { + hasProvisioningMachine = true + continue + } + if machine.Status.NodeRef.Name == member.Name { found = true break } } if !found { + // Surface there is an etcd member without a machine into the EtcdClusterHealthy condition on kcp. 
name := member.Name if name == "" { name = fmt.Sprintf("%d (Name not yet assigned)", member.ID) } kcpErrors = append(kcpErrors, fmt.Sprintf("Etcd member %s does not have a corresponding Machine", name)) + + // Instead, surface there is an etcd member without a machine on kcp's Available condition + // only if there are no provisioning machines (this prevents the condition to flick during scale up operations). + if !hasProvisioningMachine { + membersAndMachinesAreMatching = false + } } } - return kcpErrors + return membersAndMachinesAreMatching, kcpErrors } // UpdateStaticPodConditions is responsible for updating machine conditions reflecting the status of all the control plane @@ -372,7 +428,6 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * } // NOTE: this fun uses control plane nodes from the workload cluster as a source of truth for the current state. - // TODO: integrate this with clustercache / handle the grace period controlPlaneNodes, err := w.getControlPlaneNodes(ctx) if err != nil { for i := range controlPlane.Machines { @@ -881,8 +936,10 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition kcpMachinesWithUnknown := sets.Set[string]{} kcpMachinesWithInfo := sets.Set[string]{} + messageMap := map[string][]string{} for i := range input.controlPlane.Machines { machine := input.controlPlane.Machines[i] + machineMessages := []string{} for _, condition := range input.machineConditions { if machineCondition := v1beta2conditions.Get(machine, condition); machineCondition != nil { switch machineCondition.Status { @@ -890,24 +947,70 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition kcpMachinesWithInfo.Insert(machine.Name) case metav1.ConditionFalse: kcpMachinesWithErrors.Insert(machine.Name) + m := machineCondition.Message + if m == "" { + m = fmt.Sprintf("condition is %s", machineCondition.Status) + } + machineMessages = append(machineMessages, fmt.Sprintf(" * %s: 
%s", machineCondition.Type, m)) case metav1.ConditionUnknown: kcpMachinesWithUnknown.Insert(machine.Name) + m := machineCondition.Message + if m == "" { + m = fmt.Sprintf("condition is %s", machineCondition.Status) + } + machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m)) } } } + + if len(machineMessages) > 0 { + message := strings.Join(machineMessages, "\n") + messageMap[message] = append(messageMap[message], machine.Name) + } + } + + // compute the order of messages according to the number of machines reporting the same message. + // Note: The list of object names is used as a secondary criteria to sort messages with the same number of objects. + messageIndex := make([]string, 0, len(messageMap)) + for m := range messageMap { + messageIndex = append(messageIndex, m) + } + + sort.SliceStable(messageIndex, func(i, j int) bool { + return len(messageMap[messageIndex[i]]) > len(messageMap[messageIndex[j]]) || + (len(messageMap[messageIndex[i]]) == len(messageMap[messageIndex[j]]) && strings.Join(messageMap[messageIndex[i]], ",") < strings.Join(messageMap[messageIndex[j]], ",")) + }) + + // Build the message + messages := []string{} + for _, message := range messageIndex { + machines := messageMap[message] + machinesMessage := "Machine" + if len(messageMap[message]) > 1 { + machinesMessage += "s" + } + + sort.Strings(machines) + machinesMessage += " " + clog.ListToString(machines, func(s string) string { return s }, 3) + + messages = append(messages, fmt.Sprintf("* %s:\n%s", machinesMessage, message)) + } + + // Append messages impacting KCP as a whole, if any + if len(input.kcpErrors) > 0 { + for _, message := range input.kcpErrors { + messages = append(messages, fmt.Sprintf("* %s", message)) + } } + message := strings.Join(messages, "\n") // In case of at least one machine with errors or KCP level errors (nodes without machines), report false. 
if len(input.kcpErrors) > 0 || len(kcpMachinesWithErrors) > 0 { - messages := input.kcpErrors - if len(kcpMachinesWithErrors) > 0 { - messages = append(messages, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", "))) - } v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{ Type: input.condition, Status: metav1.ConditionFalse, Reason: input.falseReason, - Message: strings.Join(messages, ", "), + Message: message, }) return } @@ -918,7 +1021,7 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition Type: input.condition, Status: metav1.ConditionUnknown, Reason: input.unknownReason, - Message: fmt.Sprintf("Following Machines are reporting %s unknown: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")), + Message: message, }) return } diff --git a/controlplane/kubeadm/internal/workload_cluster_conditions_test.go b/controlplane/kubeadm/internal/workload_cluster_conditions_test.go index e79c538089f2..c8c520218493 100644 --- a/controlplane/kubeadm/internal/workload_cluster_conditions_test.go +++ b/controlplane/kubeadm/internal/workload_cluster_conditions_test.go @@ -19,6 +19,7 @@ package internal import ( "fmt" "testing" + "time" . "github.com/onsi/gomega" "github.com/pkg/errors" @@ -42,15 +43,19 @@ import ( func TestUpdateEtcdConditions(t *testing.T) { tests := []struct { - name string - kcp *controlplanev1.KubeadmControlPlane - machines []*clusterv1.Machine - injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status or to fail with a specific error. - injectEtcdClientGenerator etcdClientFor // This test is injecting a fake etcdClientGenerator because it is required to nodes with a controlled Status or to fail with a specific error. 
- expectedKCPCondition *clusterv1.Condition - expectedKCPV1Beta2Condition *metav1.Condition - expectedMachineConditions map[string]clusterv1.Conditions - expectedMachineV1Beta2Conditions map[string][]metav1.Condition + name string + kcp *controlplanev1.KubeadmControlPlane + machines []*clusterv1.Machine + injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status or to fail with a specific error. + injectEtcdClientGenerator etcdClientFor // This test is injecting a fake etcdClientGenerator because it is required to nodes with a controlled Status or to fail with a specific error. + expectedKCPCondition *clusterv1.Condition + expectedKCPV1Beta2Condition *metav1.Condition + expectedMachineConditions map[string]clusterv1.Conditions + expectedMachineV1Beta2Conditions map[string][]metav1.Condition + expectedEtcdMembers []string + expectedEtcdMembersAgreeOnMemberList bool + expectedEtcdMembersAgreeOnClusterID bool + expectedEtcdMembersAndMachinesAreMatching bool }{ { name: "if list nodes return an error should report all the conditions Unknown", @@ -77,6 +82,9 @@ func TestUpdateEtcdConditions(t *testing.T) { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the etcd member"}, }, }, + expectedEtcdMembersAgreeOnMemberList: false, // without reading nodes, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // without reading nodes, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // without reading nodes, we can not make assumptions. 
}, { name: "If there are provisioning machines, a node without machine should be ignored in v1beta1, reported in v1beta2", @@ -93,16 +101,20 @@ func TestUpdateEtcdConditions(t *testing.T) { "m1": {}, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting etcd member unknown: m1", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * EtcdMemberHealthy: Node does not exist", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, }, }, + expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions. 
}, { name: "If there are no provisioning machines, a node without machine should be reported as False condition at KCP level", @@ -117,8 +129,11 @@ func TestUpdateEtcdConditions(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Control plane Node n1 does not have a corresponding Machine", + Message: "* Control plane Node n1 does not have a corresponding Machine", }, + expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions. }, { name: "failure creating the etcd client should report unknown condition", @@ -140,16 +155,20 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting etcd member unknown: m1", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * EtcdMemberHealthy: Failed to connect to the etcd Pod on the n1 Node: failed to get client for node", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to connect to the etcd Pod on the n1 Node: failed 
to get client for node"}, }, }, + expectedEtcdMembersAgreeOnMemberList: false, // failure in reading members, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // failure in reading members, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // failure in reading members, we can not make assumptions. }, { name: "etcd client reporting status errors should be reflected into a false condition", @@ -176,16 +195,20 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting etcd member errors: m1", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "* Machine m1:\n" + + " * EtcdMemberHealthy: Etcd reports errors: some errors", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports errors: some errors"}, }, }, + expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions. 
}, { name: "failure listing members should report false condition in v1beta1, unknown in v1beta2", @@ -212,16 +235,20 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting etcd member unknown: m1", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * EtcdMemberHealthy: Failed to get answer from the etcd member on the n1 Node: failed to get list of members for etcd cluster: failed to list members", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get answer from the etcd member on the n1 Node: failed to get list of members for etcd cluster: failed to list members"}, }, }, + expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions. + expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions. 
}, { name: "an etcd member with alarms should report false condition", @@ -257,16 +284,21 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting etcd member errors: m1", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "* Machine m1:\n" + + " * EtcdMemberHealthy: Etcd reports alarms: NOSPACE", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports alarms: NOSPACE"}, }, }, + expectedEtcdMembers: []string{"n1"}, + expectedEtcdMembersAgreeOnMemberList: true, + expectedEtcdMembersAgreeOnClusterID: true, + expectedEtcdMembersAndMachinesAreMatching: true, }, { name: "etcd members with different Cluster ID should report false condition", @@ -336,10 +368,11 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting etcd member errors: m2", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "* Machine m2:\n" + + " * EtcdMemberHealthy: Etcd member has cluster ID 2, but all previously 
seen etcd members have cluster ID 1", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -349,6 +382,10 @@ func TestUpdateEtcdConditions(t *testing.T) { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd member has cluster ID 2, but all previously seen etcd members have cluster ID 1"}, }, }, + expectedEtcdMembers: []string{"n1", "n2"}, + expectedEtcdMembersAgreeOnMemberList: true, + expectedEtcdMembersAgreeOnClusterID: false, + expectedEtcdMembersAndMachinesAreMatching: false, }, { name: "etcd members with different member list should report false condition", @@ -418,10 +455,11 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting etcd member errors: m2", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "* Machine m2:\n" + + " * EtcdMemberHealthy: The etcd member hosted on this Machine reports the cluster is composed by [n2 n3], but all previously seen etcd members are reporting [n1 n2]", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -431,6 +469,10 @@ func TestUpdateEtcdConditions(t *testing.T) { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "The etcd member hosted on this Machine reports the cluster is composed by [n2 n3], but all previously seen etcd 
members are reporting [n1 n2]"}, }, }, + expectedEtcdMembers: []string{"n1", "n2"}, + expectedEtcdMembersAgreeOnMemberList: false, + expectedEtcdMembersAgreeOnClusterID: true, + expectedEtcdMembersAndMachinesAreMatching: false, }, { name: "a machine without a member should report false condition", @@ -482,10 +524,11 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: &metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting etcd member errors: m2", + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "* Machine m2:\n" + + " * EtcdMemberHealthy: Etcd doesn't have an etcd member for Node n2", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -495,6 +538,10 @@ func TestUpdateEtcdConditions(t *testing.T) { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd doesn't have an etcd member for Node n2"}, }, }, + expectedEtcdMembers: []string{"n1"}, + expectedEtcdMembersAgreeOnMemberList: true, + expectedEtcdMembersAgreeOnClusterID: true, + expectedEtcdMembersAndMachinesAreMatching: false, }, { name: "healthy etcd members should report true", @@ -576,6 +623,10 @@ func TestUpdateEtcdConditions(t *testing.T) { {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, }, }, + expectedEtcdMembers: []string{"n1", "n2"}, + expectedEtcdMembersAgreeOnMemberList: true, + 
expectedEtcdMembersAgreeOnClusterID: true, + expectedEtcdMembersAndMachinesAreMatching: true, }, { name: "External etcd should set a condition at KCP level for v1beta1, not for v1beta2", @@ -590,8 +641,11 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, }, - expectedKCPCondition: conditions.TrueCondition(controlplanev1.EtcdClusterHealthyCondition), - expectedKCPV1Beta2Condition: nil, + expectedKCPCondition: conditions.TrueCondition(controlplanev1.EtcdClusterHealthyCondition), + expectedKCPV1Beta2Condition: nil, + expectedEtcdMembersAgreeOnMemberList: false, + expectedEtcdMembersAgreeOnClusterID: false, + expectedEtcdMembersAndMachinesAreMatching: false, }, } for _, tt := range tests { @@ -623,6 +677,16 @@ func TestUpdateEtcdConditions(t *testing.T) { g.Expect(m.GetConditions()).To(conditions.MatchConditions(tt.expectedMachineConditions[m.Name]), "unexpected conditions for Machine %s", m.Name) g.Expect(m.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tt.expectedMachineV1Beta2Conditions[m.Name], v1beta2conditions.IgnoreLastTransitionTime(true)), "unexpected conditions for Machine %s", m.Name) } + + g.Expect(controlPane.EtcdMembersAgreeOnMemberList).To(Equal(tt.expectedEtcdMembersAgreeOnMemberList), "EtcdMembersAgreeOnMemberList does not match") + g.Expect(controlPane.EtcdMembersAgreeOnClusterID).To(Equal(tt.expectedEtcdMembersAgreeOnClusterID), "EtcdMembersAgreeOnClusterID does not match") + g.Expect(controlPane.EtcdMembersAndMachinesAreMatching).To(Equal(tt.expectedEtcdMembersAndMachinesAreMatching), "EtcdMembersAndMachinesAreMatching does not match") + + var membersNames []string + for _, m := range controlPane.EtcdMembers { + membersNames = append(membersNames, m.Name) + } + g.Expect(membersNames).To(Equal(tt.expectedEtcdMembers)) }) } } @@ -705,10 +769,14 @@ func TestUpdateStaticPodConditions(t *testing.T) { "m1": {}, }, expectedKCPV1Beta2Condition: metav1.Condition{ - Type: 
controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting control plane unknown: m1", + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * APIServerPodHealthy: Node does not exist\n" + + " * ControllerManagerPodHealthy: Node does not exist\n" + + " * SchedulerPodHealthy: Node does not exist\n" + + " * EtcdPodHealthy: Node does not exist", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -732,7 +800,7 @@ func TestUpdateStaticPodConditions(t *testing.T) { Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, - Message: "Control plane Node n1 does not have a corresponding Machine", + Message: "* Control plane Node n1 does not have a corresponding Machine", }, }, { @@ -755,10 +823,14 @@ func TestUpdateStaticPodConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting control plane unknown: m1", + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * APIServerPodHealthy: Node n1 is unreachable\n" + + " * 
ControllerManagerPodHealthy: Node n1 is unreachable\n" + + " * SchedulerPodHealthy: Node n1 is unreachable\n" + + " * EtcdPodHealthy: Node n1 is unreachable", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -782,10 +854,14 @@ func TestUpdateStaticPodConditions(t *testing.T) { "m1": {}, }, expectedKCPV1Beta2Condition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting control plane unknown: m1", + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * APIServerPodHealthy: Node does not exist\n" + + " * ControllerManagerPodHealthy: Node does not exist\n" + + " * SchedulerPodHealthy: Node does not exist\n" + + " * EtcdPodHealthy: Node does not exist", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -814,10 +890,14 @@ func TestUpdateStaticPodConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, - Message: "Following Machines are reporting control plane unknown: m1", + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "* Machine m1:\n" + + " * APIServerPodHealthy: Node n1 does not exist\n" + + " * ControllerManagerPodHealthy: Node n1 does not exist\n" + + " * 
SchedulerPodHealthy: Node n1 does not exist\n" + + " * EtcdPodHealthy: Node n1 does not exist", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -864,10 +944,13 @@ func TestUpdateStaticPodConditions(t *testing.T) { }, }, expectedKCPV1Beta2Condition: metav1.Condition{ - Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, - Message: "Following Machines are reporting control plane errors: m1", + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, + Message: "* Machine m1:\n" + + " * ControllerManagerPodHealthy: Waiting to be scheduled\n" + + " * SchedulerPodHealthy: All the containers have been terminated\n" + + " * EtcdPodHealthy: All the containers have been terminated", }, expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ "m1": { @@ -1393,9 +1476,10 @@ func withMachineReadyV1beta2Condition(status metav1.ConditionStatus) fakeMachine machine.Status.V1Beta2 = &clusterv1.MachineV1Beta2Status{} } machine.Status.V1Beta2.Conditions = append(machine.Status.V1Beta2.Conditions, metav1.Condition{ - Type: clusterv1.MachinesReadyV1Beta2Condition, - Status: status, - Reason: "SomeReason", + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: status, + Reason: "SomeReason", + Message: fmt.Sprintf("ready condition is %s", status), }) } } @@ -1533,16 +1617,19 @@ func TestAggregateV1Beta2ConditionsFromMachinesToKCP(t *testing.T) { { name: "kcp machines with errors", machines: []*clusterv1.Machine{ + fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionFalse)), // machines are intentionally not ordered + fakeMachine("m4", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), fakeMachine("m1", 
withMachineReadyV1beta2Condition(metav1.ConditionFalse)), - fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionFalse)), fakeMachine("m3", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), - fakeMachine("m4", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), }, expectedCondition: metav1.Condition{ - Type: conditionType, - Status: metav1.ConditionFalse, - Reason: falseReason, - Message: "Following Machines are reporting something errors: m1, m2", + Type: conditionType, + Status: metav1.ConditionFalse, + Reason: falseReason, + Message: "* Machines m1, m2:\n" + + " * Ready: ready condition is False\n" + + "* Machine m4:\n" + + " * Ready: ready condition is Unknown", }, }, { @@ -1555,21 +1642,22 @@ func TestAggregateV1Beta2ConditionsFromMachinesToKCP(t *testing.T) { Type: conditionType, Status: metav1.ConditionFalse, Reason: falseReason, - Message: "something error", + Message: "* something error", }, }, { name: "kcp machines with unknown", machines: []*clusterv1.Machine{ + fakeMachine("m3", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), // machines are intentionally not ordered fakeMachine("m1", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), - fakeMachine("m3", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), }, expectedCondition: metav1.Condition{ - Type: conditionType, - Status: metav1.ConditionUnknown, - Reason: unknownReason, - Message: "Following Machines are reporting something unknown: m1, m3", + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: unknownReason, + Message: "* Machines m1, m3:\n" + + " * Ready: ready condition is Unknown", }, }, { @@ -1606,7 +1694,7 @@ func TestAggregateV1Beta2ConditionsFromMachinesToKCP(t *testing.T) { KCP: &controlplanev1.KubeadmControlPlane{}, Machines: collections.FromMachines(tt.machines...), }, - machineConditions: []string{clusterv1.MachinesReadyV1Beta2Condition}, + 
machineConditions: []string{clusterv1.MachineReadyV1Beta2Condition}, kcpErrors: tt.kcpErrors, condition: conditionType, trueReason: trueReason, @@ -1620,3 +1708,180 @@ func TestAggregateV1Beta2ConditionsFromMachinesToKCP(t *testing.T) { }) } } + +func TestCompareMachinesAndMembers(t *testing.T) { + tests := []struct { + name string + controlPlane *ControlPlane + nodes *corev1.NodeList + members []*etcd.Member + expectMembersAndMachinesAreMatching bool + expectKCPErrors []string + }{ + { + name: "true if the list of members is empty and there are no provisioned machines", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines(fakeMachine("m1")), + }, + members: nil, + nodes: nil, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: nil, + }, + { + name: "false if the list of members is empty and there are provisioned machines", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines(fakeMachine("m1", withNodeRef("m1"))), + }, + members: nil, + nodes: nil, + expectMembersAndMachinesAreMatching: false, + expectKCPErrors: nil, + }, + { + name: "true if the list of members match machines", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2", withNodeRef("m2")), + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + }, + nodes: nil, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: nil, + }, + { + name: "true if there is a machine without a member but at least a machine is still provisioning", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2", withNodeRef("m2")), + fakeMachine("m3"), // m3 is still provisioning + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: 
"m2"}, + // m3 is missing + }, + nodes: nil, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: nil, + }, + { + name: "true if there is a machine without a member but node on this machine does not exist yet", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2", withNodeRef("m2")), + fakeMachine("m3", withNodeRef("m3")), + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + // m3 is missing + }, + nodes: &corev1.NodeList{Items: []corev1.Node{ + // m3 is missing + }}, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: nil, + }, + { + name: "true if there is a machine without a member but node on this machine has been just created", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2", withNodeRef("m2")), + fakeMachine("m3", withNodeRef("m3")), + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + // m3 is missing + }, + nodes: &corev1.NodeList{Items: []corev1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "m3", CreationTimestamp: metav1.Time{Time: time.Now().Add(-110 * time.Second)}}}, // m3 is just provisioned + }}, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: nil, + }, + { + name: "false if there is a machine without a member and node on this machine is old", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2", withNodeRef("m2")), + fakeMachine("m3", withNodeRef("m3")), + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + // m3 is missing + }, + nodes: &corev1.NodeList{Items: []corev1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "m3", CreationTimestamp: metav1.Time{Time: time.Now().Add(-10 * time.Minute)}}}, // m3 
is old + }}, + expectMembersAndMachinesAreMatching: false, + expectKCPErrors: nil, + }, + { + name: "false if there is a member without a machine", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + // m2 is missing + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + }, + nodes: nil, + expectMembersAndMachinesAreMatching: false, + expectKCPErrors: []string{"Etcd member m2 does not have a corresponding Machine"}, + }, + { + name: "true if there is a member without a machine while a machine is still provisioning ", + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines( + fakeMachine("m1", withNodeRef("m1")), + fakeMachine("m2"), // m2 still provisioning + ), + }, + members: []*etcd.Member{ + {Name: "m1"}, + {Name: "m2"}, + }, + nodes: nil, + expectMembersAndMachinesAreMatching: true, + expectKCPErrors: []string{"Etcd member m2 does not have a corresponding Machine"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + got, gotErrors := compareMachinesAndMembers(tt.controlPlane, tt.nodes, tt.members) + + g.Expect(got).To(Equal(tt.expectMembersAndMachinesAreMatching)) + g.Expect(gotErrors).To(Equal(tt.expectKCPErrors)) + }) + } +} diff --git a/docs/book/src/developer/providers/contracts/clusterctl.md b/docs/book/src/developer/providers/contracts/clusterctl.md index 7916dbe8ef65..60d3ac2f1928 100644 --- a/docs/book/src/developer/providers/contracts/clusterctl.md +++ b/docs/book/src/developer/providers/contracts/clusterctl.md @@ -48,7 +48,7 @@ by allowing provider's maintainers to add their own project to the pre-defined l Provider's maintainer are the ultimately responsible for their own project. 
-Adding a provider to the `clusterctl` provider list does not imply any form of quality assessment, market screening, +Adding a provider to the `clusterctl` provider list does not imply any form of quality assessment, market screening, entitlement, recognition or support by the Cluster API maintainers. @@ -62,7 +62,7 @@ This is the process to add a new provider to the pre-defined list of providers s - For providers not in the kubernetes-sigs org, in order to prevent conflicts the `clusterctl` name must be prefixed with the provider's GitHub org name followed by `-` (see note below). - Create a PR making the necessary changes to clusterctl and the Cluster API book, e.g. [#9798](https://github.com/kubernetes-sigs/cluster-api/pull/9798), - [9720](https://github.com/kubernetes-sigs/cluster-api/pull/9720/files). + [9720](https://github.com/kubernetes-sigs/cluster-api/pull/9720/files). The Cluster API maintainers will review issues/PRs for adding new providers. If the PR merges before code freeze deadline for the next Cluster API minor release, changes will be included in the release, otherwise in the next minor @@ -73,7 +73,7 @@ branch to include it in the next patch release.

What about closed source providers?

-Closed source provider can not be added to the pre-defined list of provider shipped with `clusterctl`, however, +Closed source provider can not be added to the pre-defined list of provider shipped with `clusterctl`, however, those providers could be used with `clusterctl` by changing the [clusterctl configuration](../../../clusterctl/configuration.md). @@ -86,6 +86,13 @@ The need to add a prefix for providers not in the kubernetes-sigs org applies to `clusterctl`'s pre-defined list of provider starting from January 2024. This rule doesn't apply retroactively to the existing pre-defined providers, but we reserve the right to reconsider this in the future. +In the case of a provider being developed by an entity that owns multiple GitHub orgs, it is up to the +provider to specify which GitHub org to use as a prefix and it is the responsibility of the entity to avoid +or address provider name conflicts. + +If prefixing the provider with the provider's GitHub org prefix leads to stuttering, e.g. an `example` provider +from the `example` GitHub org would lead to `example-example`, then it is acceptable to omit the prefix. + Please note that the need to add a prefix for providers not in the kubernetes-sigs org does not apply to providers added by changing the [clusterctl configuration](../../../clusterctl/configuration.md). @@ -339,6 +346,8 @@ providers. | CAPOSC | cluster.x-k8s.io/provider=infrastructure-outscale | | CAPK0S | cluster.x-k8s.io/provider=infrastructure-k0smotron | | CAIPAMIC | cluster.x-k8s.io/provider=ipam-in-cluster | +| CAIPAMX | cluster.x-k8s.io/provider=ipam-nutanix | +| CAREX | cluster.x-k8s.io/provider=runtime-extensions-nutanix | ### Workload cluster templates @@ -537,11 +546,11 @@ management cluster by annotating any resource to be moved with `clusterctl.clust

Warning: Status subresource is never restored

-Every object's `Status` subresource, including every nested field (e.g. `Status.Conditions`), is never -restored during a `move` operation. A `Status` subresource should never contain fields that cannot +Every object's `Status` subresource, including every nested field (e.g. `Status.Conditions`), is never +restored during a `move` operation. A `Status` subresource should never contain fields that cannot be recreated or derived from information in spec, metadata, or external systems. -Provider implementers should not store non-ephemeral data in the `Status`. +Provider implementers should not store non-ephemeral data in the `Status`. `Status` should be able to be fully rebuilt by controllers by observing the current state of resources. diff --git a/docs/book/src/reference/glossary.md b/docs/book/src/reference/glossary.md index 4e96f6d27995..670dc77b0a02 100644 --- a/docs/book/src/reference/glossary.md +++ b/docs/book/src/reference/glossary.md @@ -123,6 +123,12 @@ Cluster API Provider Azure ### CAIPAMIC Cluster API IPAM Provider In Cluster +### CAIPAMX +Cluster API IPAM Provider Nutanix + +### CAREX +Cluster API Runtime Extensions Provider Nutanix + ### Cloud provider Or __Cloud service provider__ @@ -179,7 +185,7 @@ See [KCP](#kcp). ### Core Cluster API -With "core" Cluster API we refer to the common set of API and controllers that are required to run +With "core" Cluster API we refer to the common set of API and controllers that are required to run any Cluster API provider. Please note that in the Cluster API code base, side by side of "core" Cluster API components there diff --git a/docs/book/src/reference/providers.md b/docs/book/src/reference/providers.md index 95264d766bc6..19b663541a06 100644 --- a/docs/book/src/reference/providers.md +++ b/docs/book/src/reference/providers.md @@ -56,17 +56,21 @@ updated info about which API version they are supporting. 
- [Tinkerbell](https://github.com/tinkerbell/cluster-api-provider-tinkerbell) - [vcluster](https://github.com/loft-sh/cluster-api-provider-vcluster) - [Virtink](https://github.com/smartxworks/cluster-api-provider-virtink) -- [VMware Cloud Director](https://github.com/vmware/cluster-api-provider-cloud-director) +- [VMware Cloud Director](https://github.com/vmware/cluster-api-provider-cloud-director) - [vSphere](https://github.com/kubernetes-sigs/cluster-api-provider-vsphere) - [Vultr](https://github.com/vultr/cluster-api-provider-vultr) - [k0smotron RemoteMachine (SSH)](https://github.com/k0sproject/k0smotron) ## IP Address Management (IPAM) - [In Cluster](https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster) +- [Nutanix](https://github.com/nutanix-cloud-native/cluster-api-ipam-provider-nutanix) ## Addon - [Helm](https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/) +## Runtime Extensions +- [Nutanix](https://github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/) + ## API Adopters Following are the implementations managed by third-parties adopting the standard cluster-api and/or machine-api being developed here. 
diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index ee956dc0d7f2..8a441e8a36aa 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -134,9 +134,10 @@ func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M r.controller = c r.recorder = mgr.GetEventRecorderFor("machinepool-controller") r.externalTracker = external.ObjectTracker{ - Controller: c, - Cache: mgr.GetCache(), - Scheme: mgr.GetScheme(), + Controller: c, + Cache: mgr.GetCache(), + Scheme: mgr.GetScheme(), + PredicateLogger: &predicateLog, } r.ssaCache = ssa.NewCache() @@ -371,12 +372,12 @@ func (r *MachinePoolReconciler) watchClusterNodes(ctx context.Context, cluster * return nil } - return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.WatchInput{ + return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.NewWatcher(clustercache.WatcherOptions{ Name: "machinepool-watchNodes", Watcher: r.controller, Kind: &corev1.Node{}, EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachinePool), - }) + })) } func (r *MachinePoolReconciler) nodeToMachinePool(ctx context.Context, o client.Object) []reconcile.Request { diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go index 3a5474d05914..43533e4d4445 100644 --- a/exp/internal/controllers/machinepool_controller_phases.go +++ b/exp/internal/controllers/machinepool_controller_phases.go @@ -106,7 +106,7 @@ func (r *MachinePoolReconciler) reconcilePhase(mp *expv1.MachinePool) { } // reconcileExternal handles generic unstructured objects referenced by a MachinePool. 
-func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *expv1.MachinePool, ref *corev1.ObjectReference) (external.ReconcileOutput, error) { +func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, m *expv1.MachinePool, ref *corev1.ObjectReference) (external.ReconcileOutput, error) { log := ctrl.LoggerFrom(ctx) if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil { @@ -127,12 +127,6 @@ func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster * return external.ReconcileOutput{}, err } - // if external ref is paused, return error. - if annotations.IsPaused(cluster, obj) { - log.V(3).Info("External object referenced is paused") - return external.ReconcileOutput{Paused: true}, nil - } - // Initialize the patch helper. patchHelper, err := patch.NewHelper(obj, r.Client) if err != nil { @@ -179,19 +173,14 @@ func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster * // reconcileBootstrap reconciles the Spec.Bootstrap.ConfigRef object on a MachinePool. func (r *MachinePoolReconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - cluster := s.cluster m := s.machinePool // Call generic external reconciler if we have an external reference. var bootstrapConfig *unstructured.Unstructured if m.Spec.Template.Spec.Bootstrap.ConfigRef != nil { - bootstrapReconcileResult, err := r.reconcileExternal(ctx, cluster, m, m.Spec.Template.Spec.Bootstrap.ConfigRef) + bootstrapReconcileResult, err := r.reconcileExternal(ctx, m, m.Spec.Template.Spec.Bootstrap.ConfigRef) if err != nil { return ctrl.Result{}, err } - // if the external object is paused, return without any further processing - if bootstrapReconcileResult.Paused { - return ctrl.Result{}, nil - } bootstrapConfig = bootstrapReconcileResult.Result // If the bootstrap config is being deleted, return early. 
@@ -247,7 +236,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * cluster := s.cluster mp := s.machinePool // Call generic external reconciler. - infraReconcileResult, err := r.reconcileExternal(ctx, cluster, mp, &mp.Spec.Template.Spec.InfrastructureRef) + infraReconcileResult, err := r.reconcileExternal(ctx, mp, &mp.Spec.Template.Spec.InfrastructureRef) if err != nil { if apierrors.IsNotFound(errors.Cause(err)) { log.Error(err, "infrastructure reference could not be found") @@ -262,10 +251,6 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s * } return ctrl.Result{}, err } - // if the external object is paused, return without any further processing - if infraReconcileResult.Paused { - return ctrl.Result{}, nil - } infraConfig := infraReconcileResult.Result if !infraConfig.GetDeletionTimestamp().IsZero() { diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go index 070a0ef923e2..37a53341e3c8 100644 --- a/exp/internal/controllers/machinepool_controller_phases_test.go +++ b/exp/internal/controllers/machinepool_controller_phases_test.go @@ -21,6 +21,7 @@ import ( "testing" "time" + "github.com/go-logr/logr" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -33,6 +34,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/cache/informertest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/clustercache" @@ -131,9 +133,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -173,9 +176,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -212,9 +216,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: 
&informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -267,9 +272,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -334,9 +340,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -379,9 +386,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -431,9 +439,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: 
defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -496,9 +505,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -567,9 +577,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -636,9 +647,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: 
fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -727,9 +739,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1049,9 +1062,10 @@ func TestReconcileMachinePoolBootstrap(t *testing.T) { r := &MachinePoolReconciler{ Client: fakeClient, externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1215,43 +1229,6 @@ func TestReconcileMachinePoolInfrastructure(t *testing.T) { g.Expect(m.Status.GetTypedPhase()).To(Equal(expv1.MachinePoolPhaseFailed)) }, }, - { - name: "infrastructure ref is paused", - infraConfig: map[string]interface{}{ - "kind": builder.TestInfrastructureMachineTemplateKind, - "apiVersion": builder.InfrastructureGroupVersion.String(), - "metadata": map[string]interface{}{ - "name": "infra-config1", - "namespace": metav1.NamespaceDefault, - "annotations": map[string]interface{}{ - "cluster.x-k8s.io/paused": "true", - }, - }, - "spec": map[string]interface{}{ - "providerIDList": []interface{}{ - "test://id-1", - }, - }, - "status": map[string]interface{}{ - "ready": true, - "addresses": []interface{}{ - map[string]interface{}{ - "type": "InternalIP", - "address": "10.0.0.1", - }, - map[string]interface{}{ - "type": "InternalIP", - "address": "10.0.0.2", - }, - }, - 
}, - }, - expectError: false, - expectChanged: false, - expected: func(g *WithT, m *expv1.MachinePool) { - g.Expect(m.Status.InfrastructureReady).To(BeFalse()) - }, - }, { name: "ready bootstrap, infra, and nodeRef, machinepool is running, replicas 0, providerIDList not set", machinepool: &expv1.MachinePool{ @@ -1349,9 +1326,10 @@ func TestReconcileMachinePoolInfrastructure(t *testing.T) { Client: fakeClient, ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1437,9 +1415,10 @@ func TestReconcileMachinePoolMachines(t *testing.T) { Client: env, ssaCache: ssa.NewCache(), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: env.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: env.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } scope := &scope{ @@ -1505,9 +1484,10 @@ func TestReconcileMachinePoolMachines(t *testing.T) { Client: env, ssaCache: ssa.NewCache(), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: env.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: env.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1868,9 +1848,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { ClusterCache: clustercache.NewFakeClusterCache(env.GetClient(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: 
record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1935,9 +1916,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { ClusterCache: clustercache.NewFakeClusterCache(env.GetClient(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1985,9 +1967,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { recorder: record.NewFakeRecorder(32), ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -2031,9 +2014,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { recorder: record.NewFakeRecorder(32), ClusterCache: clustercache.NewFakeClusterCache(fakeClient, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: 
&informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -2099,9 +2083,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { ClusterCache: clustercache.NewFakeClusterCache(env.GetClient(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: fakeClient.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: fakeClient.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } diff --git a/exp/internal/controllers/machinepool_controller_test.go b/exp/internal/controllers/machinepool_controller_test.go index 9b4cc6bd8b9c..7f0897c6497a 100644 --- a/exp/internal/controllers/machinepool_controller_test.go +++ b/exp/internal/controllers/machinepool_controller_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + "github.com/go-logr/logr" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -36,6 +37,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -601,9 +603,10 @@ func TestReconcileMachinePoolRequest(t *testing.T) { APIReader: clientFake, ClusterCache: clustercache.NewFakeClusterCache(trackerClientFake, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: clientFake.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: clientFake.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1165,9 +1168,10 @@ func TestMachinePoolConditions(t *testing.T) { APIReader: clientFake, ClusterCache: clustercache.NewFakeClusterCache(clientFake, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: clientFake.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: clientFake.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } diff --git a/go.mod b/go.mod index c2256777480e..a29a353c52a2 100644 --- a/go.mod +++ b/go.mod @@ -34,8 +34,8 @@ require ( go.etcd.io/etcd/api/v3 v3.5.16 go.etcd.io/etcd/client/v3 v3.5.16 golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 - golang.org/x/oauth2 v0.23.0 - golang.org/x/text v0.19.0 + golang.org/x/oauth2 v0.24.0 + golang.org/x/text v0.20.0 gomodules.xyz/jsonpatch/v2 v2.4.0 google.golang.org/grpc v1.65.1 k8s.io/api 
v0.31.2 @@ -137,11 +137,11 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect go4.org v0.0.0-20201209231011-d4a079459e60 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/term v0.25.0 // indirect + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/net v0.31.0 // indirect + golang.org/x/sync v0.9.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/term v0.26.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.26.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect diff --git a/go.sum b/go.sum index 8f38c03ef01b..c5ca99ada823 100644 --- a/go.sum +++ b/go.sum @@ -405,8 +405,8 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -453,15 +453,15 @@ golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod 
h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -469,8 +469,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -490,19 +490,19 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= 
+golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= diff --git a/hack/tools/go.mod b/hack/tools/go.mod index 93bad49fc422..c674dc50fc50 100644 --- a/hack/tools/go.mod +++ b/hack/tools/go.mod @@ -15,8 +15,8 @@ require ( github.com/pkg/errors v0.9.1 github.com/spf13/pflag v1.0.5 github.com/valyala/fastjson v1.6.4 - golang.org/x/oauth2 v0.23.0 - google.golang.org/api v0.204.0 + golang.org/x/oauth2 v0.24.0 + google.golang.org/api v0.205.0 k8s.io/api v0.31.2 k8s.io/apiextensions-apiserver v0.31.2 k8s.io/apimachinery v0.31.2 @@ -35,7 +35,7 @@ require ( require ( cel.dev/expr v0.16.1 // indirect - cloud.google.com/go/auth v0.10.0 // indirect + cloud.google.com/go/auth v0.10.1 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect cloud.google.com/go/monitoring v1.21.1 // indirect 
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.24.1 // indirect @@ -168,14 +168,14 @@ require ( go.opentelemetry.io/otel/metric v1.29.0 // indirect go.opentelemetry.io/otel/trace v1.29.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.28.0 // indirect + golang.org/x/crypto v0.29.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.21.0 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/term v0.25.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/net v0.31.0 // indirect + golang.org/x/sync v0.9.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/term v0.26.0 // indirect + golang.org/x/text v0.20.0 // indirect golang.org/x/time v0.7.0 // indirect golang.org/x/tools v0.26.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect diff --git a/hack/tools/go.sum b/hack/tools/go.sum index ea687a593f5e..ff47fc71ef9d 100644 --- a/hack/tools/go.sum +++ b/hack/tools/go.sum @@ -3,8 +3,8 @@ cel.dev/expr v0.16.1/go.mod h1:AsGA5zb3WruAEQeQng1RZdGEXmBj0jvMWh6l5SnNuC8= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE= cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U= -cloud.google.com/go/auth v0.10.0 h1:tWlkvFAh+wwTOzXIjrwM64karR1iTBZ/GRr0S/DULYo= -cloud.google.com/go/auth v0.10.0/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth v0.10.1 h1:TnK46qldSfHWt2a0b/hciaiVJsmDXWy9FqyUan0uYiI= +cloud.google.com/go/auth v0.10.1/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= 
cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= @@ -425,8 +425,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= @@ -454,11 +454,11 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 
v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -466,8 +466,8 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -485,15 +485,15 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 
-golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -501,8 +501,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= golang.org/x/time 
v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -523,8 +523,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/api v0.204.0 h1:3PjmQQEDkR/ENVZZwIYB4W/KzYtN8OrqnNcHWpeR8E4= -google.golang.org/api v0.204.0/go.mod h1:69y8QSoKIbL9F94bWgWAq6wGqGwyjBgi2y8rAK8zLag= +google.golang.org/api v0.205.0 h1:LFaxkAIpDb/GsrWV20dMMo5MR0h8UARTbn24LmD+0Pg= +google.golang.org/api v0.205.0/go.mod h1:NrK1EMqO8Xk6l6QwRAmrXXg2v6dzukhlOyvkYtnvUuc= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/internal/controllers/cluster/cluster_controller.go b/internal/controllers/cluster/cluster_controller.go index de05d0e5f4ce..8cde31540cce 100644 --- a/internal/controllers/cluster/cluster_controller.go +++ b/internal/controllers/cluster/cluster_controller.go @@ -30,6 +30,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -106,6 +107,14 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt &clusterv1.Machine{}, handler.EnqueueRequestsFromMapFunc(r.controlPlaneMachineToCluster), ). 
+ Watches( + &clusterv1.MachineDeployment{}, + handler.EnqueueRequestsFromMapFunc(r.machineDeploymentToCluster), + ). + Watches( + &expv1.MachinePool{}, + handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster), + ). WithOptions(options). WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)). Build(r) @@ -116,9 +125,10 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt r.recorder = mgr.GetEventRecorderFor("cluster-controller") r.externalTracker = external.ObjectTracker{ - Controller: c, - Cache: mgr.GetCache(), - Scheme: mgr.GetScheme(), + Controller: c, + Cache: mgr.GetCache(), + Scheme: mgr.GetScheme(), + PredicateLogger: &predicateLog, } return nil } @@ -251,6 +261,20 @@ func patchCluster(ctx context.Context, patchHelper *patch.Helper, cluster *clust clusterv1.ControlPlaneReadyCondition, clusterv1.InfrastructureReadyCondition, }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + clusterv1.ClusterControlPlaneInitializedV1Beta2Condition, + clusterv1.ClusterWorkersAvailableV1Beta2Condition, + clusterv1.ClusterMachinesReadyV1Beta2Condition, + clusterv1.ClusterMachinesUpToDateV1Beta2Condition, + clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + clusterv1.ClusterScalingUpV1Beta2Condition, + clusterv1.ClusterScalingDownV1Beta2Condition, + clusterv1.ClusterRemediatingV1Beta2Condition, + clusterv1.ClusterDeletingV1Beta2Condition, + clusterv1.ClusterAvailableV1Beta2Condition, + }}, ) return patchHelper.Patch(ctx, cluster, options...) 
} @@ -376,10 +400,15 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (reconcile.R if descendantCount := s.descendants.objectsPendingDeleteCount(cluster); descendantCount > 0 { indirect := descendantCount - len(children) - log.Info("Cluster still has descendants - need to requeue", "descendants", s.descendants.objectsPendingDeleteNames(cluster), "indirect descendants count", indirect) + names := s.descendants.objectsPendingDeleteNames(cluster) + + log.Info("Cluster still has descendants - need to requeue", "descendants", strings.Join(names, "; "), "indirect descendants count", indirect) s.deletingReason = clusterv1.ClusterDeletingWaitingForWorkersDeletionV1Beta2Reason - s.deletingMessage = s.descendants.objectsPendingDeleteNames(cluster) + for i := range names { + names[i] = "* " + names[i] + } + s.deletingMessage = strings.Join(names, "\n") // Requeue so we can check the next time to see if there are still any descendants left. return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil @@ -497,7 +526,7 @@ func (c *clusterDescendants) objectsPendingDeleteCount(cluster *clusterv1.Cluste // objectsPendingDeleteNames return the names of descendants pending delete. // Note: infrastructure cluster, control plane object and its controlled machines are not included. 
-func (c *clusterDescendants) objectsPendingDeleteNames(cluster *clusterv1.Cluster) string { +func (c *clusterDescendants) objectsPendingDeleteNames(cluster *clusterv1.Cluster) []string { descendants := make([]string, 0) if cluster.Spec.ControlPlaneRef == nil { controlPlaneMachineNames := make([]string, len(c.controlPlaneMachines)) @@ -543,7 +572,7 @@ func (c *clusterDescendants) objectsPendingDeleteNames(cluster *clusterv1.Cluste sort.Strings(workerMachineNames) descendants = append(descendants, "Worker Machines: "+clog.StringListToString(workerMachineNames)) } - return strings.Join(descendants, "; ") + return descendants } // getDescendants collects all MachineDeployments, MachineSets, MachinePools and Machines for the cluster. @@ -712,3 +741,41 @@ func (r *Reconciler) controlPlaneMachineToCluster(ctx context.Context, o client. NamespacedName: util.ObjectKey(cluster), }} } + +// machineDeploymentToCluster is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation +// for Cluster to update when one of its own MachineDeployments gets updated. +func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Object) []ctrl.Request { + md, ok := o.(*clusterv1.MachineDeployment) + if !ok { + panic(fmt.Sprintf("Expected a MachineDeployment but got a %T", o)) + } + if md.Spec.ClusterName == "" { + return nil + } + + return []ctrl.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: md.Namespace, + Name: md.Spec.ClusterName, + }, + }} +} + +// machinePoolToCluster is a handler.ToRequestsFunc to be used to enqueue requests for reconciliation +// for Cluster to update when one of its own MachinePools gets updated. 
+func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request { + mp, ok := o.(*expv1.MachinePool) + if !ok { + panic(fmt.Sprintf("Expected a MachinePool but got a %T", o)) + } + if mp.Spec.ClusterName == "" { + return nil + } + + return []ctrl.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: mp.Namespace, + Name: mp.Spec.ClusterName, + }, + }} +} diff --git a/internal/controllers/cluster/cluster_controller_phases_test.go b/internal/controllers/cluster/cluster_controller_phases_test.go index 0f74c366b1a3..b5d96b92cd9a 100644 --- a/internal/controllers/cluster/cluster_controller_phases_test.go +++ b/internal/controllers/cluster/cluster_controller_phases_test.go @@ -20,15 +20,18 @@ import ( "testing" "time" + "github.com/go-logr/logr" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache/informertest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" @@ -259,9 +262,10 @@ func TestClusterReconcileInfrastructure(t *testing.T) { Client: c, recorder: record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: c.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: c.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -541,9 +545,10 @@ func TestClusterReconcileControlPlane(t *testing.T) { Client: c, recorder: record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, 
- Scheme: c.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: c.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -924,9 +929,10 @@ func TestClusterReconcilePhases_reconcileFailureDomains(t *testing.T) { Client: c, recorder: record.NewFakeRecorder(32), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: c.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: c.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } diff --git a/internal/controllers/cluster/cluster_controller_status.go b/internal/controllers/cluster/cluster_controller_status.go index 645cf1a1d4c7..ff7d50c9f872 100644 --- a/internal/controllers/cluster/cluster_controller_status.go +++ b/internal/controllers/cluster/cluster_controller_status.go @@ -849,6 +849,12 @@ func (c clusterConditionCustomMergeStrategy) Merge(conditions []v1beta2condition return v1beta2conditions.InfoMergePriority } } + + // Treat all reasons except TopologyReconcileFailed and ClusterClassNotReconciled of TopologyReconciled condition as info. 
+ if condition.Type == clusterv1.ClusterTopologyReconciledV1Beta2Condition && condition.Status == metav1.ConditionFalse && + condition.Reason != clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason && condition.Reason != clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason { + return v1beta2conditions.InfoMergePriority + } return v1beta2conditions.GetDefaultMergePriorityFunc(c.negativePolarityConditionTypes)(condition) }).Merge(conditions, conditionTypes) } @@ -1009,10 +1015,7 @@ func aggregateUnhealthyMachines(machines collections.Machines) string { return "" } - machineNames := []string{} - for _, machine := range machines { - machineNames = append(machineNames, machine.GetName()) - } + machineNames := machines.Names() if len(machineNames) == 0 { return "" diff --git a/internal/controllers/cluster/cluster_controller_status_test.go b/internal/controllers/cluster/cluster_controller_status_test.go index e062665c6c49..91a6a58f9e05 100644 --- a/internal/controllers/cluster/cluster_controller_status_test.go +++ b/internal/controllers/cluster/cluster_controller_status_test.go @@ -248,7 +248,7 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.ClusterInfrastructureReadyNoReasonReportedV1Beta2Reason, - Message: "some message (from FakeInfraCluster)", + Message: "some message", }, }, { @@ -402,7 +402,7 @@ func TestSetControlPlaneAvailableCondition(t *testing.T) { Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.ClusterControlPlaneAvailableNoReasonReportedV1Beta2Reason, - Message: "some message (from FakeControlPlane)", + Message: "some message", }, }, { @@ -694,10 +694,11 @@ func TestSetWorkersAvailableCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, - Status: 
metav1.ConditionUnknown, - Reason: v1beta2conditions.MultipleUnknownReportedReason, - Message: "Condition Available not yet reported from MachineDeployment md1; Condition Available not yet reported from MachinePool mp1", + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.MultipleUnknownReportedReason, + Message: "* MachineDeployment md1: Condition Available not yet reported\n" + + "* MachinePool mp1: Condition Available not yet reported", }, }, { @@ -721,10 +722,11 @@ func TestSetWorkersAvailableCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1", + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* MachineDeployment md1: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5)\n" + + "* MachinePool mp1: 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4)", }, }, } @@ -804,7 +806,7 @@ func TestSetMachinesReadyCondition(t *testing.T) { Type: clusterv1.ClusterMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition Ready not yet reported from Machine machine-2", + Message: "* Machine machine-2: Condition Ready not yet reported", }, }, { @@ -833,10 +835,12 @@ func TestSetMachinesReadyCondition(t *testing.T) { }, getDescendantsSucceeded: true, 
expectCondition: metav1.Condition{ - Type: clusterv1.ClusterMachinesReadyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Deleting: Machine deletion in progress, stage: DrainingNode from Machine machine-4; HealthCheckSucceeded: Some message from Machine machine-2; Some unknown message from Machine machine-3", + Type: clusterv1.ClusterMachinesReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machine machine-2: HealthCheckSucceeded: Some message\n" + + "* Machine machine-4: Deleting: Machine deletion in progress, stage: DrainingNode\n" + + "* Machine machine-3: Some unknown message", }, }, } @@ -923,7 +927,7 @@ func TestSetMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.ClusterMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: "some-unknown-reason-1", - Message: "some unknown message from Machine unknown-1", + Message: "* Machine unknown-1: some unknown message", }, }, { @@ -942,7 +946,7 @@ func TestSetMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.ClusterMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "some-not-up-to-date-reason", - Message: "some not up-to-date message from Machine not-up-to-date-machine-1", + Message: "* Machine not-up-to-date-machine-1: some not up-to-date message", }, }, { @@ -956,7 +960,7 @@ func TestSetMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.ClusterMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition UpToDate not yet reported from Machine no-condition-machine-1", + Message: "* Machine no-condition-machine-1: Condition UpToDate not yet reported", }, }, { @@ -990,10 +994,11 @@ func TestSetMachinesUpToDateCondition(t *testing.T) { }, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: 
clusterv1.ClusterMachinesUpToDateV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "This is not up-to-date message from Machines not-up-to-date-machine-1, not-up-to-date-machine-2; Condition UpToDate not yet reported from Machines no-condition-machine-1, no-condition-machine-2", + Type: clusterv1.ClusterMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machines not-up-to-date-machine-1, not-up-to-date-machine-2: This is not up-to-date message\n" + + "* Machines no-condition-machine-1, no-condition-machine-2: Condition UpToDate not yet reported", }, }, } @@ -1084,10 +1089,12 @@ func TestSetScalingUpCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingUpV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: v1beta2conditions.MultipleUnknownReportedReason, - Message: "Condition ScalingUp not yet reported from MachineDeployment md1; Condition ScalingUp not yet reported from MachinePool mp1; Condition ScalingUp not yet reported from MachineSet ms1", + Type: clusterv1.ClusterScalingUpV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.MultipleUnknownReportedReason, + Message: "* MachineDeployment md1: Condition ScalingUp not yet reported\n" + + "* MachinePool mp1: Condition ScalingUp not yet reported\n" + + "* MachineSet ms1: Condition ScalingUp not yet reported", }, }, { @@ -1131,10 +1138,13 @@ func TestSetScalingUpCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingUpV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling up from 0 to 3 replicas from FakeControlPlane cp1; Scaling up from 1 to 5 replicas from MachineDeployment md1; Scaling up from 0 to 3 
replicas from MachinePool mp1; 1 MachineSet with other issues", + Type: clusterv1.ClusterScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* FakeControlPlane cp1: Scaling up from 0 to 3 replicas\n" + + "* MachineDeployment md1: Scaling up from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling up from 0 to 3 replicas\n" + + "And 1 MachineSet with other issues", }, }, { @@ -1173,10 +1183,12 @@ func TestSetScalingUpCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingUpV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling up from 1 to 5 replicas from MachineDeployment md1; Scaling up from 0 to 3 replicas from MachinePool mp1; Scaling up from 2 to 7 replicas from MachineSet ms1", + Type: clusterv1.ClusterScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* MachineDeployment md1: Scaling up from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling up from 0 to 3 replicas\n" + + "* MachineSet ms1: Scaling up from 2 to 7 replicas", }, }, { @@ -1236,10 +1248,12 @@ func TestSetScalingUpCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingUpV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling up from 1 to 5 replicas from MachineDeployment md1; Scaling up from 0 to 3 replicas from MachinePool mp1; Scaling up from 2 to 7 replicas from MachineSet ms1", + Type: clusterv1.ClusterScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* MachineDeployment md1: Scaling up from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling up from 0 to 3 replicas\n" + + "* 
MachineSet ms1: Scaling up from 2 to 7 replicas", }, }, } @@ -1326,10 +1340,12 @@ func TestSetScalingDownCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingDownV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: v1beta2conditions.MultipleUnknownReportedReason, - Message: "Condition ScalingDown not yet reported from MachineDeployment md1; Condition ScalingDown not yet reported from MachinePool mp1; Condition ScalingDown not yet reported from MachineSet ms1", + Type: clusterv1.ClusterScalingDownV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.MultipleUnknownReportedReason, + Message: "* MachineDeployment md1: Condition ScalingDown not yet reported\n" + + "* MachinePool mp1: Condition ScalingDown not yet reported\n" + + "* MachineSet ms1: Condition ScalingDown not yet reported", }, }, { @@ -1373,10 +1389,13 @@ func TestSetScalingDownCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingDownV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling down from 0 to 3 replicas from FakeControlPlane cp1; Scaling down from 1 to 5 replicas from MachineDeployment md1; Scaling down from 0 to 3 replicas from MachinePool mp1; 1 MachineSet with other issues", + Type: clusterv1.ClusterScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* FakeControlPlane cp1: Scaling down from 0 to 3 replicas\n" + + "* MachineDeployment md1: Scaling down from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling down from 0 to 3 replicas\n" + + "And 1 MachineSet with other issues", }, }, { @@ -1415,10 +1434,12 @@ func TestSetScalingDownCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: 
clusterv1.ClusterScalingDownV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling down from 1 to 5 replicas from MachineDeployment md1; Scaling down from 0 to 3 replicas from MachinePool mp1; Scaling down from 2 to 7 replicas from MachineSet ms1", + Type: clusterv1.ClusterScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* MachineDeployment md1: Scaling down from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling down from 0 to 3 replicas\n" + + "* MachineSet ms1: Scaling down from 2 to 7 replicas", }, }, { @@ -1478,10 +1499,12 @@ func TestSetScalingDownCondition(t *testing.T) { }}, getDescendantsSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterScalingDownV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Scaling down from 1 to 5 replicas from MachineDeployment md1; Scaling down from 0 to 3 replicas from MachinePool mp1; Scaling down from 2 to 7 replicas from MachineSet ms1", + Type: clusterv1.ClusterScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* MachineDeployment md1: Scaling down from 1 to 5 replicas\n" + + "* MachinePool mp1: Scaling down from 0 to 3 replicas\n" + + "* MachineSet ms1: Scaling down from 2 to 7 replicas", }, }, } @@ -1502,7 +1525,7 @@ func TestSetRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := 
metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -1550,7 +1573,7 @@ func TestSetRemediatingCondition(t *testing.T) { Type: clusterv1.ClusterRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: clusterv1.ClusterRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "* Machine m3: Machine deletionTimestamp set", }, }, { @@ -1625,15 +1648,21 @@ func TestDeletingCondition(t *testing.T) { }, }, { - name: "deletionTimestamp set (some reason/message reported)", - cluster: fakeCluster("c", deleted(true)), - deletingReason: clusterv1.ClusterDeletingWaitingForBeforeDeleteHookV1Beta2Reason, - deletingMessage: "Some message", + name: "deletionTimestamp set (some reason/message reported)", + cluster: fakeCluster("c", deleted(true)), + deletingReason: clusterv1.ClusterDeletingWaitingForWorkersDeletionV1Beta2Reason, + deletingMessage: "* Control plane Machines: cp1, cp2, cp3\n" + + "* MachineDeployments: md1, md2\n" + + "* MachineSets: ms1, ms2\n" + + "* Worker Machines: w1, w2, w3, w4, w5, ... (3 more)", expectCondition: metav1.Condition{ - Type: clusterv1.ClusterDeletingV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: clusterv1.ClusterDeletingWaitingForBeforeDeleteHookV1Beta2Reason, - Message: "Some message", + Type: clusterv1.ClusterDeletingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.ClusterDeletingWaitingForWorkersDeletionV1Beta2Reason, + Message: "* Control plane Machines: cp1, cp2, cp3\n" + + "* MachineDeployments: md1, md2\n" + + "* MachineSets: ms1, ms2\n" + + "* Worker Machines: w1, w2, w3, w4, w5, ... 
(3 more)", }, }, } @@ -1676,10 +1705,14 @@ func TestSetAvailableCondition(t *testing.T) { }, }, expectCondition: metav1.Condition{ - Type: clusterv1.ClusterAvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, - Reason: v1beta2conditions.MultipleUnknownReportedReason, - Message: "InfrastructureReady: Condition InfrastructureReady not yet reported; ControlPlaneAvailable: Condition ControlPlaneAvailable not yet reported; WorkersAvailable: Condition WorkersAvailable not yet reported; RemoteConnectionProbe: Condition RemoteConnectionProbe not yet reported; Deleting: Condition Deleting not yet reported", + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.MultipleUnknownReportedReason, + Message: "* InfrastructureReady: Condition not yet reported\n" + + "* ControlPlaneAvailable: Condition not yet reported\n" + + "* WorkersAvailable: Condition not yet reported\n" + + "* RemoteConnectionProbe: Condition not yet reported\n" + + "* Deleting: Condition not yet reported", }, }, { @@ -1732,6 +1765,63 @@ func TestSetAvailableCondition(t *testing.T) { Message: "", }, }, + { + name: "Handles multiline conditions", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.ClusterSpec{ + Topology: nil, // not using CC + }, + Status: clusterv1.ClusterStatus{ + V1Beta2: &clusterv1.ClusterV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: 
clusterv1.ClusterDeletingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.ClusterDeletingWaitingForWorkersDeletionV1Beta2Reason, + Message: "* Control plane Machines: cp1, cp2, cp3\n" + + "* MachineDeployments: md1, md2\n" + + "* MachineSets: ms1, ms2\n" + + "* Worker Machines: w1, w2, w3, w4, w5, ... (3 more)", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterDeletingWaitingForWorkersDeletionV1Beta2Reason, + Message: "* Deleting:\n" + + " * Control plane Machines: cp1, cp2, cp3\n" + + " * MachineDeployments: md1, md2\n" + + " * MachineSets: ms1, ms2\n" + + " * Worker Machines: w1, w2, w3, w4, w5, ... (3 more)", + }, + }, { name: "TopologyReconciled is required when the cluster is using CC", cluster: &clusterv1.Cluster{ @@ -1779,7 +1869,7 @@ func TestSetAvailableCondition(t *testing.T) { Type: clusterv1.ClusterAvailableV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "TopologyReconciled: Condition TopologyReconciled not yet reported", + Message: "* TopologyReconciled: Condition not yet reported", }, }, { @@ -1838,7 +1928,7 @@ func TestSetAvailableCondition(t *testing.T) { Type: clusterv1.ClusterAvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", - Message: "MyAvailabilityGate: Some message", + Message: "* MyAvailabilityGate: Some message", }, }, { @@ -1886,7 +1976,7 @@ func TestSetAvailableCondition(t *testing.T) { Type: clusterv1.ClusterAvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.ClusterDeletingWaitingForBeforeDeleteHookV1Beta2Reason, - Message: "Deleting: Some message", + Message: "* Deleting: Some message", }, }, { @@ -1949,6 +2039,232 @@ func TestSetAvailableCondition(t *testing.T) { Reason: v1beta2conditions.MultipleInfoReportedReason, }, }, + { + name: "Surfaces message from TopologyReconciled for 
reason that doesn't affect availability (no other issues)", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.ClusterSpec{ + Topology: &clusterv1.Topology{}, // using CC + }, + Status: clusterv1.ClusterStatus{ + V1Beta2: &clusterv1.ClusterV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + Message: "Control plane rollout and upgrade to version v1.29.0 on hold.", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + Message: "* TopologyReconciled: Control plane rollout and upgrade to version v1.29.0 on hold.", + }, + }, + { + name: "Drops messages from TopologyReconciled for reason that doesn't affect availability (when there is another issue)", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.ClusterSpec{ + Topology: &clusterv1.Topology{}, // using CC + }, + Status: clusterv1.ClusterStatus{ + V1Beta2: &clusterv1.ClusterV1Beta2Status{ + Conditions: 
[]metav1.Condition{ + { + Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1", + }, + { + Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + Message: "Control plane rollout and upgrade to version v1.29.0 on hold.", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, // Note: There is only one condition that is an issue, but it has the MultipleIssuesReported reason. 
+ Message: "* WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1", + }, + }, + { + name: "Takes into account messages from TopologyReconciled for reason that affects availability (no other issues)", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.ClusterSpec{ + Topology: &clusterv1.Topology{}, // using CC + }, + Status: clusterv1.ClusterStatus{ + V1Beta2: &clusterv1.ClusterV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason, + Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + + ".status.observedGeneration == .metadata.generation is true. 
If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason, + Message: "* TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + + ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + }, + }, + { + name: "Takes into account messages from TopologyReconciled for reason that affects availability (when there is another issue)", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.ClusterSpec{ + Topology: &clusterv1.Topology{}, // using CC + }, + Status: clusterv1.ClusterStatus{ + V1Beta2: &clusterv1.ClusterV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.ClusterInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterControlPlaneAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterWorkersAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1", + }, + { + Type: clusterv1.ClusterRemoteConnectionProbeV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterDeletingV1Beta2Condition, + Status: 
metav1.ConditionFalse, + Reason: "Foo", + }, + { + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason, + Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + + ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.ClusterAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* WorkersAvailable: 3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5) from MachineDeployment md1; 2 available replicas, at least 3 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 4) from MachinePool mp1\n" + + "* TopologyReconciled: ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if.status.observedGeneration == .metadata.generation is true. 
If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + }, + }, } for _, tc := range testCases { diff --git a/internal/controllers/cluster/cluster_controller_test.go b/internal/controllers/cluster/cluster_controller_test.go index c4ad6a58416a..612b31d0bb43 100644 --- a/internal/controllers/cluster/cluster_controller_test.go +++ b/internal/controllers/cluster/cluster_controller_test.go @@ -886,7 +886,7 @@ func TestObjectsPendingDelete(t *testing.T) { c := &clusterv1.Cluster{} g.Expect(d.objectsPendingDeleteCount(c)).To(Equal(17)) - g.Expect(d.objectsPendingDeleteNames(c)).To(Equal("Control plane Machines: cp1, cp2, cp3; MachineDeployments: md1, md2; MachineSets: ms1, ms2; MachinePools: mp1, mp2; Worker Machines: w1, w2, w3, w4, w5, ... (3 more)")) + g.Expect(d.objectsPendingDeleteNames(c)).To(Equal([]string{"Control plane Machines: cp1, cp2, cp3", "MachineDeployments: md1, md2", "MachineSets: ms1, ms2", "MachinePools: mp1, mp2", "Worker Machines: w1, w2, w3, w4, w5, ... (3 more)"})) }) t.Run("With a control plane object", func(t *testing.T) { @@ -894,7 +894,7 @@ func TestObjectsPendingDelete(t *testing.T) { c := &clusterv1.Cluster{Spec: clusterv1.ClusterSpec{ControlPlaneRef: &corev1.ObjectReference{Kind: "SomeKind"}}} g.Expect(d.objectsPendingDeleteCount(c)).To(Equal(14)) - g.Expect(d.objectsPendingDeleteNames(c)).To(Equal("MachineDeployments: md1, md2; MachineSets: ms1, ms2; MachinePools: mp1, mp2; Worker Machines: w1, w2, w3, w4, w5, ... (3 more)")) + g.Expect(d.objectsPendingDeleteNames(c)).To(Equal([]string{"MachineDeployments: md1, md2", "MachineSets: ms1, ms2", "MachinePools: mp1, mp2", "Worker Machines: w1, w2, w3, w4, w5, ... 
(3 more)"})) }) } diff --git a/internal/controllers/clusterclass/clusterclass_controller.go b/internal/controllers/clusterclass/clusterclass_controller.go index 14fdc9121c35..da3c2d40ed80 100644 --- a/internal/controllers/clusterclass/clusterclass_controller.go +++ b/internal/controllers/clusterclass/clusterclass_controller.go @@ -47,7 +47,6 @@ import ( "sigs.k8s.io/cluster-api/feature" runtimeclient "sigs.k8s.io/cluster-api/internal/runtime/client" "sigs.k8s.io/cluster-api/internal/topology/variables" - "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/conversion" "sigs.k8s.io/cluster-api/util/patch" @@ -371,8 +370,6 @@ func refString(ref *corev1.ObjectReference) string { } func (r *Reconciler) reconcileExternal(ctx context.Context, clusterClass *clusterv1.ClusterClass, ref *corev1.ObjectReference) error { - log := ctrl.LoggerFrom(ctx) - obj, err := external.Get(ctx, r.Client, ref, clusterClass.Namespace) if err != nil { if apierrors.IsNotFound(errors.Cause(err)) { @@ -381,12 +378,6 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, clusterClass *cluste return errors.Wrapf(err, "failed to get the external object for the ClusterClass. refGroupVersionKind: %s, refName: %s", ref.GroupVersionKind(), ref.Name) } - // If referenced object is paused, return early. - if annotations.HasPaused(obj) { - log.V(3).Info("External object referenced is paused", "refGroupVersionKind", ref.GroupVersionKind(), "refName", ref.Name) - return nil - } - // Initialize the patch helper. 
patchHelper, err := patch.NewHelper(obj, r.Client) if err != nil { diff --git a/internal/controllers/machine/drain/drain.go b/internal/controllers/machine/drain/drain.go index a0bbaf809d60..802f1448f9c3 100644 --- a/internal/controllers/machine/drain/drain.go +++ b/internal/controllers/machine/drain/drain.go @@ -249,7 +249,7 @@ func filterPods(ctx context.Context, allPods []*corev1.Pod, filters []PodFilter) // Add the pod to PodDeleteList no matter what PodDeleteStatus is, // those pods whose PodDeleteStatus is false like DaemonSet will // be caught by list.errors() - pod.Kind = "Pod" + pod.Kind = "Pod" //nolint:goconst pod.APIVersion = "v1" pods = append(pods, PodDelete{ Pod: pod, @@ -448,15 +448,17 @@ func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string conditionMessage := fmt.Sprintf("Drain not completed yet (started at %s):", nodeDrainStartTime.Format(time.RFC3339)) if len(r.PodsDeletionTimestampSet) > 0 { - conditionMessage = fmt.Sprintf("%s\n* Pods with deletionTimestamp that still exist: %s", - conditionMessage, PodListToString(r.PodsDeletionTimestampSet, 3)) + kind := "Pod" + if len(r.PodsDeletionTimestampSet) > 1 { + kind = "Pods" + } + conditionMessage = fmt.Sprintf("%s\n* %s %s: deletionTimestamp set, but still not removed from the Node", + conditionMessage, kind, PodListToString(r.PodsDeletionTimestampSet, 3)) } if len(r.PodsFailedEviction) > 0 { sortedFailureMessages := maps.Keys(r.PodsFailedEviction) sort.Strings(sortedFailureMessages) - conditionMessage = fmt.Sprintf("%s\n* Pods with eviction failed:", conditionMessage) - skippedFailureMessages := []string{} if len(sortedFailureMessages) > 5 { skippedFailureMessages = sortedFailureMessages[5:] @@ -464,30 +466,32 @@ func (r EvictionResult) ConditionMessage(nodeDrainStartTime *metav1.Time) string } for _, failureMessage := range sortedFailureMessages { pods := r.PodsFailedEviction[failureMessage] - conditionMessage = fmt.Sprintf("%s\n * %s: %s", conditionMessage, 
failureMessage, PodListToString(pods, 3)) + kind := "Pod" + if len(pods) > 1 { + kind = "Pods" + } + failureMessage = strings.Replace(failureMessage, "Cannot evict pod as it would violate the pod's disruption budget.", "cannot evict pod as it would violate the pod's disruption budget.", -1) + if !strings.HasPrefix(failureMessage, "cannot evict pod as it would violate the pod's disruption budget.") { + failureMessage = "failed to evict Pod, " + failureMessage + } + conditionMessage = fmt.Sprintf("%s\n* %s %s: %s", conditionMessage, kind, PodListToString(pods, 3), failureMessage) } if len(skippedFailureMessages) > 0 { - skippedFailureMessagesCount := len(skippedFailureMessages) podCount := 0 for _, failureMessage := range skippedFailureMessages { podCount += len(r.PodsFailedEviction[failureMessage]) } - conditionMessage = fmt.Sprintf("%s\n * ... ", conditionMessage) - if skippedFailureMessagesCount == 1 { - conditionMessage += "(1 more error " - } else { - conditionMessage += fmt.Sprintf("(%d more errors ", skippedFailureMessagesCount) - } if podCount == 1 { - conditionMessage += "applying to 1 Pod)" + conditionMessage = fmt.Sprintf("%s\n* 1 Pod with other issues", conditionMessage) } else { - conditionMessage += fmt.Sprintf("applying to %d Pods)", podCount) + conditionMessage = fmt.Sprintf("%s\n* %d Pods with other issues", + conditionMessage, podCount) } } } if len(r.PodsToTriggerEvictionLater) > 0 { - conditionMessage = fmt.Sprintf("%s\n* After above Pods have been removed from the Node, the following Pods will be evicted: %s", + conditionMessage = fmt.Sprintf("%s\nAfter above Pods have been removed from the Node, the following Pods will be evicted: %s", conditionMessage, PodListToString(r.PodsToTriggerEvictionLater, 3)) } return conditionMessage diff --git a/internal/controllers/machine/drain/drain_test.go b/internal/controllers/machine/drain/drain_test.go index 13218c062265..7852601ac9f9 100644 --- a/internal/controllers/machine/drain/drain_test.go +++ 
b/internal/controllers/machine/drain/drain_test.go @@ -1496,11 +1496,10 @@ func TestEvictionResult_ConditionMessage(t *testing.T) { }, }, wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1 -* Pods with eviction failed: - * Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1 - * some other error 1: pod-6-to-trigger-eviction-some-other-error -* After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`, +* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node +* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently +* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1 +After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`, }, { name: "Compute long condition message correctly", @@ -1654,15 +1653,14 @@ func TestEvictionResult_ConditionMessage(t *testing.T) { }, }, wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, ... (4 more) -* Pods with eviction failed: - * Cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently: pod-5-to-trigger-eviction-pdb-violated-1, pod-5-to-trigger-eviction-pdb-violated-2, pod-5-to-trigger-eviction-pdb-violated-3, ... 
(3 more) - * some other error 1: pod-6-to-trigger-eviction-some-other-error - * some other error 2: pod-7-to-trigger-eviction-some-other-error - * some other error 3: pod-8-to-trigger-eviction-some-other-error - * some other error 4: pod-9-to-trigger-eviction-some-other-error - * ... (1 more error applying to 1 Pod) -* After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... (2 more)`, +* Pods pod-2-deletionTimestamp-set-1, pod-2-deletionTimestamp-set-2, pod-2-deletionTimestamp-set-3, ... (4 more): deletionTimestamp set, but still not removed from the Node +* Pods pod-5-to-trigger-eviction-pdb-violated-1, pod-5-to-trigger-eviction-pdb-violated-2, pod-5-to-trigger-eviction-pdb-violated-3, ... (3 more): cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently +* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1 +* Pod pod-7-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 2 +* Pod pod-8-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 3 +* Pod pod-9-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 4 +* 1 Pod with other issues +After above Pods have been removed from the Node, the following Pods will be evicted: pod-11-eviction-later, pod-12-eviction-later, pod-13-eviction-later, ... 
(2 more)`, }, { name: "Compute long condition message correctly with more skipped errors", @@ -1730,13 +1728,12 @@ func TestEvictionResult_ConditionMessage(t *testing.T) { }, }, wantConditionMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with eviction failed: - * some other error 1: pod-1-to-trigger-eviction-some-other-error - * some other error 2: pod-2-to-trigger-eviction-some-other-error - * some other error 3: pod-3-to-trigger-eviction-some-other-error - * some other error 4: pod-4-to-trigger-eviction-some-other-error - * some other error 5: pod-5-to-trigger-eviction-some-other-error - * ... (2 more errors applying to 4 Pods)`, +* Pod pod-1-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1 +* Pod pod-2-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 2 +* Pod pod-3-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 3 +* Pod pod-4-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 4 +* Pod pod-5-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 5 +* 4 Pods with other issues`, }, } diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index a462490d956f..4bb7b69076ad 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -171,9 +171,10 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt r.controller = c r.recorder = mgr.GetEventRecorderFor("machine-controller") r.externalTracker = external.ObjectTracker{ - Controller: c, - Cache: mgr.GetCache(), - Scheme: mgr.GetScheme(), + Controller: c, + Cache: mgr.GetCache(), + Scheme: mgr.GetScheme(), + PredicateLogger: &predicateLog, } r.ssaCache = ssa.NewCache() r.reconcileDeleteCache = cache.New[cache.ReconcileEntry]() @@ -309,8 +310,6 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, 
machine *clust clusterv1.BootstrapReadyCondition, clusterv1.InfrastructureReadyCondition, clusterv1.DrainingSucceededCondition, - clusterv1.MachineHealthCheckSucceededCondition, - clusterv1.MachineOwnerRemediatedCondition, }}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{ clusterv1.MachineAvailableV1Beta2Condition, @@ -1053,12 +1052,12 @@ func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.C return nil } - return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.WatchInput{ + return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.NewWatcher(clustercache.WatcherOptions{ Name: "machine-watchNodes", Watcher: r.controller, Kind: &corev1.Node{}, EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachine), - }) + })) } func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []reconcile.Request { diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index c568cf35d03c..ca4f4ce7b92c 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -351,14 +351,14 @@ func TestGetNode(t *testing.T) { // Retry because the ClusterCache might not have immediately created the clusterAccessor. 
g.Eventually(func(g Gomega) { - g.Expect(clusterCache.Watch(ctx, util.ObjectKey(testCluster), clustercache.WatchInput{ + g.Expect(clusterCache.Watch(ctx, util.ObjectKey(testCluster), clustercache.NewWatcher(clustercache.WatcherOptions{ Name: "TestGetNode", Watcher: w, Kind: &corev1.Node{}, EventHandler: handler.EnqueueRequestsFromMapFunc(func(context.Context, client.Object) []reconcile.Request { return nil }), - })).To(Succeed()) + }))).To(Succeed()) }, 1*time.Minute, 5*time.Second).Should(Succeed()) for _, tc := range testCases { diff --git a/internal/controllers/machine/machine_controller_phases_test.go b/internal/controllers/machine/machine_controller_phases_test.go index 635ffaa2fafd..57b9f74ccaeb 100644 --- a/internal/controllers/machine/machine_controller_phases_test.go +++ b/internal/controllers/machine/machine_controller_phases_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/go-logr/logr" . "github.com/onsi/gomega" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -30,6 +31,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache/informertest" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" @@ -304,9 +306,10 @@ func TestReconcileBootstrap(t *testing.T) { r := &Reconciler{ Client: c, externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: runtime.NewScheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: runtime.NewScheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } s := &scope{cluster: defaultCluster, machine: tc.machine} @@ -861,9 +864,10 @@ func TestReconcileInfrastructure(t *testing.T) { r := &Reconciler{ Client: c, externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: 
&informertest.FakeInformers{}, - Scheme: c.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: c.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } s := &scope{cluster: defaultCluster, machine: tc.machine} diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 024e9b86beb9..83a365559957 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -285,7 +285,7 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, cluster *clusterv1.Cl message := "" if condition.Message != "" { - message = fmt.Sprintf("%s (from Node)", condition.Message) + message = fmt.Sprintf("* Node.Ready: %s", condition.Message) } reason := condition.Reason if reason == "" { @@ -394,7 +394,7 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav semanticallyFalseStatus := 0 unknownStatus := 0 - message := "" + messages := []string{} issueReason := "" unknownReason := "" for _, conditionType := range []corev1.NodeConditionType{corev1.NodeReady, corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure} { @@ -405,7 +405,7 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav } } if condition == nil { - message += fmt.Sprintf("Node %s: condition not yet reported", conditionType) + "; " + messages = append(messages, fmt.Sprintf("* Node.%s: Condition not yet reported", conditionType)) if unknownStatus == 0 { unknownReason = clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason } else { @@ -418,7 +418,11 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav switch condition.Type { case corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure: if condition.Status != corev1.ConditionFalse { - message += fmt.Sprintf("Node %s: condition is %s", 
condition.Type, condition.Status) + "; " + m := condition.Message + if m == "" { + m = fmt.Sprintf("Condition is %s", condition.Status) + } + messages = append(messages, fmt.Sprintf("* Node.%s: %s", condition.Type, m)) if condition.Status == corev1.ConditionUnknown { if unknownStatus == 0 { unknownReason = condition.Reason @@ -438,7 +442,11 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav } case corev1.NodeReady: if condition.Status != corev1.ConditionTrue { - message += fmt.Sprintf("Node %s: condition is %s", condition.Type, condition.Status) + "; " + m := condition.Message + if m == "" { + m = fmt.Sprintf("Condition is %s", condition.Status) + } + messages = append(messages, fmt.Sprintf("* Node.%s: %s", condition.Type, m)) if condition.Status == corev1.ConditionUnknown { if unknownStatus == 0 { unknownReason = condition.Reason @@ -458,7 +466,7 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav } } - message = strings.TrimSuffix(message, "; ") + message := strings.Join(messages, "\n") if semanticallyFalseStatus > 0 { if issueReason == "" { issueReason = v1beta2conditions.NoReasonReported diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index 25f7c0f5a3ef..5c3617502b22 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -110,7 +110,7 @@ func TestSetBootstrapReadyCondition(t *testing.T) { Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, - Message: "some message (from GenericBootstrapConfig)", + Message: "some message", }, }, { @@ -304,7 +304,7 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, 
Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, - Message: "some message (from GenericInfrastructureMachine)", + Message: "some message", }, }, { @@ -493,61 +493,67 @@ func TestSummarizeNodeV1Beta2Conditions(t *testing.T) { { name: "all conditions are unknown", conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, - {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionUnknown}, - {Type: corev1.NodeDiskPressure, Status: corev1.ConditionUnknown}, - {Type: corev1.NodePIDPressure, Status: corev1.ConditionUnknown}, - }, - expectedStatus: metav1.ConditionUnknown, - expectedReason: v1beta2conditions.MultipleUnknownReportedReason, - expectedMessage: "Node Ready: condition is Unknown; Node MemoryPressure: condition is Unknown; Node DiskPressure: condition is Unknown; Node PIDPressure: condition is Unknown", + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown, Message: "Node is not reporting status"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionUnknown, Message: "Node is not reporting status"}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionUnknown, Message: "Node is not reporting status"}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionUnknown, Message: "Node is not reporting status"}, + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: v1beta2conditions.MultipleUnknownReportedReason, + expectedMessage: "* Node.Ready: Node is not reporting status\n" + + "* Node.MemoryPressure: Node is not reporting status\n" + + "* Node.DiskPressure: Node is not reporting status\n" + + "* Node.PIDPressure: Node is not reporting status", }, { name: "multiple semantically failed condition", conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, - {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionTrue}, - {Type: corev1.NodeDiskPressure, Status: corev1.ConditionTrue}, - {Type: corev1.NodePIDPressure, Status: 
corev1.ConditionTrue}, - }, - expectedStatus: metav1.ConditionFalse, - expectedReason: v1beta2conditions.MultipleIssuesReportedReason, - expectedMessage: "Node Ready: condition is Unknown; Node MemoryPressure: condition is True; Node DiskPressure: condition is True; Node PIDPressure: condition is True", + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown, Message: "Node is not reporting status"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionTrue, Message: "kubelet has NOT sufficient memory available"}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionTrue, Message: "kubelet has disk pressure"}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionTrue, Message: "kubelet has NOT sufficient PID available"}, + }, + expectedStatus: metav1.ConditionFalse, + expectedReason: v1beta2conditions.MultipleIssuesReportedReason, + expectedMessage: "* Node.Ready: Node is not reporting status\n" + + "* Node.MemoryPressure: kubelet has NOT sufficient memory available\n" + + "* Node.DiskPressure: kubelet has disk pressure\n" + + "* Node.PIDPressure: kubelet has NOT sufficient PID available", }, { name: "one semantically failed condition when the rest is healthy", conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason"}, - {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason", Message: "kubelet is NOT ready"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient memory available"}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse, Message: "kubelet has no disk pressure"}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient PID available"}, }, expectedStatus: 
metav1.ConditionFalse, expectedReason: "SomeReason", - expectedMessage: "Node Ready: condition is False", + expectedMessage: "* Node.Ready: kubelet is NOT ready", }, { name: "one unknown condition when the rest is healthy", conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionUnknown, Reason: "SomeReason"}, - {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown, Reason: "SomeReason", Message: "Node is not reporting status"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient memory available"}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse, Message: "kubelet has no disk pressure"}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient PID available"}, }, expectedStatus: metav1.ConditionUnknown, expectedReason: "SomeReason", - expectedMessage: "Node Ready: condition is Unknown", + expectedMessage: "* Node.Ready: Node is not reporting status", }, { name: "one condition missing", conditions: []corev1.NodeCondition{ - {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, - {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient memory available"}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse, Message: "kubelet has no disk pressure"}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse, Message: "kubelet has sufficient PID available"}, }, expectedStatus: metav1.ConditionUnknown, expectedReason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason, - expectedMessage: "Node Ready: condition not yet reported", + 
expectedMessage: "* Node.Ready: Condition not yet reported", }, } for _, test := range testCases { @@ -662,7 +668,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { node: &corev1.Node{ Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ - {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason", Message: "Some message"}, + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason", Message: "kubelet is NOT ready"}, {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, @@ -675,13 +681,13 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", - Message: "Node Ready: condition is False", + Message: "* Node.Ready: kubelet is NOT ready", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", - Message: "Some message (from Node)", + Message: "* Node.Ready: kubelet is NOT ready", }, }, }, @@ -710,7 +716,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: "KubeletReady", - Message: "kubelet is posting ready status (from Node)", + Message: "* Node.Ready: kubelet is posting ready status", }, }, }, @@ -733,7 +739,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason, - Message: "Node Ready: condition not yet reported", + Message: "* Node.Ready: Condition not yet reported", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, @@ -890,7 +896,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: 
metav1.ConditionTrue, Reason: "KubeletReady", - Message: "kubelet is posting ready status (from Node)", + Message: "kubelet is posting ready status", }) return m }(), @@ -910,7 +916,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: "KubeletReady", - Message: "kubelet is posting ready status (from Node)", + Message: "kubelet is posting ready status", }, }, }, @@ -969,7 +975,7 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: "KubeletReady", - Message: "kubelet is posting ready status (from Node)", + Message: "kubelet is posting ready status)", }) return m }(), @@ -1240,7 +1246,7 @@ func TestSetReadyCondition(t *testing.T) { Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineDeletingV1Beta2Reason, - Message: "Deleting: Machine deletion in progress, stage: WaitingForPreDrainHook", + Message: "* Deleting: Machine deletion in progress, stage: WaitingForPreDrainHook", }, }, { @@ -1345,7 +1351,7 @@ func TestSetReadyCondition(t *testing.T) { Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineDeletingV1Beta2Reason, - Message: "Deleting: Machine deletion in progress, stage: DrainingNode", + Message: "* Deleting: Machine deletion in progress, stage: DrainingNode", }, }, { @@ -1392,7 +1398,7 @@ func TestSetReadyCondition(t *testing.T) { Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", - Message: "HealthCheckSucceeded: Some message", + Message: "* HealthCheckSucceeded: Some message", }, }, { @@ -1451,7 +1457,7 @@ func TestSetReadyCondition(t *testing.T) { Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "SomeReason", - Message: "MyReadinessGate: Some message", + Message: "* MyReadinessGate: Some 
message", }, }, } diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go index 5ebaebaa5983..cefde3315aa9 100644 --- a/internal/controllers/machine/machine_controller_test.go +++ b/internal/controllers/machine/machine_controller_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + "github.com/go-logr/logr" . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -38,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -951,9 +953,10 @@ func TestReconcileRequest(t *testing.T) { recorder: record.NewFakeRecorder(10), reconcileDeleteCache: cache.New[cache.ReconcileEntry](), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: clientFake.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: clientFake.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1242,9 +1245,10 @@ func TestMachineConditions(t *testing.T) { ClusterCache: clustercache.NewFakeClusterCache(clientFake, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), ssaCache: ssa.NewCache(), externalTracker: external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: clientFake.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: clientFake.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), }, } @@ -1564,11 +1568,11 @@ func TestDrainNode(t *testing.T) { Severity: clusterv1.ConditionSeverityInfo, Reason: clusterv1.DrainingReason, Message: `Drain not completed 
yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: test-namespace/pod-2-delete-running-deployment-pod`, +* Pod test-namespace/pod-2-delete-running-deployment-pod: deletionTimestamp set, but still not removed from the Node`, }, wantDeletingReason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason, wantDeletingMessage: `Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: test-namespace/pod-2-delete-running-deployment-pod`, +* Pod test-namespace/pod-2-delete-running-deployment-pod: deletionTimestamp set, but still not removed from the Node`, }, { name: "Node does exist but is unreachable, no Pods have to be drained because they all have old deletionTimestamps", @@ -1805,11 +1809,11 @@ func TestDrainNode_withCaching(t *testing.T) { Severity: clusterv1.ConditionSeverityInfo, Reason: clusterv1.DrainingReason, Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: test-namespace/pod-delete-running-deployment-pod`, +* Pod test-namespace/pod-delete-running-deployment-pod: deletionTimestamp set, but still not removed from the Node`, })) g.Expect(s.deletingReason).To(Equal(clusterv1.MachineDeletingDrainingNodeV1Beta2Reason)) g.Expect(s.deletingMessage).To(Equal(`Drain not completed yet (started at 2024-10-09T16:13:59Z): -* Pods with deletionTimestamp that still exist: test-namespace/pod-delete-running-deployment-pod`)) +* Pod test-namespace/pod-delete-running-deployment-pod: deletionTimestamp set, but still not removed from the Node`)) // Node should be cordoned. 
gotNode := &corev1.Node{} diff --git a/internal/controllers/machinedeployment/machinedeployment_status.go b/internal/controllers/machinedeployment/machinedeployment_status.go index 24cee4926993..c614bdd0a193 100644 --- a/internal/controllers/machinedeployment/machinedeployment_status.go +++ b/internal/controllers/machinedeployment/machinedeployment_status.go @@ -487,10 +487,7 @@ func aggregateUnhealthyMachines(machines collections.Machines) string { return "" } - machineNames := []string{} - for _, machine := range machines { - machineNames = append(machineNames, machine.GetName()) - } + machineNames := machines.Names() if len(machineNames) == 0 { return "" diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go index ce10e30cff06..1bc48bc29bda 100644 --- a/internal/controllers/machinedeployment/machinedeployment_status_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go @@ -670,7 +670,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition Ready not yet reported from Machine machine-2", + Message: "* Machine machine-2: Condition Ready not yet reported", }, }, { @@ -699,10 +699,12 @@ func Test_setMachinesReadyCondition(t *testing.T) { }, getMachinesSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Deleting: Machine deletion in progress, stage: DrainingNode from Machine machine-4; HealthCheckSucceeded: Some message from Machine machine-2; Some unknown message from Machine machine-3", + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: 
v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machine machine-2: HealthCheckSucceeded: Some message\n" + + "* Machine machine-4: Deleting: Machine deletion in progress, stage: DrainingNode\n" + + "* Machine machine-3: Some unknown message", }, }, } @@ -789,7 +791,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: "some-unknown-reason-1", - Message: "some unknown message from Machine unknown-1", + Message: "* Machine unknown-1: some unknown message", }, }, { @@ -808,7 +810,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "some-not-up-to-date-reason", - Message: "some not up-to-date message from Machine not-up-to-date-machine-1", + Message: "* Machine not-up-to-date-machine-1: some not up-to-date message", }, }, { @@ -822,7 +824,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition UpToDate not yet reported from Machine no-condition-machine-1", + Message: "* Machine no-condition-machine-1: Condition UpToDate not yet reported", }, }, { @@ -856,10 +858,11 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { }, getMachinesSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "This is not up-to-date message from Machines not-up-to-date-machine-1, not-up-to-date-machine-2; Condition UpToDate not yet reported from Machines no-condition-machine-1, no-condition-machine-2", + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: 
v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machines not-up-to-date-machine-1, not-up-to-date-machine-2: This is not up-to-date message\n" + + "* Machines no-condition-machine-1, no-condition-machine-2: Condition UpToDate not yet reported", }, }, } @@ -884,7 +887,7 @@ func Test_setRemediatingCondition(t *testing.T) { healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} - ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} tests := []struct { name string @@ -932,7 +935,7 @@ func Test_setRemediatingCondition(t *testing.T) { Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, Status: metav1.ConditionTrue, Reason: clusterv1.MachineDeploymentRemediatingV1Beta2Reason, - Message: "Remediation in progress from Machine m3", + Message: "* Machine m3: Machine deletionTimestamp set", }, }, { diff --git a/internal/controllers/machinedeployment/mdutil/util.go b/internal/controllers/machinedeployment/mdutil/util.go index 21b119a1861e..bf0694859cdb 100644 --- a/internal/controllers/machinedeployment/mdutil/util.go +++ b/internal/controllers/machinedeployment/mdutil/util.go @@ -20,6 +20,7 @@ package mdutil import ( "context" "fmt" + "reflect" "sort" "strconv" "strings" @@ -38,7 +39,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" clusterv1 
"sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/internal/util/compare" "sigs.k8s.io/cluster-api/util/conversion" ) @@ -371,13 +371,50 @@ func getMachineSetFraction(ms clusterv1.MachineSet, md clusterv1.MachineDeployme return integer.RoundToInt32(newMSsize) - *(ms.Spec.Replicas) } -// EqualMachineTemplate returns true if two given machineTemplateSpec are equal, -// ignoring all the in-place propagated fields, and the version from external references. -func EqualMachineTemplate(template1, template2 *clusterv1.MachineTemplateSpec) (equal bool, diff string, err error) { - t1Copy := MachineTemplateDeepCopyRolloutFields(template1) - t2Copy := MachineTemplateDeepCopyRolloutFields(template2) +// MachineTemplateUpToDate returns true if the current MachineTemplateSpec is up-to-date with a corresponding desired MachineTemplateSpec. +// Note: The comparison does not consider any in-place propagated fields, as well as the version from external references. +func MachineTemplateUpToDate(current, desired *clusterv1.MachineTemplateSpec) (upToDate bool, logMessages, conditionMessages []string) { + currentCopy := MachineTemplateDeepCopyRolloutFields(current) + desiredCopy := MachineTemplateDeepCopyRolloutFields(desired) - return compare.Diff(t1Copy, t2Copy) + if !reflect.DeepEqual(currentCopy.Spec.Version, desiredCopy.Spec.Version) { + logMessages = append(logMessages, fmt.Sprintf("spec.version %s, %s required", ptr.Deref(currentCopy.Spec.Version, "nil"), ptr.Deref(desiredCopy.Spec.Version, "nil"))) + conditionMessages = append(conditionMessages, fmt.Sprintf("Version %s, %s required", ptr.Deref(currentCopy.Spec.Version, "nil"), ptr.Deref(desiredCopy.Spec.Version, "nil"))) + } + + // Note: we return a message based on desired.bootstrap.ConfigRef != nil, but we always compare the entire bootstrap + // struct to catch cases when either configRef or dataSecretName is set in current vs desired (usually MachineTemplates + // have ConfigRef != nil, might be in some edge 
case dataSecret are used, but switching from one to another is not a + // common operation so it is acceptable to handle it in this way). + if currentCopy.Spec.Bootstrap.ConfigRef != nil { + if !reflect.DeepEqual(currentCopy.Spec.Bootstrap, desiredCopy.Spec.Bootstrap) { + logMessages = append(logMessages, fmt.Sprintf("spec.bootstrap.configRef %s %s, %s %s required", currentCopy.Spec.Bootstrap.ConfigRef.Kind, currentCopy.Spec.Bootstrap.ConfigRef.Name, ptr.Deref(desiredCopy.Spec.Bootstrap.ConfigRef, corev1.ObjectReference{}).Kind, ptr.Deref(desiredCopy.Spec.Bootstrap.ConfigRef, corev1.ObjectReference{}).Name)) + // Note: dropping "Template" suffix because conditions message will surface on machine. + conditionMessages = append(conditionMessages, fmt.Sprintf("%s is not up-to-date", strings.TrimSuffix(currentCopy.Spec.Bootstrap.ConfigRef.Kind, clusterv1.TemplateSuffix))) + } + } else { + if !reflect.DeepEqual(currentCopy.Spec.Bootstrap, desiredCopy.Spec.Bootstrap) { + logMessages = append(logMessages, fmt.Sprintf("spec.bootstrap.dataSecretName %s, %s required", ptr.Deref(currentCopy.Spec.Bootstrap.DataSecretName, "nil"), ptr.Deref(desiredCopy.Spec.Bootstrap.DataSecretName, "nil"))) + conditionMessages = append(conditionMessages, fmt.Sprintf("spec.bootstrap.dataSecretName %s, %s required", ptr.Deref(currentCopy.Spec.Bootstrap.DataSecretName, "nil"), ptr.Deref(desiredCopy.Spec.Bootstrap.DataSecretName, "nil"))) + } + } + + if !reflect.DeepEqual(currentCopy.Spec.InfrastructureRef, desiredCopy.Spec.InfrastructureRef) { + logMessages = append(logMessages, fmt.Sprintf("spec.infrastructureRef %s %s, %s %s required", currentCopy.Spec.InfrastructureRef.Kind, currentCopy.Spec.InfrastructureRef.Name, desiredCopy.Spec.InfrastructureRef.Kind, desiredCopy.Spec.InfrastructureRef.Name)) + // Note: dropping "Template" suffix because conditions message will surface on machine. 
+ conditionMessages = append(conditionMessages, fmt.Sprintf("%s is not up-to-date", strings.TrimSuffix(currentCopy.Spec.InfrastructureRef.Kind, clusterv1.TemplateSuffix))) + } + + if !reflect.DeepEqual(currentCopy.Spec.FailureDomain, desiredCopy.Spec.FailureDomain) { + logMessages = append(logMessages, fmt.Sprintf("spec.failureDomain %s, %s required", ptr.Deref(currentCopy.Spec.FailureDomain, "nil"), ptr.Deref(desiredCopy.Spec.FailureDomain, "nil"))) + conditionMessages = append(conditionMessages, fmt.Sprintf("Failure domain %s, %s required", ptr.Deref(currentCopy.Spec.FailureDomain, "nil"), ptr.Deref(desiredCopy.Spec.FailureDomain, "nil"))) + } + + if len(logMessages) > 0 || len(conditionMessages) > 0 { + return false, logMessages, conditionMessages + } + + return true, nil, nil } // MachineTemplateDeepCopyRolloutFields copies a MachineTemplateSpec @@ -386,6 +423,9 @@ func EqualMachineTemplate(template1, template2 *clusterv1.MachineTemplateSpec) ( func MachineTemplateDeepCopyRolloutFields(template *clusterv1.MachineTemplateSpec) *clusterv1.MachineTemplateSpec { templateCopy := template.DeepCopy() + // Moving MD from one cluster to another is not supported. + templateCopy.Spec.ClusterName = "" + // Drop labels and annotations templateCopy.Labels = nil templateCopy.Annotations = nil @@ -413,7 +453,7 @@ func MachineTemplateDeepCopyRolloutFields(template *clusterv1.MachineTemplateSpe // NOTE: If we find a matching MachineSet which only differs in in-place mutable fields we can use it to // fulfill the intent of the MachineDeployment by just updating the MachineSet to propagate in-place mutable fields. // Thus we don't have to create a new MachineSet and we can avoid an unnecessary rollout. 
-// NOTE: Even after we changed EqualMachineTemplate to ignore fields that are propagated in-place we can guarantee that if there exists a "new machineset" +// NOTE: Even after we changed MachineTemplateUpToDate to ignore fields that are propagated in-place we can guarantee that if there exists a "new machineset" // using the old logic then a new machineset will definitely exist using the new logic. The new logic is looser. Therefore, we will // not face a case where there exists a machine set matching the old logic but there does not exist a machineset matching the new logic. // In fact previously not matching MS can now start matching the target. Since there could be multiple matches, lets choose the @@ -432,19 +472,16 @@ func FindNewMachineSet(deployment *clusterv1.MachineDeployment, msList []*cluste var matchingMachineSets []*clusterv1.MachineSet var diffs []string for _, ms := range msList { - equal, diff, err := EqualMachineTemplate(&ms.Spec.Template, &deployment.Spec.Template) - if err != nil { - return nil, "", errors.Wrapf(err, "failed to compare MachineDeployment spec template with MachineSet %s", ms.Name) - } - if equal { + upToDate, logMessages, _ := MachineTemplateUpToDate(&ms.Spec.Template, &deployment.Spec.Template) + if upToDate { matchingMachineSets = append(matchingMachineSets, ms) } else { - diffs = append(diffs, fmt.Sprintf("MachineSet %s: diff: %s", ms.Name, diff)) + diffs = append(diffs, fmt.Sprintf("MachineSet %s: diff: %s", ms.Name, strings.Join(logMessages, ", "))) } } if len(matchingMachineSets) == 0 { - return nil, fmt.Sprintf("couldn't find MachineSet matching MachineDeployment spec template: %s", strings.Join(diffs, ",")), nil + return nil, fmt.Sprintf("couldn't find MachineSet matching MachineDeployment spec template: %s", strings.Join(diffs, "; ")), nil } // If RolloutAfter is not set, pick the first matching MachineSet. 
diff --git a/internal/controllers/machinedeployment/mdutil/util_test.go b/internal/controllers/machinedeployment/mdutil/util_test.go index e178c47e35af..db2e958eaccf 100644 --- a/internal/controllers/machinedeployment/mdutil/util_test.go +++ b/internal/controllers/machinedeployment/mdutil/util_test.go @@ -171,7 +171,7 @@ func TestMachineSetsByDecreasingReplicas(t *testing.T) { } } -func TestEqualMachineTemplate(t *testing.T) { +func TestMachineTemplateUpToDate(t *testing.T) { machineTemplate := &clusterv1.MachineTemplateSpec{ ObjectMeta: clusterv1.ObjectMeta{ Labels: map[string]string{"l1": "v1"}, @@ -187,14 +187,14 @@ func TestEqualMachineTemplate(t *testing.T) { InfrastructureRef: corev1.ObjectReference{ Name: "infra1", Namespace: "default", - Kind: "InfrastructureMachine", + Kind: "InfrastructureMachineTemplate", APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", }, Bootstrap: clusterv1.Bootstrap{ ConfigRef: &corev1.ObjectReference{ Name: "bootstrap1", Namespace: "default", - Kind: "BootstrapConfig", + Kind: "BootstrapConfigTemplate", APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", }, }, @@ -236,9 +236,12 @@ func TestEqualMachineTemplate(t *testing.T) { machineTemplateWithDifferentInfraRefAPIVersion := machineTemplate.DeepCopy() machineTemplateWithDifferentInfraRefAPIVersion.Spec.InfrastructureRef.APIVersion = "infrastructure.cluster.x-k8s.io/v1beta2" - machineTemplateWithDifferentBootstrap := machineTemplate.DeepCopy() - machineTemplateWithDifferentBootstrap.Spec.Bootstrap.ConfigRef = nil - machineTemplateWithDifferentBootstrap.Spec.Bootstrap.DataSecretName = ptr.To("data-secret") + machineTemplateWithBootstrapDataSecret := machineTemplate.DeepCopy() + machineTemplateWithBootstrapDataSecret.Spec.Bootstrap.ConfigRef = nil + machineTemplateWithBootstrapDataSecret.Spec.Bootstrap.DataSecretName = ptr.To("data-secret1") + + machineTemplateWithDifferentBootstrapDataSecret := machineTemplateWithBootstrapDataSecret.DeepCopy() + 
machineTemplateWithDifferentBootstrapDataSecret.Spec.Bootstrap.DataSecretName = ptr.To("data-secret2") machineTemplateWithDifferentBootstrapConfigRef := machineTemplate.DeepCopy() machineTemplateWithDifferentBootstrapConfigRef.Spec.Bootstrap.ConfigRef.Name = "bootstrap2" @@ -247,279 +250,122 @@ func TestEqualMachineTemplate(t *testing.T) { machineTemplateWithDifferentBootstrapConfigRefAPIVersion.Spec.Bootstrap.ConfigRef.APIVersion = "bootstrap.cluster.x-k8s.io/v1beta2" tests := []struct { - Name string - Former, Latter *clusterv1.MachineTemplateSpec - Expected bool - Diff1 string - Diff2 string + Name string + current, desired *clusterv1.MachineTemplateSpec + expectedUpToDate bool + expectedLogMessages1 []string + expectedLogMessages2 []string + expectedConditionMessages1 []string + expectedConditionMessages2 []string }{ { Name: "Same spec", // Note: This test ensures that two MachineTemplates are equal even if the pointers differ. - Former: machineTemplate, - Latter: machineTemplateEqual, - Expected: true, - }, - { - Name: "Same spec, except latter does not have labels", - Former: machineTemplate, - Latter: machineTemplateWithEmptyLabels, - Expected: true, - }, - { - Name: "Same spec, except latter has different labels", - Former: machineTemplate, - Latter: machineTemplateWithDifferentLabels, - Expected: true, - }, - { - Name: "Same spec, except latter does not have annotations", - Former: machineTemplate, - Latter: machineTemplateWithEmptyAnnotations, - Expected: true, - }, - { - Name: "Same spec, except latter has different annotations", - Former: machineTemplate, - Latter: machineTemplateWithDifferentAnnotations, - Expected: true, - }, - { - Name: "Spec changes, latter has different in-place mutable spec fields", - Former: machineTemplate, - Latter: machineTemplateWithDifferentInPlaceMutableSpecFields, - Expected: true, - }, - { - Name: "Spec changes, latter has different ClusterName", - // Note: ClusterName is immutable, but EqualMachineTemplate should still 
work correctly independent of that. - Former: machineTemplate, - Latter: machineTemplateWithDifferentClusterName, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ -- ClusterName: "cluster1", -+ ClusterName: "cluster2", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - ... // 7 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ -- ClusterName: "cluster2", -+ ClusterName: "cluster1", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - ... // 7 identical fields - }, - }`, - }, - { - Name: "Spec changes, latter has different Version", - Former: machineTemplate, - Latter: machineTemplateWithDifferentVersion, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, -- Version: &"v1.25.0", -+ Version: &"v1.26.0", - ProviderID: nil, - FailureDomain: &"failure-domain1", - ... 
// 4 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, -- Version: &"v1.26.0", -+ Version: &"v1.25.0", - ProviderID: nil, - FailureDomain: &"failure-domain1", - ... // 4 identical fields - }, - }`, - }, - { - Name: "Spec changes, latter has different FailureDomain", - Former: machineTemplate, - Latter: machineTemplateWithDifferentFailureDomain, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ... // 3 identical fields - Version: &"v1.25.0", - ProviderID: nil, -- FailureDomain: &"failure-domain1", -+ FailureDomain: &"failure-domain2", - ReadinessGates: nil, - NodeDrainTimeout: nil, - ... // 2 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ... // 3 identical fields - Version: &"v1.25.0", - ProviderID: nil, -- FailureDomain: &"failure-domain2", -+ FailureDomain: &"failure-domain1", - ReadinessGates: nil, - NodeDrainTimeout: nil, - ... 
// 2 identical fields - }, - }`, - }, - { - Name: "Spec changes, latter has different InfrastructureRef", - Former: machineTemplate, - Latter: machineTemplateWithDifferentInfraRef, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: v1.ObjectReference{ - Kind: "InfrastructureMachine", - Namespace: "default", -- Name: "infra1", -+ Name: "infra2", - UID: "", - APIVersion: "infrastructure.cluster.x-k8s.io", - ... // 2 identical fields - }, - Version: &"v1.25.0", - ProviderID: nil, - ... // 5 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: {ConfigRef: &{Kind: "BootstrapConfig", Namespace: "default", Name: "bootstrap1", APIVersion: "bootstrap.cluster.x-k8s.io", ...}}, - InfrastructureRef: v1.ObjectReference{ - Kind: "InfrastructureMachine", - Namespace: "default", -- Name: "infra2", -+ Name: "infra1", - UID: "", - APIVersion: "infrastructure.cluster.x-k8s.io", - ... // 2 identical fields - }, - Version: &"v1.25.0", - ProviderID: nil, - ... 
// 5 identical fields - }, - }`, - }, - { - Name: "Spec changes, latter has different Bootstrap", - Former: machineTemplate, - Latter: machineTemplateWithDifferentBootstrap, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: v1beta1.Bootstrap{ -- ConfigRef: s"&ObjectReference{Kind:BootstrapConfig,Namespace:default,Name:bootstrap1,UID:,APIVersion:bootstrap.cluster.x-k8s.io,ResourceVersion:,FieldPath:,}", -+ ConfigRef: nil, -- DataSecretName: nil, -+ DataSecretName: &"data-secret", - }, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - Version: &"v1.25.0", - ... // 6 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: v1beta1.Bootstrap{ -- ConfigRef: nil, -+ ConfigRef: s"&ObjectReference{Kind:BootstrapConfig,Namespace:default,Name:bootstrap1,UID:,APIVersion:bootstrap.cluster.x-k8s.io,ResourceVersion:,FieldPath:,}", -- DataSecretName: &"data-secret", -+ DataSecretName: nil, - }, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - Version: &"v1.25.0", - ... // 6 identical fields - }, - }`, - }, - { - Name: "Spec changes, latter has different Bootstrap.ConfigRef", - Former: machineTemplate, - Latter: machineTemplateWithDifferentBootstrapConfigRef, - Expected: false, - Diff1: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: v1beta1.Bootstrap{ - ConfigRef: &v1.ObjectReference{ - Kind: "BootstrapConfig", - Namespace: "default", -- Name: "bootstrap1", -+ Name: "bootstrap2", - UID: "", - APIVersion: "bootstrap.cluster.x-k8s.io", - ... 
// 2 identical fields - }, - DataSecretName: nil, - }, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - Version: &"v1.25.0", - ... // 6 identical fields - }, - }`, - Diff2: `&v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "cluster1", - Bootstrap: v1beta1.Bootstrap{ - ConfigRef: &v1.ObjectReference{ - Kind: "BootstrapConfig", - Namespace: "default", -- Name: "bootstrap2", -+ Name: "bootstrap1", - UID: "", - APIVersion: "bootstrap.cluster.x-k8s.io", - ... // 2 identical fields - }, - DataSecretName: nil, - }, - InfrastructureRef: {Kind: "InfrastructureMachine", Namespace: "default", Name: "infra1", APIVersion: "infrastructure.cluster.x-k8s.io", ...}, - Version: &"v1.25.0", - ... // 6 identical fields - }, - }`, - }, - { - Name: "Same spec, except latter has different InfrastructureRef APIVersion", - Former: machineTemplate, - Latter: machineTemplateWithDifferentInfraRefAPIVersion, - Expected: true, - }, - { - Name: "Same spec, except latter has different Bootstrap.ConfigRef APIVersion", - Former: machineTemplate, - Latter: machineTemplateWithDifferentBootstrapConfigRefAPIVersion, - Expected: true, + current: machineTemplate, + desired: machineTemplateEqual, + expectedUpToDate: true, + }, + { + Name: "Same spec, except desired does not have labels", + current: machineTemplate, + desired: machineTemplateWithEmptyLabels, + expectedUpToDate: true, + }, + { + Name: "Same spec, except desired has different labels", + current: machineTemplate, + desired: machineTemplateWithDifferentLabels, + expectedUpToDate: true, + }, + { + Name: "Same spec, except desired does not have annotations", + current: machineTemplate, + desired: machineTemplateWithEmptyAnnotations, + expectedUpToDate: true, + }, + { + Name: "Same spec, except desired has different annotations", + current: machineTemplate, + desired: machineTemplateWithDifferentAnnotations, 
+ expectedUpToDate: true, + }, + { + Name: "Spec changes, desired has different in-place mutable spec fields", + current: machineTemplate, + desired: machineTemplateWithDifferentInPlaceMutableSpecFields, + expectedUpToDate: true, + }, + { + Name: "Spec changes, desired has different Version", + current: machineTemplate, + desired: machineTemplateWithDifferentVersion, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.version v1.25.0, v1.26.0 required"}, + expectedLogMessages2: []string{"spec.version v1.26.0, v1.25.0 required"}, + expectedConditionMessages1: []string{"Version v1.25.0, v1.26.0 required"}, + expectedConditionMessages2: []string{"Version v1.26.0, v1.25.0 required"}, + }, + { + Name: "Spec changes, desired has different FailureDomain", + current: machineTemplate, + desired: machineTemplateWithDifferentFailureDomain, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.failureDomain failure-domain1, failure-domain2 required"}, + expectedLogMessages2: []string{"spec.failureDomain failure-domain2, failure-domain1 required"}, + expectedConditionMessages1: []string{"Failure domain failure-domain1, failure-domain2 required"}, + expectedConditionMessages2: []string{"Failure domain failure-domain2, failure-domain1 required"}, + }, + { + Name: "Spec changes, desired has different InfrastructureRef", + current: machineTemplate, + desired: machineTemplateWithDifferentInfraRef, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.infrastructureRef InfrastructureMachineTemplate infra1, InfrastructureMachineTemplate infra2 required"}, + expectedLogMessages2: []string{"spec.infrastructureRef InfrastructureMachineTemplate infra2, InfrastructureMachineTemplate infra1 required"}, + expectedConditionMessages1: []string{"InfrastructureMachine is not up-to-date"}, + expectedConditionMessages2: []string{"InfrastructureMachine is not up-to-date"}, + }, + { + Name: "Spec changes, desired has different Bootstrap data secret", + current: 
machineTemplateWithBootstrapDataSecret, + desired: machineTemplateWithDifferentBootstrapDataSecret, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.bootstrap.dataSecretName data-secret1, data-secret2 required"}, + expectedLogMessages2: []string{"spec.bootstrap.dataSecretName data-secret2, data-secret1 required"}, + expectedConditionMessages1: []string{"spec.bootstrap.dataSecretName data-secret1, data-secret2 required"}, + expectedConditionMessages2: []string{"spec.bootstrap.dataSecretName data-secret2, data-secret1 required"}, + }, + { + Name: "Spec changes, desired has different Bootstrap.ConfigRef", + current: machineTemplate, + desired: machineTemplateWithDifferentBootstrapConfigRef, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.bootstrap.configRef BootstrapConfigTemplate bootstrap1, BootstrapConfigTemplate bootstrap2 required"}, + expectedLogMessages2: []string{"spec.bootstrap.configRef BootstrapConfigTemplate bootstrap2, BootstrapConfigTemplate bootstrap1 required"}, + expectedConditionMessages1: []string{"BootstrapConfig is not up-to-date"}, + expectedConditionMessages2: []string{"BootstrapConfig is not up-to-date"}, + }, + { + Name: "Spec changes, desired has data secret instead of Bootstrap.ConfigRef", + current: machineTemplate, + desired: machineTemplateWithBootstrapDataSecret, + expectedUpToDate: false, + expectedLogMessages1: []string{"spec.bootstrap.configRef BootstrapConfigTemplate bootstrap1, required"}, + expectedLogMessages2: []string{"spec.bootstrap.dataSecretName data-secret1, nil required"}, + expectedConditionMessages1: []string{"BootstrapConfig is not up-to-date"}, + expectedConditionMessages2: []string{"spec.bootstrap.dataSecretName data-secret1, nil required"}, + }, + { + Name: "Same spec, except desired has different InfrastructureRef APIVersion", + current: machineTemplate, + desired: machineTemplateWithDifferentInfraRefAPIVersion, + expectedUpToDate: true, + }, + { + Name: "Same spec, except desired 
has different Bootstrap.ConfigRef APIVersion", + current: machineTemplate, + desired: machineTemplateWithDifferentBootstrapConfigRefAPIVersion, + expectedUpToDate: true, }, } @@ -527,19 +373,19 @@ func TestEqualMachineTemplate(t *testing.T) { t.Run(test.Name, func(t *testing.T) { g := NewWithT(t) - runTest := func(t1, t2 *clusterv1.MachineTemplateSpec, expectedDiff string) { + runTest := func(t1, t2 *clusterv1.MachineTemplateSpec, expectedLogMessages, expectedConditionMessages []string) { // Run - equal, diff, err := EqualMachineTemplate(t1, t2) - g.Expect(err).ToNot(HaveOccurred()) - g.Expect(equal).To(Equal(test.Expected)) - g.Expect(diff).To(BeComparableTo(expectedDiff)) + upToDate, logMessages, conditionMessages := MachineTemplateUpToDate(t1, t2) + g.Expect(upToDate).To(Equal(test.expectedUpToDate)) + g.Expect(logMessages).To(Equal(expectedLogMessages)) + g.Expect(conditionMessages).To(Equal(expectedConditionMessages)) g.Expect(t1.Labels).NotTo(BeNil()) g.Expect(t2.Labels).NotTo(BeNil()) } - runTest(test.Former, test.Latter, test.Diff1) + runTest(test.current, test.desired, test.expectedLogMessages1, test.expectedConditionMessages1) // Test the same case in reverse order - runTest(test.Latter, test.Former, test.Diff2) + runTest(test.desired, test.current, test.expectedLogMessages2, test.expectedConditionMessages2) }) } } @@ -552,6 +398,7 @@ func TestFindNewMachineSet(t *testing.T) { twoAfterRolloutAfter := metav1.NewTime(oneAfterRolloutAfter.Add(time.Minute)) deployment := generateDeployment("nginx") + deployment.Spec.Template.Spec.InfrastructureRef.Kind = "InfrastructureMachineTemplate" deployment.Spec.Template.Spec.InfrastructureRef.Name = "new-infra-ref" deploymentWithRolloutAfter := deployment.DeepCopy() @@ -608,29 +455,11 @@ func TestFindNewMachineSet(t *testing.T) { expected: &matchingMSDiffersInPlaceMutableFields, }, { - Name: "Get nil if no MachineSet matches the desired intent of the MachineDeployment", - deployment: deployment, - msList: 
[]*clusterv1.MachineSet{&oldMS}, - expected: nil, - createReason: fmt.Sprintf(`couldn't find MachineSet matching MachineDeployment spec template: MachineSet %s: diff: &v1beta1.MachineTemplateSpec{ - ObjectMeta: {}, - Spec: v1beta1.MachineSpec{ - ClusterName: "", - Bootstrap: {}, - InfrastructureRef: v1.ObjectReference{ - Kind: "", - Namespace: "", -- Name: "old-infra-ref", -+ Name: "new-infra-ref", - UID: "", - APIVersion: "", - ... // 2 identical fields - }, - Version: nil, - ProviderID: nil, - ... // 5 identical fields - }, - }`, oldMS.Name), + Name: "Get nil if no MachineSet matches the desired intent of the MachineDeployment", + deployment: deployment, + msList: []*clusterv1.MachineSet{&oldMS}, + expected: nil, + createReason: fmt.Sprintf(`couldn't find MachineSet matching MachineDeployment spec template: MachineSet %s: diff: spec.infrastructureRef InfrastructureMachineTemplate old-infra-ref, InfrastructureMachineTemplate new-infra-ref required`, oldMS.Name), }, { Name: "Get the MachineSet if reconciliationTime < rolloutAfter", diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go index 96b8560a8963..968ad03484aa 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller.go @@ -160,7 +160,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re defer func() { // Always attempt to patch the object and status after each reconciliation. 
// Patch ObservedGeneration only if the reconciliation completed successfully - patchOpts := []patch.Option{} + patchOpts := []patch.Option{ + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.RemediationAllowedCondition, + }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineHealthCheckRemediationAllowedV1Beta2Condition, + }}, + } if reterr == nil { patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{}) } @@ -300,7 +307,18 @@ func (r *Reconciler) reconcile(ctx context.Context, logger logr.Logger, cluster } errList := []error{} for _, t := range append(healthy, unhealthy...) { - if err := t.patchHelper.Patch(ctx, t.Machine); err != nil { + patchOpts := []patch.Option{ + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededCondition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. + }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. + // (Same for ExternallyRemediated condition) + }}, + } + if err := t.patchHelper.Patch(ctx, t.Machine, patchOpts...); err != nil { errList = append(errList, errors.Wrapf(err, "failed to patch machine status for machine: %s/%s", t.Machine.Namespace, t.Machine.Name)) continue } @@ -380,7 +398,18 @@ func (r *Reconciler) patchHealthyTargets(ctx context.Context, logger logr.Logger } } - if err := t.patchHelper.Patch(ctx, t.Machine); err != nil { + patchOpts := []patch.Option{ + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededCondition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. 
+ }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. + // (Same for ExternallyRemediated condition) + }}, + } + if err := t.patchHelper.Patch(ctx, t.Machine, patchOpts...); err != nil { logger.Error(err, "failed to patch healthy machine status for machine", "Machine", klog.KObj(t.Machine)) errList = append(errList, errors.Wrapf(err, "failed to patch healthy machine status for machine: %s/%s", t.Machine.Namespace, t.Machine.Name)) } @@ -477,15 +506,27 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg if ownerRemediatedCondition := v1beta2conditions.Get(t.Machine, clusterv1.MachineOwnerRemediatedV1Beta2Condition); ownerRemediatedCondition == nil || ownerRemediatedCondition.Status == metav1.ConditionTrue { v1beta2conditions.Set(t.Machine, metav1.Condition{ - Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", }) } } } - if err := t.patchHelper.Patch(ctx, t.Machine); err != nil { + patchOpts := []patch.Option{ + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededCondition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. + }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + // Note: intentionally leaving out OwnerRemediated condition which is mostly controlled by the owner. 
+ // (Same for ExternallyRemediated condition) + }}, + } + if err := t.patchHelper.Patch(ctx, t.Machine, patchOpts...); err != nil { errList = append(errList, errors.Wrapf(err, "failed to patch unhealthy machine status for machine: %s/%s", t.Machine.Namespace, t.Machine.Name)) continue } @@ -571,12 +612,12 @@ func (r *Reconciler) nodeToMachineHealthCheck(ctx context.Context, o client.Obje } func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error { - return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.WatchInput{ + return r.ClusterCache.Watch(ctx, util.ObjectKey(cluster), clustercache.NewWatcher(clustercache.WatcherOptions{ Name: "machinehealthcheck-watchClusterNodes", Watcher: r.controller, Kind: &corev1.Node{}, EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachineHealthCheck), - }) + })) } // getMachineFromNode retrieves the machine with a nodeRef to nodeName diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index eadc30a22753..26457fd30b61 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -19,6 +19,7 @@ package machineset import ( "context" "fmt" + "math" "sort" "strings" "time" @@ -50,10 +51,12 @@ import ( "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/controllers/machine" + "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" utilconversion "sigs.k8s.io/cluster-api/util/conversion" "sigs.k8s.io/cluster-api/util/finalizers" "sigs.k8s.io/cluster-api/util/labels/format" @@ -181,8 +184,9 @@ func (r *Reconciler) 
Reconcile(ctx context.Context, req ctrl.Request) (retres ct } s := &scope{ - cluster: cluster, - machineSet: machineSet, + cluster: cluster, + machineSet: machineSet, + reconciliationTime: time.Now(), } // Initialize the patch helper @@ -268,6 +272,8 @@ type scope struct { infrastructureObjectNotFound bool getAndAdoptMachinesForMachineSetSucceeded bool owningMachineDeployment *clusterv1.MachineDeployment + scaleUpPreflightCheckErrMessage string + reconciliationTime time.Time } type machineSetReconcileFunc func(ctx context.Context, s *scope) (ctrl.Result, error) @@ -489,6 +495,9 @@ func (r *Reconciler) syncMachines(ctx context.Context, s *scope) (ctrl.Result, e log := ctrl.LoggerFrom(ctx) for i := range machines { m := machines[i] + + upToDateCondition := newMachineUpToDateCondition(s) + // If the machine is already being deleted, we only need to sync // the subset of fields that impact tearing down a machine if !m.DeletionTimestamp.IsZero() { @@ -503,12 +512,31 @@ func (r *Reconciler) syncMachines(ctx context.Context, s *scope) (ctrl.Result, e m.Spec.NodeDeletionTimeout = machineSet.Spec.Template.Spec.NodeDeletionTimeout m.Spec.NodeVolumeDetachTimeout = machineSet.Spec.Template.Spec.NodeVolumeDetachTimeout - if err := patchHelper.Patch(ctx, m); err != nil { + // Set machine's up to date condition + if upToDateCondition != nil { + v1beta2conditions.Set(m, *upToDateCondition) + } + + if err := patchHelper.Patch(ctx, m, patch.WithOwnedV1Beta2Conditions{Conditions: []string{clusterv1.MachineUpToDateV1Beta2Condition}}); err != nil { return ctrl.Result{}, err } continue } + // Patch the machine's up-to-date condition. + // Note: for the time being we continue to rely on the patch helper for setting conditions; In the future, if + // we will improve patch helper to support SSA, we can revisit this code and perform both this change and the others in place mutations in a single operation. 
+ if upToDateCondition != nil { + patchHelper, err := patch.NewHelper(m, r.Client) + if err != nil { + return ctrl.Result{}, err + } + v1beta2conditions.Set(m, *upToDateCondition) + if err := patchHelper.Patch(ctx, m, patch.WithOwnedV1Beta2Conditions{Conditions: []string{clusterv1.MachineUpToDateV1Beta2Condition}}); err != nil { + return ctrl.Result{}, err + } + } + // Cleanup managed fields of all Machines. // We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA) // (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and @@ -569,6 +597,57 @@ func (r *Reconciler) syncMachines(ctx context.Context, s *scope) (ctrl.Result, e return ctrl.Result{}, nil } +func newMachineUpToDateCondition(s *scope) *metav1.Condition { + // If the current MachineSet is a stand-alone MachineSet, the MachineSet controller does not set an up-to-date condition + // on Machines, allowing tools managing higher level abstractions to set this condition. + // This is also consistent with the fact that the MachineSet controller primarily takes care of the number of Machine + // replicas, it doesn't reconcile them (even if we have a few exceptions like in-place propagation of a few selected + // fields and remediation). + if s.owningMachineDeployment == nil { + return nil + } + + // Determine current and desired state. + // If the current MachineSet is owned by a MachineDeployment, we mirror what is implemented in the MachineDeployment controller + // to trigger rollouts (by creating new MachineSets). 
+ // More specifically: + // - desired state for the Machine is the spec.Template of the MachineDeployment + // - current state for the Machine is the spec.Template of the MachineSet who owns the Machine + // Note: We are intentionally considering current spec from the MachineSet instead of spec from the Machine itself in + // order to surface info consistent with what the MachineDeployment controller uses to take decisions about rollouts. + // The downside is that the system will ignore out of band changes applied to controlled Machines, which is + // considered an acceptable trade-off given that out of band changes are the exception (users should not change + // objects owned by the system). + // However, if out of band changes happen, at least the system will ignore out of band changes consistently, both in the + // MachineDeployment controller and in the condition computed here. + current := &s.machineSet.Spec.Template + desired := &s.owningMachineDeployment.Spec.Template + + upToDate, _, conditionMessages := mdutil.MachineTemplateUpToDate(current, desired) + + if s.owningMachineDeployment.Spec.RolloutAfter != nil { + if s.owningMachineDeployment.Spec.RolloutAfter.Time.Before(s.reconciliationTime) && !s.machineSet.CreationTimestamp.After(s.owningMachineDeployment.Spec.RolloutAfter.Time) { + upToDate = false + conditionMessages = append(conditionMessages, "MachineDeployment spec.rolloutAfter expired") + } + } + + if !upToDate { + return &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpToDateV1Beta2Reason, + Message: strings.Join(conditionMessages, "; "), + } + } + + return &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + } +} + // syncReplicas scales Machine resources up or down. 
func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, error) { ms := s.machineSet @@ -600,6 +679,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e // If the error is not nil use that as the message for the condition. preflightCheckErrMessage = err.Error() } + s.scaleUpPreflightCheckErrMessage = preflightCheckErrMessage conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage) return result, err } @@ -1195,19 +1275,77 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( cluster := s.cluster ms := s.machineSet - filteredMachines := s.machines + machines := s.machines owner := s.owningMachineDeployment log := ctrl.LoggerFrom(ctx) + // Remove OwnerRemediated condition from Machines that have HealthCheckSucceeded condition true + // and OwnerRemediated condition false + errList := []error{} + for _, m := range machines { + if !m.DeletionTimestamp.IsZero() { + continue + } + + shouldCleanup := conditions.IsTrue(m, clusterv1.MachineHealthCheckSucceededCondition) && conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedCondition) + shouldCleanupV1Beta2 := v1beta2conditions.IsTrue(m, clusterv1.MachineHealthCheckSucceededV1Beta2Condition) && v1beta2conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + + if !(shouldCleanup || shouldCleanupV1Beta2) { + continue + } + + patchHelper, err := patch.NewHelper(m, r.Client) + if err != nil { + errList = append(errList, err) + continue + } + + if shouldCleanup { + conditions.Delete(m, clusterv1.MachineOwnerRemediatedCondition) + } + + if shouldCleanupV1Beta2 { + v1beta2conditions.Delete(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + } + + if err := patchHelper.Patch(ctx, m, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineOwnerRemediatedCondition, + }}, 
patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineOwnerRemediatedV1Beta2Condition, + }}); err != nil { + errList = append(errList, err) + } + } + if len(errList) > 0 { + return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errList), "failed to remove OwnerRemediated condition from healhty Machines") + } + + // Calculates the Machines to be remediated. + // Note: Machines already deleting are not included, there is no need to trigger remediation for them again. + machinesToRemediate := collections.FromMachines(machines...).Filter(collections.IsUnhealthyAndOwnerRemediated, collections.Not(collections.HasDeletionTimestamp)).UnsortedList() + + // If there are no machines to remediate return early. + if len(machinesToRemediate) == 0 { + return ctrl.Result{}, nil + } + // Calculate how many in flight machines we should remediate. // By default, we allow all machines to be remediated at the same time. - maxInFlight := len(filteredMachines) + maxInFlight := math.MaxInt // If the MachineSet is part of a MachineDeployment, only allow remediations if // it's the desired revision. if isDeploymentChild(ms) { if owner.Annotations[clusterv1.RevisionAnnotation] != ms.Annotations[clusterv1.RevisionAnnotation] { // MachineSet is part of a MachineDeployment but isn't the current revision, no remediations allowed. + if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineCannotBeRemediatedV1Beta2Reason, + Message: "Machine won't be remediated because it is pending removal due to rollout", + }, nil); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } @@ -1224,31 +1362,33 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( } } - // List all unhealthy machines. 
- machinesToRemediate := make([]*clusterv1.Machine, 0, len(filteredMachines)) - for _, m := range filteredMachines { - // filteredMachines contains machines in deleting status to calculate correct status. - // skip remediation for those in deleting status. + // Update maxInFlight based on remediations that are in flight. + // A Machine has a remediation in flight when Machine's OwnerRemediated condition + // reports that remediation has been completed and the Machine has been deleted. + for _, m := range machines { if !m.DeletionTimestamp.IsZero() { + // TODO: Check for Status: False and Reason: MachineSetMachineRemediationMachineDeletedV1Beta2Reason + // instead when starting to use v1beta2 conditions for control flow. if conditions.IsTrue(m, clusterv1.MachineOwnerRemediatedCondition) { - // Machine has been remediated by this controller and still in flight. + // Remediation for this Machine has been triggered by this controller but it is still in flight, + // i.e. it still goes through the deletion workflow and exists in etcd. maxInFlight-- } - continue - } - if conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedCondition) { - machinesToRemediate = append(machinesToRemediate, m) } } - // If there are no machines to remediate return early. - if len(machinesToRemediate) == 0 { - return ctrl.Result{}, nil - } // Check if we can remediate any machines. if maxInFlight <= 0 { // No tokens available to remediate machines. 
log.V(3).Info("Remediation strategy is set, and maximum in flight has been reached", "machinesToBeRemediated", len(machinesToRemediate)) + if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: fmt.Sprintf("Waiting because there are already too many remediations in progress (spec.strategy.remediation.maxInFlight is %s)", owner.Spec.Strategy.Remediation.MaxInFlight), + }, nil); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{}, nil } @@ -1263,11 +1403,22 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( if len(machinesToRemediate) > maxInFlight { log.V(5).Info("Remediation strategy is set, limiting in flight operations", "machinesToBeRemediated", len(machinesToRemediate)) // We have more machines to remediate than tokens available. - machinesToRemediate = machinesToRemediate[:maxInFlight] + allMachinesToRemediate := machinesToRemediate + machinesToRemediate = allMachinesToRemediate[:maxInFlight] + machinesToDeferRemediation := allMachinesToRemediate[maxInFlight:] + + if err := patchMachineConditions(ctx, r.Client, machinesToDeferRemediation, metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: fmt.Sprintf("Waiting because there are already too many remediations in progress (spec.strategy.remediation.maxInFlight is %s)", owner.Spec.Strategy.Remediation.MaxInFlight), + }, nil); err != nil { + return ctrl.Result{}, err + } } // Run preflight checks. 
- preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine Remediation") + preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine remediation") if err != nil { // If err is not nil use that as the preflightCheckErrMessage preflightCheckErrMessage = err.Error() @@ -1277,41 +1428,48 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( if preflightChecksFailed { // PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with // WaitingForRemediationReason reason. - var errs []error - for _, m := range machinesToRemediate { - patchHelper, err := patch.NewHelper(m, r.Client) - if err != nil { - errs = append(errs, err) - continue - } - conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, preflightCheckErrMessage) - if err := patchHelper.Patch(ctx, m); err != nil { - errs = append(errs, err) - } - } - - if len(errs) > 0 { - return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to patch unhealthy Machines") + if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: preflightCheckErrMessage, + }, &clusterv1.Condition{ + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionFalse, + Reason: clusterv1.WaitingForRemediationReason, + Severity: clusterv1.ConditionSeverityWarning, + Message: preflightCheckErrMessage, + }); err != nil { + return ctrl.Result{}, err } return preflightChecksResult, nil } - // PreflightChecks passed, so it is safe to remediate unhealthy machines. - // Remediate unhealthy machines by deleting them. 
+ // PreflightChecks passed, so it is safe to remediate unhealthy machines by deleting them. + + // Note: We intentionally patch the Machines before we delete them to make this code reentrant. + // If we delete the Machine first, the Machine would be filtered out on next reconcile because + // it has a deletionTimestamp so it would never get the condition. + // Instead if we set the condition but the deletion does not go through on next reconcile either the + // condition will be fixed/updated or the Machine deletion will be retried. + if err := patchMachineConditions(ctx, r.Client, machinesToRemediate, metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", + }, &clusterv1.Condition{ + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionTrue, + }); err != nil { + return ctrl.Result{}, err + } var errs []error for _, m := range machinesToRemediate { log.Info("Deleting unhealthy Machine", "Machine", klog.KObj(m)) - patch := client.MergeFrom(m.DeepCopy()) if err := r.Client.Delete(ctx, m); err != nil && !apierrors.IsNotFound(err) { errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m))) - continue - } - conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition) - if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) { - errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m))) } } - if len(errs) > 0 { return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines") } @@ -1319,6 +1477,36 @@ func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope) ( return ctrl.Result{}, nil } +func patchMachineConditions(ctx context.Context, c client.Client, machines []*clusterv1.Machine, v1beta2Condition metav1.Condition, 
condition *clusterv1.Condition) error { + var errs []error + for _, m := range machines { + patchHelper, err := patch.NewHelper(m, c) + if err != nil { + errs = append(errs, err) + continue + } + + if condition != nil { + conditions.Set(m, condition) + } + v1beta2conditions.Set(m, v1beta2Condition) + + if err := patchHelper.Patch(ctx, m, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineOwnerRemediatedCondition, + }}, patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineOwnerRemediatedV1Beta2Condition, + }}); err != nil { + errs = append(errs, err) + } + } + if len(errs) > 0 { + return errors.Wrapf(kerrors.NewAggregate(errs), "failed to patch Machines") + } + + return nil +} + func (r *Reconciler) reconcileExternalTemplateReference(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, owner *clusterv1.MachineDeployment, ref *corev1.ObjectReference) (objectNotFound bool, err error) { if !strings.HasSuffix(ref.Kind, clusterv1.TemplateSuffix) { return false, nil diff --git a/internal/controllers/machineset/machineset_controller_status.go b/internal/controllers/machineset/machineset_controller_status.go index 910f47896eb0..de6036aae082 100644 --- a/internal/controllers/machineset/machineset_controller_status.go +++ b/internal/controllers/machineset/machineset_controller_status.go @@ -19,6 +19,7 @@ package machineset import ( "context" "fmt" + "slices" "sort" "strings" "time" @@ -28,6 +29,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/collections" v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" clog "sigs.k8s.io/cluster-api/util/log" ) @@ -46,7 +48,7 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { // Conditions // Update the ScalingUp and ScalingDown condition. 
- setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded) + setScalingUpCondition(ctx, s.machineSet, s.machines, s.bootstrapObjectNotFound, s.infrastructureObjectNotFound, s.getAndAdoptMachinesForMachineSetSucceeded, s.scaleUpPreflightCheckErrMessage) setScalingDownCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded) // MachinesReady condition: aggregate the Machine's Ready condition. @@ -55,6 +57,11 @@ func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { // MachinesUpToDate condition: aggregate the Machine's UpToDate condition. setMachinesUpToDateCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded) + machines := collections.FromMachines(s.machines...) + machinesToBeRemediated := machines.Filter(collections.IsUnhealthyAndOwnerRemediated) + unhealthyMachines := machines.Filter(collections.IsUnhealthy) + setRemediatingCondition(ctx, s.machineSet, machinesToBeRemediated, unhealthyMachines, s.getAndAdoptMachinesForMachineSetSucceeded) + setDeletingCondition(ctx, s.machineSet, s.machines, s.getAndAdoptMachinesForMachineSetSucceeded) } @@ -86,7 +93,7 @@ func setReplicas(_ context.Context, ms *clusterv1.MachineSet, machines []*cluste ms.Status.V1Beta2.UpToDateReplicas = ptr.To(upToDateReplicas) } -func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool) { +func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines []*clusterv1.Machine, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachinesForMachineSetSucceeded bool, scaleUpPreflightCheckErrMessage string) { // If we got unexpected errors in listing the machines (this should never happen), surface them. 
if !getAndAdoptMachinesForMachineSetSucceeded { v1beta2conditions.Set(ms, metav1.Condition{ @@ -119,7 +126,7 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines if currentReplicas >= desiredReplicas { var message string if missingReferencesMessage != "" { - message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage) + message = fmt.Sprintf("Scaling up would be blocked because %s", missingReferencesMessage) } v1beta2conditions.Set(ms, metav1.Condition{ Type: clusterv1.MachineSetScalingUpV1Beta2Condition, @@ -132,8 +139,11 @@ func setScalingUpCondition(_ context.Context, ms *clusterv1.MachineSet, machines // Scaling up. message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas) - if missingReferencesMessage != "" { - message += fmt.Sprintf(" is blocked %s", missingReferencesMessage) + if missingReferencesMessage != "" || scaleUpPreflightCheckErrMessage != "" { + blockMessages := slices.DeleteFunc([]string{missingReferencesMessage, scaleUpPreflightCheckErrMessage}, func(s string) bool { + return s == "" + }) + message += fmt.Sprintf(" is blocked because %s", strings.Join(blockMessages, " and ")) } v1beta2conditions.Set(ms, metav1.Condition{ Type: clusterv1.MachineSetScalingUpV1Beta2Condition, @@ -275,6 +285,53 @@ func setMachinesUpToDateCondition(ctx context.Context, machineSet *clusterv1.Mac v1beta2conditions.Set(machineSet, *upToDateCondition) } +func setRemediatingCondition(ctx context.Context, machineSet *clusterv1.MachineSet, machinesToBeRemediated, unhealthyMachines collections.Machines, getAndAdoptMachinesForMachineSetSucceeded bool) { + if !getAndAdoptMachinesForMachineSetSucceeded { + v1beta2conditions.Set(machineSet, metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineSetRemediatingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + if 
len(machinesToBeRemediated) == 0 { + message := aggregateUnhealthyMachines(unhealthyMachines) + v1beta2conditions.Set(machineSet, metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetNotRemediatingV1Beta2Reason, + Message: message, + }) + return + } + + remediatingCondition, err := v1beta2conditions.NewAggregateCondition( + machinesToBeRemediated.UnsortedList(), clusterv1.MachineOwnerRemediatedV1Beta2Condition, + v1beta2conditions.TargetConditionType(clusterv1.MachineSetRemediatingV1Beta2Condition), + ) + if err != nil { + v1beta2conditions.Set(machineSet, metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineSetRemediatingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + + log := ctrl.LoggerFrom(ctx) + log.Error(err, fmt.Sprintf("Failed to aggregate Machine's %s conditions", clusterv1.MachineOwnerRemediatedV1Beta2Condition)) + return + } + + v1beta2conditions.Set(machineSet, metav1.Condition{ + Type: remediatingCondition.Type, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, + Message: remediatingCondition.Message, + }) +} + func setDeletingCondition(_ context.Context, machineSet *clusterv1.MachineSet, machines []*clusterv1.Machine, getAndAdoptMachinesForMachineSetSucceeded bool) { // If we got unexpected errors in listing the machines (this should never happen), surface them. 
if !getAndAdoptMachinesForMachineSetSucceeded { @@ -330,10 +387,10 @@ func calculateMissingReferencesMessage(ms *clusterv1.MachineSet, bootstrapTempla } if len(missingObjects) == 1 { - return fmt.Sprintf("because %s does not exist", missingObjects[0]) + return fmt.Sprintf("%s does not exist", missingObjects[0]) } - return fmt.Sprintf("because %s do not exist", strings.Join(missingObjects, " and ")) + return fmt.Sprintf("%s do not exist", strings.Join(missingObjects, " and ")) } func aggregateStaleMachines(machines []*clusterv1.Machine) string { @@ -369,3 +426,32 @@ func aggregateStaleMachines(machines []*clusterv1.Machine) string { return message } + +func aggregateUnhealthyMachines(machines collections.Machines) string { + if len(machines) == 0 { + return "" + } + + machineNames := machines.Names() + + if len(machineNames) == 0 { + return "" + } + + message := "Machine" + if len(machineNames) > 1 { + message += "s" + } + + sort.Strings(machineNames) + message += " " + clog.ListToString(machineNames, func(s string) string { return s }, 3) + + if len(machineNames) == 1 { + message += " is " + } else { + message += " are " + } + message += "not healthy (not to be remediated by MachineSet)" + + return message +} diff --git a/internal/controllers/machineset/machineset_controller_status_test.go b/internal/controllers/machineset/machineset_controller_status_test.go index e157563fd4ba..509046ce7c7b 100644 --- a/internal/controllers/machineset/machineset_controller_status_test.go +++ b/internal/controllers/machineset/machineset_controller_status_test.go @@ -26,6 +26,7 @@ import ( "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/collections" v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" ) @@ -193,6 +194,7 @@ func Test_setScalingUpCondition(t *testing.T) { bootstrapObjectNotFound bool infrastructureObjectNotFound bool getAndAdoptMachinesForMachineSetSucceeded bool + scaleUpPreflightCheckErrMessage string 
expectCondition metav1.Condition }{ { @@ -298,6 +300,27 @@ func Test_setScalingUpCondition(t *testing.T) { Message: "Scaling up from 0 to 3 replicas is blocked because DockerMachineTemplate does not exist", }, }, + { + name: "scaling up and blocked by bootstrap and infrastructure object and preflight checks", + ms: scalingUpMachineSetWith3Replicas, + bootstrapObjectNotFound: true, + infrastructureObjectNotFound: true, + getAndAdoptMachinesForMachineSetSucceeded: true, + // This preflight check error can happen when a MachineSet is scaling up while the control plane + // already has a newer Kubernetes version. + scaleUpPreflightCheckErrMessage: "MachineSet version (1.25.5) and ControlPlane version (1.26.2) " + + "do not conform to kubeadm version skew policy as kubeadm only supports joining with the same " + + "major+minor version as the control plane (\"KubeadmVersionSkew\" preflight check failed)", + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetScalingUpV1Beta2Reason, + Message: "Scaling up from 0 to 3 replicas is blocked because KubeadmBootstrapTemplate and DockerMachineTemplate " + + "do not exist and MachineSet version (1.25.5) and ControlPlane version (1.26.2) " + + "do not conform to kubeadm version skew policy as kubeadm only supports joining with the same " + + "major+minor version as the control plane (\"KubeadmVersionSkew\" preflight check failed)", + }, + }, { name: "deleting", ms: deletingMachineSetWith3Replicas, @@ -316,7 +339,7 @@ func Test_setScalingUpCondition(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - setScalingUpCondition(ctx, tt.ms, tt.machines, tt.bootstrapObjectNotFound, tt.infrastructureObjectNotFound, tt.getAndAdoptMachinesForMachineSetSucceeded) + setScalingUpCondition(ctx, tt.ms, tt.machines, tt.bootstrapObjectNotFound, tt.infrastructureObjectNotFound, tt.getAndAdoptMachinesForMachineSetSucceeded, 
tt.scaleUpPreflightCheckErrMessage) condition := v1beta2conditions.Get(tt.ms, clusterv1.MachineSetScalingUpV1Beta2Condition) g.Expect(condition).ToNot(BeNil()) @@ -549,7 +572,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { Type: clusterv1.MachineSetMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition Ready not yet reported from Machine machine-2", + Message: "* Machine machine-2: Condition Ready not yet reported", }, }, { @@ -578,10 +601,12 @@ func Test_setMachinesReadyCondition(t *testing.T) { }, getAndAdoptMachinesForMachineSetSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.MachineSetMachinesReadyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "Deleting: Machine deletion in progress, stage: DrainingNode from Machine machine-4; HealthCheckSucceeded: Some message from Machine machine-2; Some unknown message from Machine machine-3", + Type: clusterv1.MachineSetMachinesReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machine machine-2: HealthCheckSucceeded: Some message\n" + + "* Machine machine-4: Deleting: Machine deletion in progress, stage: DrainingNode\n" + + "* Machine machine-3: Some unknown message", }, }, } @@ -666,7 +691,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineSetMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: "some-unknown-reason-1", - Message: "some unknown message from Machine unknown-1", + Message: "* Machine unknown-1: some unknown message", }, }, { @@ -685,7 +710,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineSetMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "some-not-up-to-date-reason", - Message: "some not up-to-date message from Machine 
not-up-to-date-machine-1", + Message: "* Machine not-up-to-date-machine-1: some not up-to-date message", }, }, { @@ -699,7 +724,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { Type: clusterv1.MachineSetMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: v1beta2conditions.NotYetReportedReason, - Message: "Condition UpToDate not yet reported from Machine no-condition-machine-1", + Message: "* Machine no-condition-machine-1: Condition UpToDate not yet reported", }, }, { @@ -733,10 +758,11 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { }, getAndAdoptMachinesForMachineSetSucceeded: true, expectCondition: metav1.Condition{ - Type: clusterv1.MachineSetMachinesUpToDateV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: v1beta2conditions.MultipleIssuesReportedReason, - Message: "This is not up-to-date message from Machines not-up-to-date-machine-1, not-up-to-date-machine-2; Condition UpToDate not yet reported from Machines no-condition-machine-1, no-condition-machine-2", + Type: clusterv1.MachineSetMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "* Machines not-up-to-date-machine-1, not-up-to-date-machine-2: This is not up-to-date message\n" + + "* Machines no-condition-machine-1, no-condition-machine-2: Condition UpToDate not yet reported", }, }, } @@ -753,6 +779,133 @@ func Test_setMachinesUpToDateCondition(t *testing.T) { } } +func Test_setRemediatingCondition(t *testing.T) { + healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} + healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} + ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} + ownerRemediatedV1Beta2 := metav1.Condition{Type: 
clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, Message: "Machine deletionTimestamp set"} + ownerRemediatedWaitingForRemediationV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, Message: "KubeadmControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)"} + + tests := []struct { + name string + machineSet *clusterv1.MachineSet + machines []*clusterv1.Machine + getAndAdoptMachinesForMachineSetSucceeded bool + expectCondition metav1.Condition + }{ + { + name: "get machines failed", + machineSet: &clusterv1.MachineSet{}, + machines: nil, + getAndAdoptMachinesForMachineSetSucceeded: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineSetRemediatingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "Without unhealthy machines", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + }, + getAndAdoptMachinesForMachineSetSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetNotRemediatingV1Beta2Reason, + }, + }, + { + name: "With machines to be remediated by MS", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedV1Beta2)), + }, + 
getAndAdoptMachinesForMachineSetSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, + Message: "* Machine m3: Machine deletionTimestamp set", + }, + }, + { + name: "With machines to be remediated by MS and preflight check error", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedV1Beta2)), + fakeMachine("m4", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedWaitingForRemediationV1Beta2)), + }, + getAndAdoptMachinesForMachineSetSucceeded: true, + // This preflight check error can happen when a Machine becomes unhealthy while the control plane is upgrading. 
+ expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineSetRemediatingV1Beta2Reason, + Message: "* Machine m3: Machine deletionTimestamp set\n" + + "* Machine m4: KubeadmControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", + }, + }, + { + name: "With one unhealthy machine not to be remediated by MS", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckSucceeded)), // Healthy machine + }, + getAndAdoptMachinesForMachineSetSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetNotRemediatingV1Beta2Reason, + Message: "Machine m2 is not healthy (not to be remediated by MachineSet)", + }, + }, + { + name: "With two unhealthy machine not to be remediated by MS", + machineSet: &clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckSucceeded)), // Healthy machine + }, + getAndAdoptMachinesForMachineSetSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineSetRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetNotRemediatingV1Beta2Reason, + Message: "Machines m1, m2 are not healthy (not to be remediated by MachineSet)", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var 
machinesToBeRemediated, unHealthyMachines collections.Machines + if tt.getAndAdoptMachinesForMachineSetSucceeded { + machines := collections.FromMachines(tt.machines...) + machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated) + unHealthyMachines = machines.Filter(collections.IsUnhealthy) + } + setRemediatingCondition(ctx, tt.machineSet, machinesToBeRemediated, unHealthyMachines, tt.getAndAdoptMachinesForMachineSetSucceeded) + + condition := v1beta2conditions.Get(tt.machineSet, clusterv1.MachineSetRemediatingV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + func Test_setDeletingCondition(t *testing.T) { tests := []struct { name string @@ -847,3 +1000,32 @@ func newStaleDeletingMachine(name string) *clusterv1.Machine { m.DeletionTimestamp = ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)}) return m } + +type fakeMachinesOption func(m *clusterv1.Machine) + +func fakeMachine(name string, options ...fakeMachinesOption) *clusterv1.Machine { + p := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + for _, opt := range options { + opt(p) + } + return p +} + +func withV1Beta2Condition(c metav1.Condition) fakeMachinesOption { + return func(m *clusterv1.Machine) { + if m.Status.V1Beta2 == nil { + m.Status.V1Beta2 = &clusterv1.MachineV1Beta2Status{} + } + v1beta2conditions.Set(m, c) + } +} + +func withConditions(c ...clusterv1.Condition) fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.Status.Conditions = append(m.Status.Conditions, c...) 
+ } +} diff --git a/internal/controllers/machineset/machineset_controller_test.go b/internal/controllers/machineset/machineset_controller_test.go index 1a62fba1f914..22ea76b7697d 100644 --- a/internal/controllers/machineset/machineset_controller_test.go +++ b/internal/controllers/machineset/machineset_controller_test.go @@ -40,6 +40,7 @@ import ( "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/test/builder" ) @@ -1500,6 +1501,8 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "unhealthy-machine", Namespace: "default", + // Blocking deletion so we can confirm conditions were updated as expected. + Finalizers: []string{"block-deletion"}, }, Status: clusterv1.MachineStatus{ Conditions: []clusterv1.Condition{ @@ -1507,6 +1510,26 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse, }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionFalse, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineHealthCheckHasRemediateAnnotationV1Beta2Reason, + Message: "Marked for remediation via cluster.x-k8s.io/remediate-machine annotation", + }, + }, }, }, } @@ -1515,11 +1538,40 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Name: "healthy-machine", Namespace: "default", }, + Status: 
clusterv1.MachineStatus{ + Conditions: []clusterv1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionFalse, + }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionTrue, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, + }, + }, + }, + }, } machines := []*clusterv1.Machine{unhealthyMachine, healthyMachine} - fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneStable, unhealthyMachine, healthyMachine).Build() + fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneStable, unhealthyMachine, healthyMachine).WithStatusSubresource(&clusterv1.Machine{}).Build() r := &Reconciler{ Client: fakeClient, } @@ -1533,13 +1585,27 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { _, err := r.reconcileUnhealthyMachines(ctx, s) g.Expect(err).ToNot(HaveOccurred()) - // Verify the unhealthy machine is deleted. + + // Verify the unhealthy machine is deleted (deletionTimestamp must be set). m := &clusterv1.Machine{} - err = r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m) - g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) - // Verify the healthy machine is not deleted. 
+ g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeFalse()) + g.Expect(conditions.IsTrue(m, clusterv1.MachineOwnerRemediatedCondition)).To(BeTrue()) + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) + + // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. m = &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).Should(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) + g.Expect(conditions.Has(m, clusterv1.MachineOwnerRemediatedCondition)).To(BeFalse()) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) }) t.Run("should update the unhealthy machine MachineOwnerRemediated condition if preflight checks did not pass", func(t *testing.T) { @@ -1574,6 +1640,26 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse, }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionFalse, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: 
clusterv1.MachineHealthCheckHasRemediateAnnotationV1Beta2Reason, + Message: "Marked for remediation via cluster.x-k8s.io/remediate-machine annotation", + }, + }, }, }, } @@ -1582,6 +1668,35 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Name: "healthy-machine", Namespace: "default", }, + Status: clusterv1.MachineStatus{ + Conditions: []clusterv1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionFalse, + }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionTrue, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, + }, + }, + }, + }, } machines := []*clusterv1.Machine{unhealthyMachine, healthyMachine} @@ -1602,6 +1717,7 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { condition := clusterv1.MachineOwnerRemediatedCondition m := &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). 
To(BeTrue(), "Machine should have the %s condition set", condition) machineOwnerRemediatedCondition := conditions.Get(m, condition) @@ -1609,12 +1725,22 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { To(Equal(corev1.ConditionFalse), "%s condition status should be false", condition) g.Expect(machineOwnerRemediatedCondition.Reason). To(Equal(clusterv1.WaitingForRemediationReason), "%s condition should have reason %s", condition, clusterv1.WaitingForRemediationReason) - - // Verify the healthy machine continues to not have the MachineOwnerRemediated condition. + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: "GenericControlPlane default/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) + + // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. m = &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) }) t.Run("should only try to remediate MachineOwnerRemediated if MachineSet is current", func(t *testing.T) { @@ -1672,6 +1798,8 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "unhealthy-machine", Namespace: "default", + // Blocking deletion so we can confirm conditions were updated as expected. 
+ Finalizers: []string{"block-deletion"}, }, Status: clusterv1.MachineStatus{ Conditions: []clusterv1.Condition{ @@ -1679,6 +1807,26 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse, }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionFalse, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineHealthCheckHasRemediateAnnotationV1Beta2Reason, + Message: "Marked for remediation via cluster.x-k8s.io/remediate-machine annotation", + }, + }, }, }, } @@ -1687,6 +1835,35 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Name: "healthy-machine", Namespace: "default", }, + Status: clusterv1.MachineStatus{ + Conditions: []clusterv1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionFalse, + }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionTrue, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. 
+ Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, + }, + }, + }, + }, } machines := []*clusterv1.Machine{unhealthyMachine, healthyMachine} @@ -1725,12 +1902,22 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { g.Expect(machineOwnerRemediatedCondition.Status). To(Equal(corev1.ConditionFalse), "%s condition status should be false", condition) g.Expect(unhealthyMachine.DeletionTimestamp).Should(BeZero()) - - // Verify the healthy machine continues to not have the MachineOwnerRemediated condition. + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineCannotBeRemediatedV1Beta2Reason, + Message: "Machine won't be remediated because it is pending removal due to rollout", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) + + // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. m = &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) // Test with the current MachineSet. 
s = &scope{ @@ -1744,14 +1931,25 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) // Verify the unhealthy machine has been deleted. - err = r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m) - g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) - - // Verify (again) the healthy machine continues to not have the MachineOwnerRemediated condition. + g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(unhealthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeFalse()) + g.Expect(conditions.IsTrue(m, clusterv1.MachineOwnerRemediatedCondition)).To(BeTrue()) + c = v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) + + // Verify (again) the healthy machine is not deleted and does not have the OwnerRemediated condition. m = &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). 
To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) }) t.Run("should only try to remediate up to MaxInFlight unhealthy", func(t *testing.T) { @@ -1822,6 +2020,26 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse, }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionFalse, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineHealthCheckHasRemediateAnnotationV1Beta2Reason, + Message: "Marked for remediation via cluster.x-k8s.io/remediate-machine annotation", + }, + }, }, }, }) @@ -1832,6 +2050,35 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { Name: "healthy-machine", Namespace: "default", }, + Status: clusterv1.MachineStatus{ + Conditions: []clusterv1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: corev1.ConditionFalse, + }, + { + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: corev1.ConditionTrue, + }, + }, + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + // This condition should be cleaned up because HealthCheckSucceeded is true. 
+ Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineOwnerRemediatedWaitingForRemediationV1Beta2Reason, + Message: "Waiting for remediation", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckSucceededV1Beta2Reason, + }, + }, + }, + }, } fakeClient := fake.NewClientBuilder().WithObjects(cluster, machineDeployment, healthyMachine). @@ -1872,17 +2119,27 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { machineOwnerRemediatedCondition := conditions.Get(m, condition) g.Expect(machineOwnerRemediatedCondition.Status). To(Equal(corev1.ConditionFalse), "%s condition status should be false", condition) + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: "Waiting because there are already too many remediations in progress (spec.strategy.remediation.maxInFlight is 3)", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) } else { // Machines after maxInFlight, should be deleted. g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected machine %d to be deleted", i) } } - // Verify the healthy machine continues to not have the MachineOwnerRemediated condition. + // Verify the healthy machine is not deleted and does not have the OwnerRemediated condition. m := &clusterv1.Machine{} g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). 
To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) // // Second pass. @@ -1929,6 +2186,15 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { machineOwnerRemediatedCondition := conditions.Get(m, condition) g.Expect(machineOwnerRemediatedCondition.Status). To(Equal(corev1.ConditionFalse), "%s condition status should be false", condition) + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationDeferredV1Beta2Reason, + Message: "Waiting because there are already too many remediations in progress (spec.strategy.remediation.maxInFlight is 3)", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) + g.Expect(m.DeletionTimestamp).To(BeZero()) } else if i < total-maxInFlight { // Machines before the maxInFlight should have a deletion timestamp g.Expect(err).ToNot(HaveOccurred()) @@ -1937,6 +2203,14 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { machineOwnerRemediatedCondition := conditions.Get(m, condition) g.Expect(machineOwnerRemediatedCondition.Status). 
To(Equal(corev1.ConditionTrue), "%s condition status should be true", condition) + c := v1beta2conditions.Get(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition) + g.Expect(c).ToNot(BeNil()) + g.Expect(*c).To(v1beta2conditions.MatchCondition(metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineSetMachineRemediationMachineDeletedV1Beta2Reason, + Message: "Machine deletionTimestamp set", + }, v1beta2conditions.IgnoreLastTransitionTime(true))) g.Expect(m.DeletionTimestamp).ToNot(BeZero()) if cleanFinalizer { @@ -1951,10 +2225,12 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { } validateSecondPass(false) - // Verify (again) the healthy machine continues to not have the MachineOwnerRemediated condition. + // Verify (again) the healthy machine is not deleted and does not have the OwnerRemediated condition. g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) // Perform another pass with the same exact configuration. // This is testing that, given that we have Machines that are being deleted and are in flight, @@ -1972,10 +2248,12 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { // Validate and remove finalizers for in flight machines. validateSecondPass(true) - // Verify (again) the healthy machine continues to not have the MachineOwnerRemediated condition. + // Verify (again) the healthy machine is not deleted and does not have the OwnerRemediated condition. 
g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) // Call again to verify that the remaining unhealthy machines are deleted, // at this point all unhealthy machines should be deleted given the max in flight @@ -1996,10 +2274,12 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { g.Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected machine %d to be deleted: %v", i) } - // Verify (again) the healthy machine continues to not have the MachineOwnerRemediated condition. + // Verify (again) the healthy machine is not deleted and does not have the OwnerRemediated condition. g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(healthyMachine), m)).To(Succeed()) + g.Expect(m.DeletionTimestamp.IsZero()).To(BeTrue()) g.Expect(conditions.Has(m, condition)). 
To(BeFalse(), "Machine should not have the %s condition set", condition) + g.Expect(v1beta2conditions.Has(m, clusterv1.MachineOwnerRemediatedV1Beta2Condition)).To(BeFalse()) }) } @@ -2312,3 +2592,183 @@ func TestReconciler_reconcileDelete(t *testing.T) { }) } } + +func TestNewMachineUpToDateCondition(t *testing.T) { + reconciliationTime := time.Now() + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machineSet *clusterv1.MachineSet + expectCondition *metav1.Condition + }{ + { + name: "no condition returned for stand-alone MachineSet", + machineDeployment: nil, + machineSet: &clusterv1.MachineSet{}, + expectCondition: nil, + }, + { + name: "up-to-date", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + machineSet: &clusterv1.MachineSet{ + Spec: clusterv1.MachineSetSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + expectCondition: &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, + }, + { + name: "not up-to-date", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + machineSet: &clusterv1.MachineSet{ + Spec: clusterv1.MachineSetSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.30.0"), + }, + }, + }, + }, + expectCondition: &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpToDateV1Beta2Reason, + Message: "Version v1.30.0, v1.31.0 required", + }, + }, + { + name: "up-to-date, 
spec.rolloutAfter not expired", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + RolloutAfter: &metav1.Time{Time: reconciliationTime.Add(1 * time.Hour)}, // rollout after not yet expired + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + machineSet: &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.Time{Time: reconciliationTime.Add(-1 * time.Hour)}, // MS created before rollout after + }, + Spec: clusterv1.MachineSetSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + expectCondition: &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, + }, + { + name: "not up-to-date, rollout After expired", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + RolloutAfter: &metav1.Time{Time: reconciliationTime.Add(-1 * time.Hour)}, // rollout after expired + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + machineSet: &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.Time{Time: reconciliationTime.Add(-2 * time.Hour)}, // MS created before rollout after + }, + Spec: clusterv1.MachineSetSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + expectCondition: &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotUpToDateV1Beta2Reason, + Message: "MachineDeployment spec.rolloutAfter expired", + }, + }, + { + name: "not up-to-date, rollout After expired and a new MS created", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: 
clusterv1.MachineDeploymentSpec{ + RolloutAfter: &metav1.Time{Time: reconciliationTime.Add(-2 * time.Hour)}, // rollout after expired + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + machineSet: &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.Time{Time: reconciliationTime.Add(-1 * time.Hour)}, // MS created after rollout after + }, + Spec: clusterv1.MachineSetSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Version: ptr.To("v1.31.0"), + }, + }, + }, + }, + expectCondition: &metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineUpToDateV1Beta2Reason, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + s := &scope{ + owningMachineDeployment: tt.machineDeployment, + machineSet: tt.machineSet, + reconciliationTime: reconciliationTime, + } + + condition := newMachineUpToDateCondition(s) + if tt.expectCondition != nil { + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(*tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + } else { + g.Expect(condition).To(BeNil()) + } + }) + } +} diff --git a/internal/controllers/machineset/machineset_preflight.go b/internal/controllers/machineset/machineset_preflight.go index ae959329e2f8..1f446eb96c81 100644 --- a/internal/controllers/machineset/machineset_preflight.go +++ b/internal/controllers/machineset/machineset_preflight.go @@ -136,9 +136,8 @@ func (r *Reconciler) runPreflightChecks(ctx context.Context, cluster *clusterv1. for _, v := range preflightCheckErrs { preflightCheckErrStrings = append(preflightCheckErrStrings, *v) } - msg := fmt.Sprintf("Performing %q on hold because %s. 
The operation will continue after the preflight check(s) pass", action, strings.Join(preflightCheckErrStrings, "; ")) - log.Info(msg) - return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, msg, nil + log.Info(fmt.Sprintf("%s on hold because %s. The operation will continue after the preflight check(s) pass", action, strings.Join(preflightCheckErrStrings, "; "))) + return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, strings.Join(preflightCheckErrStrings, "; "), nil } return ctrl.Result{}, "", nil } @@ -149,19 +148,19 @@ func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured // Check that the control plane is not provisioning. isProvisioning, err := contract.ControlPlane().IsProvisioning(controlPlane) if err != nil { - return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if ControlPlane %s is provisioning", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, cpKlogRef) + return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if %s %s is provisioning", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, controlPlane.GetKind(), cpKlogRef) } if isProvisioning { - return ptr.To(fmt.Sprintf("ControlPlane %s is provisioning (%q preflight failed)", cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil + return ptr.To(fmt.Sprintf("%s %s is provisioning (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil } // Check that the control plane is not upgrading. 
isUpgrading, err := contract.ControlPlane().IsUpgrading(controlPlane) if err != nil { - return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if the ControlPlane %s is upgrading", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, cpKlogRef) + return nil, errors.Wrapf(err, "failed to perform %q preflight check: failed to check if the %s %s is upgrading", clusterv1.MachineSetPreflightCheckControlPlaneIsStable, controlPlane.GetKind(), cpKlogRef) } if isUpgrading { - return ptr.To(fmt.Sprintf("ControlPlane %s is upgrading (%q preflight failed)", cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil + return ptr.To(fmt.Sprintf("%s %s is upgrading (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil } return nil, nil @@ -173,7 +172,7 @@ func (r *Reconciler) kubernetesVersionPreflightCheck(cpSemver, msSemver semver.V // => MS minor version cannot be outside of the supported skew. // Kubernetes skew policy: https://kubernetes.io/releases/version-skew-policy/#kubelet if msSemver.Minor > cpSemver.Minor { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (%q preflight failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (%q preflight check failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) } minorSkew := uint64(3) // For Control Planes running Kubernetes < v1.28, the version skew policy for kubelets is two. 
@@ -181,7 +180,7 @@ func (r *Reconciler) kubernetesVersionPreflightCheck(cpSemver, msSemver semver.V minorSkew = 2 } if msSemver.Minor < cpSemver.Minor-minorSkew { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is more than %d minor versions older than the ControlPlane version (%q preflight failed)", msSemver.String(), cpSemver.String(), minorSkew, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to the kubernetes version skew policy as MachineSet version is more than %d minor versions older than the ControlPlane version (%q preflight check failed)", msSemver.String(), cpSemver.String(), minorSkew, clusterv1.MachineSetPreflightCheckKubernetesVersionSkew)) } return nil @@ -205,7 +204,7 @@ func (r *Reconciler) kubeadmVersionPreflightCheck(cpSemver, msSemver semver.Vers groupVersion.Group == bootstrapv1.GroupVersion.Group if kubeadmBootstrapProviderUsed { if cpSemver.Minor != msSemver.Minor { - return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (%q preflight failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubeadmVersionSkew)), nil + return ptr.To(fmt.Sprintf("MachineSet version (%s) and ControlPlane version (%s) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (%q preflight check failed)", msSemver.String(), cpSemver.String(), clusterv1.MachineSetPreflightCheckKubeadmVersionSkew)), nil } } return nil, nil diff --git a/internal/controllers/machineset/machineset_preflight_test.go b/internal/controllers/machineset/machineset_preflight_test.go index 811ddbe49aaa..58aca4af4fb9 
100644 --- a/internal/controllers/machineset/machineset_preflight_test.go +++ b/internal/controllers/machineset/machineset_preflight_test.go @@ -69,12 +69,13 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { t.Run("should run preflight checks if the feature gate is enabled", func(t *testing.T) { tests := []struct { - name string - cluster *clusterv1.Cluster - controlPlane *unstructured.Unstructured - machineSet *clusterv1.MachineSet - wantPass bool - wantErr bool + name string + cluster *clusterv1.Cluster + controlPlane *unstructured.Unstructured + machineSet *clusterv1.MachineSet + wantPass bool + wantPreflightCheckErrMessage string + wantErr bool }{ { name: "should pass if cluster has no control plane", @@ -141,9 +142,10 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { ControlPlaneRef: contract.ObjToRef(controlPlaneProvisioning), }, }, - controlPlane: controlPlaneProvisioning, - machineSet: &clusterv1.MachineSet{}, - wantPass: false, + controlPlane: controlPlaneProvisioning, + machineSet: &clusterv1.MachineSet{}, + wantPass: false, + wantPreflightCheckErrMessage: "GenericControlPlane ns1/cp1 is provisioning (\"ControlPlaneIsStable\" preflight check failed)", }, { name: "control plane preflight check: should fail if the control plane is upgrading", @@ -155,9 +157,10 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { ControlPlaneRef: contract.ObjToRef(controlPlaneUpgrading), }, }, - controlPlane: controlPlaneUpgrading, - machineSet: &clusterv1.MachineSet{}, - wantPass: false, + controlPlane: controlPlaneUpgrading, + machineSet: &clusterv1.MachineSet{}, + wantPass: false, + wantPreflightCheckErrMessage: "GenericControlPlane ns1/cp1 is upgrading (\"ControlPlaneIsStable\" preflight check failed)", }, { name: "control plane preflight check: should pass if the control plane is upgrading but the preflight check is skipped", @@ -269,7 +272,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - 
wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.27.0) and ControlPlane version (1.26.2) do not conform to the kubernetes version skew policy as MachineSet version is higher than ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should fail if the machine set minor version is 4 older than control plane minor version for >= v1.28", @@ -294,7 +298,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.24.0) and ControlPlane version (1.28.0) do not conform to the kubernetes version skew policy as MachineSet version is more than 3 minor versions older than the ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should fail if the machine set minor version is 3 older than control plane minor version for < v1.28", @@ -319,7 +324,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.23.0) and ControlPlane version (1.26.2) do not conform to the kubernetes version skew policy as MachineSet version is more than 2 minor versions older than the ControlPlane version (\"KubernetesVersionSkew\" preflight check failed)", }, { name: "kubernetes version preflight check: should pass if the machine set minor version is greater than control plane minor version but the preflight check is skipped", @@ -426,7 +432,8 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, }, }, - wantPass: false, + wantPass: false, + wantPreflightCheckErrMessage: "MachineSet version (1.25.5) and ControlPlane version (1.26.2) do not conform to kubeadm version skew policy as kubeadm only supports joining with the same major+minor version as the control plane (\"KubeadmVersionSkew\" 
preflight check failed)", }, { name: "kubeadm version preflight check: should pass if the machine set is not using kubeadm bootstrap provider", @@ -556,13 +563,14 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { r := &Reconciler{ Client: fakeClient, } - result, _, err := r.runPreflightChecks(ctx, tt.cluster, tt.machineSet, "") + result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, tt.cluster, tt.machineSet, "") if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).ToNot(HaveOccurred()) g.Expect(result.IsZero()).To(Equal(tt.wantPass)) } + g.Expect(preflightCheckErrMessage).To(BeComparableTo(tt.wantPreflightCheckErrMessage)) }) } }) diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go index 11cd135b7460..ef5ecc179a87 100644 --- a/internal/controllers/topology/cluster/cluster_controller.go +++ b/internal/controllers/topology/cluster/cluster_controller.go @@ -21,17 +21,20 @@ import ( "fmt" "time" + "github.com/go-logr/logr" "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/cache/informertest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -86,6 +89,8 @@ type Reconciler struct { desiredStateGenerator desiredstate.Generator patchHelperFactory structuredmerge.PatchHelperFactoryFunc + + predicateLog logr.Logger } func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -93,11 +98,11 @@ func 
(r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt return errors.New("Client, APIReader, ClusterCache and RuntimeClient must not be nil") } - predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "topology/cluster") + r.predicateLog = ctrl.LoggerFrom(ctx).WithValues("controller", "topology/cluster") c, err := ctrl.NewControllerManagedBy(mgr). For(&clusterv1.Cluster{}, builder.WithPredicates( // Only reconcile Cluster with topology. - predicates.ClusterHasTopology(mgr.GetScheme(), predicateLog), + predicates.ClusterHasTopology(mgr.GetScheme(), r.predicateLog), )). Named("topology/cluster"). WatchesRawSource(r.ClusterCache.GetClusterSource("topology/cluster", func(_ context.Context, o client.Object) []ctrl.Request { @@ -111,16 +116,16 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt &clusterv1.MachineDeployment{}, handler.EnqueueRequestsFromMapFunc(r.machineDeploymentToCluster), // Only trigger Cluster reconciliation if the MachineDeployment is topology owned. - builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)), + builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), r.predicateLog)), ). Watches( &expv1.MachinePool{}, handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster), // Only trigger Cluster reconciliation if the MachinePool is topology owned. - builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), predicateLog)), + builder.WithPredicates(predicates.ResourceIsTopologyOwned(mgr.GetScheme(), r.predicateLog)), ). WithOptions(options). - WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, r.WatchFilterValue)). + WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), r.predicateLog, r.WatchFilterValue)). 
Build(r) if err != nil { @@ -128,9 +133,10 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt } r.externalTracker = external.ObjectTracker{ - Controller: c, - Cache: mgr.GetCache(), - Scheme: mgr.GetScheme(), + Controller: c, + Cache: mgr.GetCache(), + Scheme: mgr.GetScheme(), + PredicateLogger: &r.predicateLog, } r.desiredStateGenerator = desiredstate.NewGenerator(r.Client, r.ClusterCache, r.RuntimeClient) r.recorder = mgr.GetEventRecorderFor("topology/cluster-controller") @@ -145,9 +151,10 @@ func (r *Reconciler) SetupForDryRun(recorder record.EventRecorder) { r.desiredStateGenerator = desiredstate.NewGenerator(r.Client, r.ClusterCache, r.RuntimeClient) r.recorder = recorder r.externalTracker = external.ObjectTracker{ - Controller: externalfake.Controller{}, - Cache: &informertest.FakeInformers{}, - Scheme: r.Client.Scheme(), + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: r.Client.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), } r.patchHelperFactory = dryRunPatchHelperFactory(r.Client) } @@ -175,13 +182,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re return ctrl.Result{}, nil } - // Return early if the Cluster is paused. - // TODO: What should we do if the cluster class is paused? 
- if annotations.IsPaused(cluster, cluster) { - log.Info("Reconciliation is paused for this object") - return ctrl.Result{}, nil - } - patchHelper, err := patch.NewHelper(cluster, r.Client) if err != nil { return ctrl.Result{}, err @@ -200,7 +200,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ clusterv1.TopologyReconciledCondition, }}, - patch.WithForceOverwriteConditions{}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.ClusterTopologyReconciledV1Beta2Condition, + }}, } if err := patchHelper.Patch(ctx, cluster, options...); err != nil { reterr = kerrors.NewAggregate([]error{reterr, err}) @@ -208,6 +210,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } }() + // Return early if the Cluster is paused. + if cluster.Spec.Paused || annotations.HasPaused(cluster) { + return ctrl.Result{}, nil + } + // In case the object is deleted, the managed topology stops to reconcile; // (the other controllers will take care of deletion). if !cluster.ObjectMeta.DeletionTimestamp.IsZero() { @@ -313,7 +320,7 @@ func (r *Reconciler) setupDynamicWatches(ctx context.Context, s *scope.Scope) er if err := r.externalTracker.Watch(ctrl.LoggerFrom(ctx), s.Current.InfrastructureCluster, handler.EnqueueRequestForOwner(scheme, r.Client.RESTMapper(), &clusterv1.Cluster{}), // Only trigger Cluster reconciliation if the InfrastructureCluster is topology owned. 
- predicates.ResourceIsTopologyOwned(scheme, ctrl.LoggerFrom(ctx))); err != nil { + predicates.ResourceIsTopologyOwned(scheme, r.predicateLog)); err != nil { return errors.Wrap(err, "error watching Infrastructure CR") } } @@ -321,7 +328,7 @@ func (r *Reconciler) setupDynamicWatches(ctx context.Context, s *scope.Scope) er if err := r.externalTracker.Watch(ctrl.LoggerFrom(ctx), s.Current.ControlPlane.Object, handler.EnqueueRequestForOwner(scheme, r.Client.RESTMapper(), &clusterv1.Cluster{}), // Only trigger Cluster reconciliation if the ControlPlane is topology owned. - predicates.ResourceIsTopologyOwned(scheme, ctrl.LoggerFrom(ctx))); err != nil { + predicates.ResourceIsTopologyOwned(scheme, r.predicateLog)); err != nil { return errors.Wrap(err, "error watching ControlPlane CR") } } diff --git a/internal/controllers/topology/cluster/conditions.go b/internal/controllers/topology/cluster/conditions.go index f9bafd9f968f..1670b5d9bbdf 100644 --- a/internal/controllers/topology/cluster/conditions.go +++ b/internal/controllers/topology/cluster/conditions.go @@ -21,11 +21,14 @@ import ( "strings" "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/exp/topology/scope" "sigs.k8s.io/cluster-api/internal/contract" + "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" ) func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error { @@ -36,6 +39,7 @@ func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Clus // The TopologyReconciled condition is considered true if spec of all the objects associated with the // cluster are in sync with the topology defined in the cluster. // The condition is false under the following conditions: +// - The cluster is paused. 
// - An error occurred during the reconcile process of the cluster topology. // - The ClusterClass has not been successfully reconciled with its current spec. // - The cluster upgrade has not yet propagated to all the components of the cluster. @@ -43,10 +47,35 @@ func (r *Reconciler) reconcileConditions(s *scope.Scope, cluster *clusterv1.Clus // In such a case, since some of the component's spec would be adrift from the topology the // topology cannot be considered fully reconciled. func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluster *clusterv1.Cluster, reconcileErr error) error { + // Mark TopologyReconciled as false if the Cluster is paused. + if cluster.Spec.Paused || annotations.HasPaused(cluster) { + var messages []string + if cluster.Spec.Paused { + messages = append(messages, "Cluster spec.paused is set to true") + } + if annotations.HasPaused(cluster) { + messages = append(messages, "Cluster has the cluster.x-k8s.io/paused annotation") + } + conditions.Set(cluster, + conditions.FalseCondition( + clusterv1.TopologyReconciledCondition, + clusterv1.TopologyReconciledPausedReason, + clusterv1.ConditionSeverityInfo, + strings.Join(messages, ", "), + ), + ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconcilePausedV1Beta2Reason, + Message: strings.Join(messages, ", "), + }) + return nil + } + // Mark TopologyReconciled as false due to cluster deletion. 
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() { - conditions.Set( - cluster, + conditions.Set(cluster, conditions.FalseCondition( clusterv1.TopologyReconciledCondition, clusterv1.DeletedReason, @@ -54,14 +83,18 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste "", ), ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason, + }) return nil } // If an error occurred during reconciliation set the TopologyReconciled condition to false. // Add the error message from the reconcile function to the message of the condition. if reconcileErr != nil { - conditions.Set( - cluster, + conditions.Set(cluster, conditions.FalseCondition( clusterv1.TopologyReconciledCondition, clusterv1.TopologyReconcileFailedReason, @@ -70,6 +103,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste reconcileErr.Error(), ), ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason, + // TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile. + Message: reconcileErr.Error(), + }) return nil } @@ -77,8 +117,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste // is not up to date. 
if s.Blueprint != nil && s.Blueprint.ClusterClass != nil && s.Blueprint.ClusterClass.GetGeneration() != s.Blueprint.ClusterClass.Status.ObservedGeneration { - conditions.Set( - cluster, + conditions.Set(cluster, conditions.FalseCondition( clusterv1.TopologyReconciledCondition, clusterv1.TopologyReconciledClusterClassNotReconciledReason, @@ -87,14 +126,20 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", ), ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason, + Message: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + + ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + }) return nil } // If any of the lifecycle hooks are blocking any part of the reconciliation then topology // is not considered as fully reconciled. if s.HookResponseTracker.AggregateRetryAfter() != 0 { - conditions.Set( - cluster, + conditions.Set(cluster, conditions.FalseCondition( clusterv1.TopologyReconciledCondition, clusterv1.TopologyReconciledHookBlockingReason, @@ -103,6 +148,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste s.HookResponseTracker.AggregateMessage(), ), ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterTopologyReconciledHookBlockingV1Beta2Reason, + // TODO: Add a protection for messages continuously changing leading to Cluster object changes/reconcile. 
+ Message: s.HookResponseTracker.AggregateMessage(), + }) return nil } @@ -121,6 +173,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste s.UpgradeTracker.MachinePools.DeferredUpgrade() { msgBuilder := &strings.Builder{} var reason string + var v1beta2Reason string // TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the // condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can @@ -130,40 +183,47 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste case s.UpgradeTracker.ControlPlane.IsPendingUpgrade: fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version) reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade(): fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.", computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate(): fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.", computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()), ) reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason case s.UpgradeTracker.MachineDeployments.DeferredUpgrade(): fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.", 
computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade(): fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.", computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason case s.UpgradeTracker.MachinePools.IsAnyPendingCreate(): fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.", computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()), ) reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason case s.UpgradeTracker.MachinePools.DeferredUpgrade(): fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.", computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason + v1beta2Reason = clusterv1.ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason } switch { @@ -191,8 +251,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste ) } - conditions.Set( - cluster, + conditions.Set(cluster, conditions.FalseCondition( clusterv1.TopologyReconciledCondition, reason, @@ -200,17 +259,26 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste msgBuilder.String(), ), ) + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: 
metav1.ConditionFalse, + Reason: v1beta2Reason, + Message: msgBuilder.String(), + }) return nil } // If there are no errors while reconciling and if the topology is not holding out changes // we can consider that spec of all the objects is reconciled to match the topology. Set the // TopologyReconciled condition to true. - conditions.Set( - cluster, + conditions.Set(cluster, conditions.TrueCondition(clusterv1.TopologyReconciledCondition), ) - + v1beta2conditions.Set(cluster, metav1.Condition{ + Type: clusterv1.ClusterTopologyReconciledV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason, + }) return nil } diff --git a/internal/controllers/topology/cluster/conditions_test.go b/internal/controllers/topology/cluster/conditions_test.go index 2124fb1e6e8e..f215de684c64 100644 --- a/internal/controllers/topology/cluster/conditions_test.go +++ b/internal/controllers/topology/cluster/conditions_test.go @@ -32,6 +32,7 @@ import ( runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" "sigs.k8s.io/cluster-api/exp/topology/scope" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" "sigs.k8s.io/cluster-api/util/test/builder" ) @@ -43,24 +44,30 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { deletionTime := metav1.Unix(0, 0) tests := []struct { - name string - reconcileErr error - s *scope.Scope - cluster *clusterv1.Cluster - machines []*clusterv1.Machine - wantConditionStatus corev1.ConditionStatus - wantConditionReason string - wantConditionMessage string - wantErr bool + name string + reconcileErr error + s *scope.Scope + cluster *clusterv1.Cluster + machines []*clusterv1.Machine + wantConditionStatus corev1.ConditionStatus + wantConditionReason string + wantConditionMessage string + wantV1Beta2ConditionStatus metav1.ConditionStatus + wantV1Beta2ConditionReason string + wantV1Beta2ConditionMessage string + wantErr bool 
}{ { - name: "should set the condition to false if there is a reconcile error", - reconcileErr: errors.New("reconcile error"), - cluster: &clusterv1.Cluster{}, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconcileFailedReason, - wantConditionMessage: "reconcile error", - wantErr: false, + name: "should set the condition to false if there is a reconcile error", + reconcileErr: errors.New("reconcile error"), + cluster: &clusterv1.Cluster{}, + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconcileFailedReason, + wantConditionMessage: "reconcile error", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledFailedV1Beta2Reason, + wantV1Beta2ConditionMessage: "reconcile error", + wantErr: false, }, { name: "should set the condition to false if the ClusterClass is out of date", @@ -82,6 +89,10 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { wantConditionReason: clusterv1.TopologyReconciledClusterClassNotReconciledReason, wantConditionMessage: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + ".status.observedGeneration == .metadata.generation is true. If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledClusterClassNotReconciledV1Beta2Reason, + wantV1Beta2ConditionMessage: "ClusterClass not reconciled. If this condition persists please check ClusterClass status. A ClusterClass is reconciled if" + + ".status.observedGeneration == .metadata.generation is true. 
If this is not the case either ClusterClass reconciliation failed or the ClusterClass is paused", wantErr: false, }, { @@ -102,9 +113,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { return hrt }(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledHookBlockingReason, - wantConditionMessage: "hook \"BeforeClusterUpgrade\" is blocking: msg", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledHookBlockingReason, + wantConditionMessage: "hook \"BeforeClusterUpgrade\" is blocking: msg", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledHookBlockingV1Beta2Reason, + wantV1Beta2ConditionMessage: "hook \"BeforeClusterUpgrade\" is blocking: msg", }, { name: "should set the condition to false if new version is not picked up because control plane is provisioning", @@ -132,9 +146,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is completing initial provisioning", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is completing initial provisioning", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. 
Control plane is completing initial provisioning", }, { name: "should set the condition to false if new version is not picked up because control plane is upgrading", @@ -163,9 +180,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2", }, { name: "should set the condition to false if new version is not picked up because control plane is scaling", @@ -194,9 +214,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. 
Control plane is reconciling desired replicas", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", }, { name: "should set the condition to false if new version is not picked up because at least one of the machine deployment is upgrading", @@ -239,9 +262,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", }, { name: "should set the condition to false if new version is not picked up because at least one of the machine pool is upgrading", @@ -283,9 +309,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. 
MachinePool(s) mp0-abc123 are upgrading", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachinePool(s) mp0-abc123 are upgrading", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledControlPlaneUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachinePool(s) mp0-abc123 are upgrading", }, { name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is upgrading", @@ -329,9 +358,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, + wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. 
Control plane is upgrading to version v1.22.0", }, { name: "should set the condition to false if control plane picked the new version but machine pools did not because control plane is upgrading", @@ -374,9 +406,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, - wantConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, + wantConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", }, { name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is scaling", @@ -420,9 +455,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. 
Control plane is reconciling desired replicas", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, + wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", }, { name: "should set the condition to false if control plane picked the new version but machine pools did not because control plane is scaling", @@ -465,9 +503,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, - wantConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, + wantConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachinePool(s) mp0-abc123 rollout and upgrade to version v1.22.0 on hold. 
Control plane is reconciling desired replicas", }, { name: "should set the condition to false if control plane picked the new version but there are machine deployments pending create because control plane is scaling", @@ -497,9 +538,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason, - wantConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason, + wantConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachineDeploymentsCreatePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas", }, { name: "should set the condition to false if control plane picked the new version but there are machine pools pending create because control plane is scaling", @@ -529,9 +573,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachinePoolsCreatePendingReason, - wantConditionMessage: "MachinePool(s) for Topologies mp0 creation on hold. Control plane is reconciling desired replicas", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachinePoolsCreatePendingReason, + wantConditionMessage: "MachinePool(s) for Topologies mp0 creation on hold. 
Control plane is reconciling desired replicas", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachinePoolsCreatePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachinePool(s) for Topologies mp0 creation on hold. Control plane is reconciling desired replicas", }, { name: "should set the condition to true if control plane picked the new version and is upgrading but there are no machine deployments or machine pools", @@ -560,7 +607,10 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionTrue, + wantConditionStatus: corev1.ConditionTrue, + wantV1Beta2ConditionStatus: metav1.ConditionTrue, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason, + wantV1Beta2ConditionMessage: "", }, { name: "should set the condition to true if control plane picked the new version and is scaling but there are no machine deployments or machine pools", @@ -589,7 +639,10 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionTrue, + wantConditionStatus: corev1.ConditionTrue, + wantV1Beta2ConditionStatus: metav1.ConditionTrue, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason, + wantV1Beta2ConditionMessage: "", }, { name: "should set the condition to false is some machine deployments have not picked the new version because other machine deployments are upgrading", @@ -668,9 +721,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { WithVersion("v1.21.2"). Build(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. 
MachineDeployment(s) md0-abc123 are upgrading", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, + wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", }, { name: "should set the condition to false is some machine pools have not picked the new version because other machine pools are upgrading", @@ -737,9 +793,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { WithVersion("v1.21.2"). Build(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, - wantConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 on hold. MachinePool(s) mp0-abc123 are upgrading", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason, + wantConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 on hold. MachinePool(s) mp0-abc123 are upgrading", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachinePoolsUpgradePendingV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 on hold. 
MachinePool(s) mp0-abc123 are upgrading", }, { name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred", @@ -796,9 +855,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason, + wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledMachineDeploymentsUpgradeDeferredV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.", }, { name: "should set the condition to false if some machine pools have not picked the new version because their upgrade has been deferred", @@ -853,9 +915,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason, - wantConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 deferred.", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason, + wantConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 deferred.", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: 
clusterv1.ClusterTopologyReconciledMachinePoolsUpgradeDeferredV1Beta2Reason, + wantV1Beta2ConditionMessage: "MachinePool(s) mp1-abc123 rollout and upgrade to version v1.22.0 deferred.", }, { name: "should set the condition to true if there are no reconcile errors and control plane and all machine deployments and machine pools picked up the new version", @@ -937,7 +1002,10 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }(), HookResponseTracker: scope.NewHookResponseTracker(), }, - wantConditionStatus: corev1.ConditionTrue, + wantConditionStatus: corev1.ConditionTrue, + wantV1Beta2ConditionStatus: metav1.ConditionTrue, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconcileSucceededV1Beta2Reason, + wantV1Beta2ConditionMessage: "", }, { name: "should set the TopologyReconciledCondition to False if the cluster has been deleted", @@ -946,9 +1014,12 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { DeletionTimestamp: &deletionTime, }, }, - wantConditionStatus: corev1.ConditionFalse, - wantConditionReason: clusterv1.DeletedReason, - wantConditionMessage: "", + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.DeletedReason, + wantConditionMessage: "", + wantV1Beta2ConditionStatus: metav1.ConditionFalse, + wantV1Beta2ConditionReason: clusterv1.ClusterTopologyReconciledDeletionTimestampSetV1Beta2Reason, + wantV1Beta2ConditionMessage: "", }, } @@ -978,9 +1049,16 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) actualCondition := conditions.Get(tt.cluster, clusterv1.TopologyReconciledCondition) + g.Expect(actualCondition).ToNot(BeNil()) g.Expect(actualCondition.Status).To(Equal(tt.wantConditionStatus)) g.Expect(actualCondition.Reason).To(Equal(tt.wantConditionReason)) g.Expect(actualCondition.Message).To(Equal(tt.wantConditionMessage)) + + actualV1Beta2Condition := v1beta2conditions.Get(tt.cluster, clusterv1.ClusterTopologyReconciledV1Beta2Condition) + 
g.Expect(actualV1Beta2Condition).ToNot(BeNil()) + g.Expect(actualV1Beta2Condition.Status).To(Equal(tt.wantV1Beta2ConditionStatus)) + g.Expect(actualV1Beta2Condition.Reason).To(Equal(tt.wantV1Beta2ConditionReason)) + g.Expect(actualV1Beta2Condition.Message).To(Equal(tt.wantV1Beta2ConditionMessage)) } }) } diff --git a/test/e2e/node_drain.go b/test/e2e/node_drain.go index 8c2f3e757225..75729da31f06 100644 --- a/test/e2e/node_drain.go +++ b/test/e2e/node_drain.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "regexp" "time" . "github.com/onsi/ginkgo/v2" @@ -313,18 +314,18 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo DeploymentNamePrefix: "drain-order-1", CPConditionMessageSubstrings: []string{ // The evictable Pod with order 1 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. - fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", cpDeploymentName("drain-order-1")), + fmt.Sprintf(`(?m)\* Pod evictable-workload\/%s[^:]+: deletionTimestamp set, but still not removed from the Node`, cpDeploymentName("drain-order-1")), // After the Pod with order 1 is gone, the drain continues with the Pod with order 5. - fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload/%s", cpDeploymentName("drain-order-5")), + fmt.Sprintf(`(?m)After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload\/%s`, cpDeploymentName("drain-order-5")), }, MDConditionMessageSubstrings: func() map[string][]string { messageSubStrings := map[string][]string{} for _, md := range machineDeployments { messageSubStrings[md.Name] = []string{ // The evictable Pod with order 1 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. 
- fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", mdDeploymentName("drain-order-1", md.Name)), + fmt.Sprintf(`(?m)\* Pod evictable-workload\/%s[^:]+: deletionTimestamp set, but still not removed from the Node`, mdDeploymentName("drain-order-1", md.Name)), // After the Pod with order 1 is gone, the drain continues with the Pod with order 5. - fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload/%s", mdDeploymentName("drain-order-5", md.Name)), + fmt.Sprintf(`(?m)After above Pods have been removed from the Node, the following Pods will be evicted: evictable-workload\/%s`, mdDeploymentName("drain-order-5", md.Name)), } } return messageSubStrings @@ -343,18 +344,18 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo DeploymentNamePrefix: "drain-order-5", CPConditionMessageSubstrings: []string{ // The evictable Pod with order 5 was evicted. It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. - fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", cpDeploymentName("drain-order-5")), + fmt.Sprintf(`(?m)\* Pod evictable-workload\/%s[^:]+: deletionTimestamp set, but still not removed from the Node`, cpDeploymentName("drain-order-5")), // After the Pod with order 5 is gone, the drain continues with the unevictable Pod. - fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload/%s", cpDeploymentWithPDBName()), + fmt.Sprintf(`(?m)After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload\/%s`, cpDeploymentWithPDBName()), }, MDConditionMessageSubstrings: func() map[string][]string { messageSubStrings := map[string][]string{} for _, md := range machineDeployments { messageSubStrings[md.Name] = []string{ // The evictable Pod with order 5 was evicted. 
It still blocks the drain because of the finalizer, otherwise the Pod would be gone already. - fmt.Sprintf("Pods with deletionTimestamp that still exist: evictable-workload/%s", mdDeploymentName("drain-order-5", md.Name)), + fmt.Sprintf(`(?m)\* Pod evictable-workload\/%s[^:]+: deletionTimestamp set, but still not removed from the Node`, mdDeploymentName("drain-order-5", md.Name)), // After the Pod with order 5 is gone, the drain continues with the unevictable Pod. - fmt.Sprintf("After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload/%s", mdDeploymentWithPDBName(md.Name)), + fmt.Sprintf(`(?m)After above Pods have been removed from the Node, the following Pods will be evicted: unevictable-workload\/%s`, mdDeploymentWithPDBName(md.Name)), } } return messageSubStrings @@ -391,9 +392,9 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo g.Expect(condition).ToNot(BeNil()) g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) // The evictable Pod should be gone now. - g.Expect(condition.Message).ToNot(ContainSubstring("Pods with deletionTimestamp that still exist")) + g.Expect(condition.Message).ToNot(ContainSubstring("deletionTimestamp set, but still not removed from the Node")) // The unevictable Pod should still not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentWithPDBName()))) + g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("cannot evict pod as it would violate the pod's disruption budget. 
The disruption budget %s needs", cpDeploymentWithPDBName()))) }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) for _, md := range machineDeployments { Eventually(func(g Gomega) { @@ -404,9 +405,9 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo g.Expect(condition).ToNot(BeNil()) g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) // The evictable Pod should be gone now. - g.Expect(condition.Message).ToNot(ContainSubstring("Pods with deletionTimestamp that still exist")) + g.Expect(condition.Message).ToNot(ContainSubstring("deletionTimestamp set, but still not removed from the Node")) // The unevictable Pod should still not be evicted because of the PDB. - g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentWithPDBName(md.Name)))) + g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentWithPDBName(md.Name)))) }, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed()) } @@ -556,7 +557,9 @@ func verifyNodeDrainsBlockedAndUnblock(ctx context.Context, input verifyNodeDrai g.Expect(condition).ToNot(BeNil()) g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) for _, messageSubstring := range input.CPConditionMessageSubstrings { - g.Expect(condition.Message).To(ContainSubstring(messageSubstring)) + var re = regexp.MustCompile(messageSubstring) + match := re.MatchString(condition.Message) + g.Expect(match).To(BeTrue(), fmt.Sprintf("message substring '%s' does not match %s", condition.Message, messageSubstring)) } // Verify evictable Pod was evicted and terminated (i.e. 
phase is succeeded) @@ -582,7 +585,9 @@ func verifyNodeDrainsBlockedAndUnblock(ctx context.Context, input verifyNodeDrai g.Expect(condition).ToNot(BeNil()) g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) for _, messageSubstring := range input.MDConditionMessageSubstrings[md.Name] { - g.Expect(condition.Message).To(ContainSubstring(messageSubstring)) + var re = regexp.MustCompile(messageSubstring) + match := re.MatchString(condition.Message) + g.Expect(match).To(BeTrue(), fmt.Sprintf("message substring '%s' does not match %s", condition.Message, messageSubstring)) } // Verify evictable Pod was evicted and terminated (i.e. phase is succeeded) diff --git a/test/go.mod b/test/go.mod index 27a57f48ba00..5d9637191028 100644 --- a/test/go.mod +++ b/test/go.mod @@ -22,7 +22,7 @@ require ( github.com/vincent-petithory/dataurl v1.0.0 go.etcd.io/etcd/api/v3 v3.5.16 go.etcd.io/etcd/client/v3 v3.5.16 - golang.org/x/net v0.30.0 + golang.org/x/net v0.31.0 google.golang.org/grpc v1.65.1 k8s.io/api v0.31.2 k8s.io/apiextensions-apiserver v0.31.2 @@ -138,13 +138,13 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.28.0 // indirect + golang.org/x/crypto v0.29.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/term v0.25.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sync v0.9.0 // indirect + golang.org/x/sys v0.27.0 // indirect + golang.org/x/term v0.26.0 // indirect + golang.org/x/text v0.20.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.26.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect diff --git a/test/go.sum b/test/go.sum index a736777468bc..f6f8aea9a306 100644 --- a/test/go.sum +++ b/test/go.sum 
@@ -345,8 +345,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -358,15 +358,15 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/oauth2 v0.24.0 
h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -377,16 +377,16 @@ golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term 
v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go b/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go index 53c41052e8fd..a3dfb1489874 100644 --- a/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go +++ b/test/infrastructure/docker/api/v1alpha4/zz_generated.conversion.go @@ -518,6 +518,7 @@ func autoConvert_v1beta1_DockerLoadBalancer_To_v1alpha4_DockerLoadBalancer(in *v return err } // WARNING: in.CustomHAProxyConfigTemplateRef requires manual conversion: does not exist in peer-type + // WARNING: in.Disable requires manual conversion: does not exist in peer-type return nil } diff --git a/test/infrastructure/docker/api/v1beta1/dockercluster_types.go b/test/infrastructure/docker/api/v1beta1/dockercluster_types.go index c450c83072be..b63e3293ef30 100644 --- a/test/infrastructure/docker/api/v1beta1/dockercluster_types.go +++ b/test/infrastructure/docker/api/v1beta1/dockercluster_types.go @@ 
-65,6 +65,9 @@ type DockerLoadBalancer struct { // node is added or removed. The template will also support the JoinHostPort function to join the host and port of the backend server. // +optional CustomHAProxyConfigTemplateRef *corev1.LocalObjectReference `json:"customHAProxyConfigTemplateRef,omitempty"` + + // Disable allows skipping the creation of the cluster load balancer. + Disable bool `json:"disable,omitempty"` } // ImageMeta allows customizing the image used for components that are not diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclusters.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclusters.yaml index 84e5bd468041..45c417adc808 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclusters.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclusters.yaml @@ -429,6 +429,10 @@ spec: type: string type: object x-kubernetes-map-type: atomic + disable: + description: Disable allows skipping the creation of the cluster + load balancer. + type: boolean imageRepository: description: |- ImageRepository sets the container registry to pull the haproxy image from. diff --git a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml index fdfd55b1c456..b6b7115bcc41 100644 --- a/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml +++ b/test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml @@ -250,6 +250,10 @@ spec: type: string type: object x-kubernetes-map-type: atomic + disable: + description: Disable allows skipping the creation of the + cluster load balancer. 
+ type: boolean imageRepository: description: |- ImageRepository sets the container registry to pull the haproxy image from. diff --git a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go index 160178d71176..f019c91d767c 100644 --- a/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go +++ b/test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go @@ -192,9 +192,10 @@ func (r *DockerMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr r.recorder = mgr.GetEventRecorderFor(dockerMachinePoolControllerName) r.externalTracker = external.ObjectTracker{ - Controller: c, - Cache: mgr.GetCache(), - Scheme: mgr.GetScheme(), + Controller: c, + Cache: mgr.GetCache(), + Scheme: mgr.GetScheme(), + PredicateLogger: &predicateLog, } r.ssaCache = ssa.NewCache() diff --git a/test/infrastructure/docker/internal/controllers/dockercluster_controller.go b/test/infrastructure/docker/internal/controllers/dockercluster_controller.go index 6eba1c266cb1..68fa6fb863dc 100644 --- a/test/infrastructure/docker/internal/controllers/dockercluster_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockercluster_controller.go @@ -106,11 +106,14 @@ func (r *DockerClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reques } }() - // Create a helper for managing a docker container hosting the loadbalancer. 
- externalLoadBalancer, err := docker.NewLoadBalancer(ctx, cluster, dockerCluster) - if err != nil { - conditions.MarkFalse(dockerCluster, infrav1.LoadBalancerAvailableCondition, infrav1.LoadBalancerProvisioningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer") + var externalLoadBalancer *docker.LoadBalancer + if !dockerCluster.Spec.LoadBalancer.Disable { + // Create a helper for managing a docker container hosting the loadbalancer. + externalLoadBalancer, err = docker.NewLoadBalancer(ctx, cluster, dockerCluster) + if err != nil { + conditions.MarkFalse(dockerCluster, infrav1.LoadBalancerAvailableCondition, infrav1.LoadBalancerProvisioningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer") + } } // Support FailureDomains @@ -149,6 +152,13 @@ func patchDockerCluster(ctx context.Context, patchHelper *patch.Helper, dockerCl } func (r *DockerClusterReconciler) reconcileNormal(ctx context.Context, dockerCluster *infrav1.DockerCluster, externalLoadBalancer *docker.LoadBalancer) error { + if dockerCluster.Spec.LoadBalancer.Disable { + // Mark the dockerCluster ready + dockerCluster.Status.Ready = true + conditions.MarkTrue(dockerCluster, infrav1.LoadBalancerAvailableCondition) + return nil + } + // Create the docker container hosting the load balancer. 
if err := externalLoadBalancer.Create(ctx); err != nil { conditions.MarkFalse(dockerCluster, infrav1.LoadBalancerAvailableCondition, infrav1.LoadBalancerProvisioningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) @@ -189,9 +199,11 @@ func (r *DockerClusterReconciler) reconcileDelete(ctx context.Context, dockerClu return errors.Wrap(err, "failed to patch DockerCluster") } - // Delete the docker container hosting the load balancer - if err := externalLoadBalancer.Delete(ctx); err != nil { - return errors.Wrap(err, "failed to delete load balancer") + if !dockerCluster.Spec.LoadBalancer.Disable { + // Delete the docker container hosting the load balancer + if err := externalLoadBalancer.Delete(ctx); err != nil { + return errors.Wrap(err, "failed to delete load balancer") + } } // Cluster is deleted so remove the finalizer. diff --git a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go index fd855062948f..a68d3085f860 100644 --- a/test/infrastructure/docker/internal/controllers/dockermachine_controller.go +++ b/test/infrastructure/docker/internal/controllers/dockermachine_controller.go @@ -173,13 +173,16 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine") } - // Create a helper for managing a docker container hosting the loadbalancer. - // NB. 
the machine controller has to manage the cluster load balancer because the current implementation of the - docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of - updating the cluster load balancer configuration when control plane machines are added/removed - externalLoadBalancer, err := docker.NewLoadBalancer(ctx, cluster, dockerCluster) - if err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer") + var externalLoadBalancer *docker.LoadBalancer + if !dockerCluster.Spec.LoadBalancer.Disable { + // Create a helper for managing a docker container hosting the loadbalancer. + // NB. the machine controller has to manage the cluster load balancer because the current implementation of the + // docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of + // updating the cluster load balancer configuration when control plane machines are added/removed + externalLoadBalancer, err = docker.NewLoadBalancer(ctx, cluster, dockerCluster) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer") + } } // Handle deleted machines @@ -304,17 +307,21 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster * } } - // if the machine is a control plane update the load balancer configuration - // we should only do this once, as reconfiguration more or less ensures - // node ref setting fails - if util.IsControlPlaneMachine(machine) && !dockerMachine.Status.LoadBalancerConfigured { - unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) - if err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") - } - if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { - return
ctrl.Result{}, errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + if !dockerCluster.Spec.LoadBalancer.Disable { + // if the machine is a control plane update the load balancer configuration + // we should only do this once, as reconfiguration more or less ensures + // node ref setting fails + if util.IsControlPlaneMachine(machine) && !dockerMachine.Status.LoadBalancerConfigured { + unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) + if err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") + } + if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + } + dockerMachine.Status.LoadBalancerConfigured = true } + } else { dockerMachine.Status.LoadBalancerConfigured = true } @@ -458,14 +465,16 @@ func (r *DockerMachineReconciler) reconcileDelete(ctx context.Context, dockerClu return errors.Wrap(err, "failed to delete DockerMachine") } - // if the deleted machine is a control-plane node, remove it from the load balancer configuration; - if util.IsControlPlaneMachine(machine) { - unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, dockerCluster) - if err != nil { - return errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") - } - if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { - return errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + if !dockerCluster.Spec.LoadBalancer.Disable { + // if the deleted machine is a control-plane node, remove it from the load balancer configuration; + if util.IsControlPlaneMachine(machine) { + unsafeLoadBalancerConfigTemplate, err := r.getUnsafeLoadBalancerConfigTemplate(ctx, 
dockerCluster) + if err != nil { + return errors.Wrap(err, "failed to retrieve HAProxy configuration from CustomHAProxyConfigTemplateRef") + } + if err := externalLoadBalancer.UpdateConfiguration(ctx, unsafeLoadBalancerConfigTemplate); err != nil { + return errors.Wrap(err, "failed to update DockerCluster.loadbalancer configuration") + } } } diff --git a/test/infrastructure/docker/main.go b/test/infrastructure/docker/main.go index 8ab8366569be..61e85f0b13c7 100644 --- a/test/infrastructure/docker/main.go +++ b/test/infrastructure/docker/main.go @@ -93,6 +93,7 @@ var ( // CAPD specific flags. concurrency int clusterCacheConcurrency int + skipLoadBalancer bool ) func init() { diff --git a/test/infrastructure/docker/templates/clusterclass-quick-start.yaml b/test/infrastructure/docker/templates/clusterclass-quick-start.yaml index e6d31972820b..b5413035dbab 100644 --- a/test/infrastructure/docker/templates/clusterclass-quick-start.yaml +++ b/test/infrastructure/docker/templates/clusterclass-quick-start.yaml @@ -13,6 +13,14 @@ spec: kind: DockerMachineTemplate apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 name: quick-start-control-plane + machineHealthCheck: + unhealthyConditions: + - type: Ready + status: Unknown + timeout: 300s + - type: Ready + status: "False" + timeout: 300s infrastructure: ref: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 @@ -32,6 +40,14 @@ spec: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate name: quick-start-default-worker-machinetemplate + machineHealthCheck: + unhealthyConditions: + - type: Ready + status: Unknown + timeout: 300s + - type: Ready + status: "False" + timeout: 300s machinePools: - class: default-worker template: diff --git a/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go index 51cbca048dc3..e737b9d0d1ca 100644 --- 
a/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go +++ b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go @@ -332,24 +332,28 @@ func (r *InMemoryMachineReconciler) reconcileNormalNode(ctx context.Context, clu Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{ { - Type: corev1.NodeReady, - Status: corev1.ConditionTrue, - Reason: "KubeletReady", + LastTransitionTime: metav1.Now(), + Type: corev1.NodeReady, + Status: corev1.ConditionTrue, + Reason: "KubeletReady", }, { - Type: corev1.NodeMemoryPressure, - Status: corev1.ConditionFalse, - Reason: "KubeletHasSufficientMemory", + LastTransitionTime: metav1.Now(), + Type: corev1.NodeMemoryPressure, + Status: corev1.ConditionFalse, + Reason: "KubeletHasSufficientMemory", }, { - Type: corev1.NodeDiskPressure, - Status: corev1.ConditionFalse, - Reason: "KubeletHasNoDiskPressure", + LastTransitionTime: metav1.Now(), + Type: corev1.NodeDiskPressure, + Status: corev1.ConditionFalse, + Reason: "KubeletHasNoDiskPressure", }, { - Type: corev1.NodePIDPressure, - Status: corev1.ConditionFalse, - Reason: "KubeletHasSufficientPID", + LastTransitionTime: metav1.Now(), + Type: corev1.NodePIDPressure, + Status: corev1.ConditionFalse, + Reason: "KubeletHasSufficientPID", }, }, }, diff --git a/util/conditions/v1beta2/aggregate_test.go b/util/conditions/v1beta2/aggregate_test.go index dc636296ea3a..a5edd3ee86b3 100644 --- a/util/conditions/v1beta2/aggregate_test.go +++ b/util/conditions/v1beta2/aggregate_test.go @@ -47,9 +47,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{}, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionFalse, // False because there is 
one issue + Reason: "Reason-1", // Picking the reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -63,9 +63,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{NegativePolarityConditionTypes{clusterv1.ScalingUpV1Beta2Condition}}, want: &metav1.Condition{ Type: clusterv1.ScalingUpV1Beta2Condition, - Status: metav1.ConditionTrue, // True because there is one issue, and the target condition has negative polarity - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionTrue, // True because there is one issue, and the target condition has negative polarity + Reason: "Reason-1", // Picking the reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -87,9 +87,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{NegativePolarityConditionTypes{clusterv1.ScalingUpV1Beta2Condition}, CustomMergeStrategy{newDefaultMergeStrategy(true, sets.New(clusterv1.ScalingUpV1Beta2Condition))}}, want: &metav1.Condition{ Type: clusterv1.ScalingUpV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue, and the custom merge strategy doesn't set the flag that defines that the target condition has negative polarity - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionFalse, // False because there is one issue, and the custom merge strategy doesn't set the flag that defines that the target condition has negative polarity + Reason: "Reason-1", // Picking the reason from the only existing issue + 
Message: "* Phase3Obj obj0: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -103,9 +103,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{TargetConditionType("SomethingAvailable")}, want: &metav1.Condition{ Type: "SomethingAvailable", - Status: metav1.ConditionFalse, // False because there is one issue - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionFalse, // False because there is one issue + Reason: "Reason-1", // Picking the reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -119,9 +119,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{TargetConditionType("SomethingAvailable"), NegativePolarityConditionTypes{clusterv1.ScalingUpV1Beta2Condition}}, want: &metav1.Condition{ Type: "SomethingAvailable", - Status: metav1.ConditionTrue, // True because there is one issue, and the target condition has negative polarity - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionTrue, // True because there is one issue, and the target condition has negative polarity + Reason: "Reason-1", // Picking the reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -137,9 +137,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{}, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: MultipleIssuesReportedReason, // Using a generic reason - 
Message: "Message-1 from Phase3Objs obj0, obj1, obj2", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj1, obj2: Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -157,9 +157,9 @@ func TestAggregate(t *testing.T) { options: []AggregateOption{}, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Objs obj0, obj1, obj2 and 2 more", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj1, obj2, ... 
(2 more): Message-1", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -177,10 +177,39 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Objs obj0, obj3, obj4; Message-2 from Phase3Objs obj1, obj2; Message-3 from Phase3Obj obj5", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj3, obj4: Message-1\n" + + "* Phase3Objs obj1, obj2: Message-2\n" + + "* Phase3Obj obj5: Message-3", // messages from all the issues & unknown conditions (info dropped) + }, + wantErr: false, + }, + { + name: "Up to three different issue messages; if message is a list, it should be indented", + conditions: [][]metav1.Condition{ + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-1", Message: "* Message-1"}}, // obj0 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-2", Message: "* Message-2A\n* Message-2B"}}, // obj1 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-2", Message: "* Message-2A\n* Message-2B"}}, // obj2 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-1", Message: "* Message-1"}}, // obj3 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-1", Message: "* Message-1"}}, // obj4 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-3", Message: "* 
Message-3"}}, // obj5 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, Reason: "Reason-99", Message: "Message-99"}}, // obj6 + }, + conditionType: clusterv1.AvailableV1Beta2Condition, + options: []AggregateOption{}, + want: &metav1.Condition{ + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj3, obj4:\n" + + " * Message-1\n" + + "* Phase3Objs obj1, obj2:\n" + + " * Message-2A\n" + + " * Message-2B\n" + + "* Phase3Obj obj5:\n" + + " * Message-3", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -198,10 +227,13 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Objs obj0, obj4; Message-2 from Phase3Obj obj1; Message-3 from Phase3Obj obj5; 2 Phase3Objs with other issues", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj4: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj2: Message-4\n" + + "And 2 Phase3Objs with other issues", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -216,10 +248,12 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue 
- Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Obj obj0; Message-2 from Phase3Obj obj1; Message-3 from Phase3Obj obj2", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Obj obj0: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj2: Message-3", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -235,10 +269,38 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Obj obj0; Message-2 from Phase3Obj obj1; Message-4 from Phase3Obj obj3; 1 Phase3Obj with status unknown", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Obj obj0: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj3: Message-4\n" + + "And 1 Phase3Obj with status unknown", // messages from all the issues & unknown conditions (info dropped) + }, + wantErr: false, + }, + { + name: "More than 3 issue messages and unknown message", + conditions: [][]metav1.Condition{ + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-1", Message: "Message-1"}}, // obj0 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-2", Message: "Message-2"}}, // obj1 + {{Type: 
clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: "Reason-3", Message: "Message-3"}}, // obj2 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-4", Message: "Message-4"}}, // obj3 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-5", Message: "Message-5"}}, // obj4 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, Reason: "Reason-6", Message: "Message-6"}}, // obj5 + {{Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, Reason: "Reason-99", Message: "Message-99"}}, // obj6 + }, + conditionType: clusterv1.AvailableV1Beta2Condition, + options: []AggregateOption{}, + want: &metav1.Condition{ + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* Phase3Obj obj0: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj3: Message-4\n" + + "And 2 Phase3Objs with other issues\n" + + "And 1 Phase3Obj with status unknown", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -256,10 +318,13 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, // Unknown because there is at least an unknown and no issue - Reason: MultipleUnknownReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Objs obj0, obj4; Message-2 from Phase3Obj obj1; Message-3 from Phase3Obj obj5; 2 Phase3Objs with status unknown", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, // Unknown because there is at least an unknown and no issue + Reason: 
MultipleUnknownReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj4: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj2: Message-4\n" + + "And 2 Phase3Objs with status unknown", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -276,10 +341,13 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionTrue, // True because there are no issue and unknown - Reason: MultipleInfoReportedReason, // Using a generic reason - Message: "Message-1 from Phase3Objs obj0, obj4; Message-2 from Phase3Obj obj1; Message-3 from Phase3Obj obj5; 1 Phase3Obj with additional info", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionTrue, // True because there are no issue and unknown + Reason: MultipleInfoReportedReason, // Using a generic reason + Message: "* Phase3Objs obj0, obj4: Message-1\n" + + "* Phase3Obj obj1: Message-2\n" + + "* Phase3Obj obj2: Message-4\n" + + "And 1 Phase3Obj with additional info", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -292,10 +360,11 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0; Condition Available not yet reported from Phase3Obj obj1", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there is one issue + Reason: "Reason-1", // Picking the 
reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1\n" + + "* Phase3Obj obj1: Condition Available not yet reported", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, @@ -308,10 +377,11 @@ func TestAggregate(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []AggregateOption{TargetConditionType("SomethingAvailable")}, want: &metav1.Condition{ - Type: "SomethingAvailable", - Status: metav1.ConditionFalse, // False because there is one issue - Reason: "Reason-1", // Picking the reason from the only existing issue - Message: "Message-1 from Phase3Obj obj0; Condition Available not yet reported from Phase3Obj obj1", // messages from all the issues & unknown conditions (info dropped) + Type: "SomethingAvailable", + Status: metav1.ConditionFalse, // False because there is one issue + Reason: "Reason-1", // Picking the reason from the only existing issue + Message: "* Phase3Obj obj0: Message-1\n" + + "* Phase3Obj obj1: Condition Available not yet reported", // messages from all the issues & unknown conditions (info dropped) }, wantErr: false, }, diff --git a/util/conditions/v1beta2/merge_strategies.go b/util/conditions/v1beta2/merge_strategies.go index 8804c41029ec..a8e0fb109da0 100644 --- a/util/conditions/v1beta2/merge_strategies.go +++ b/util/conditions/v1beta2/merge_strategies.go @@ -236,16 +236,16 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit } } - var m string + m := fmt.Sprintf("* %s:", condition.Type) if condition.Message != "" { - m = fmt.Sprintf("%s: %s", condition.Type, condition.Message) + m += indentIfMultiline(condition.Message) } else { - m = fmt.Sprintf("%s: No additional info provided", condition.Type) + m += " No additional info provided" } messages = append(messages, m) } - message = strings.Join(messages, "; ") + message = strings.Join(messages, "\n") } // When performing the aggregate operation, we are merging one single 
condition from potentially many objects. @@ -289,7 +289,7 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit messages = append(messages, infoMessages...) } - message = strings.Join(messages, "; ") + message = strings.Join(messages, "\n") } return status, reason, message, nil @@ -359,7 +359,7 @@ func aggregateMessages(conditions []ConditionWithOwnerInfo, n *int, dropEmpty bo messageObjMapForKind := messageObjMap[kind] // compute the order of messages according to the number of objects reporting the same message. - // Note: The message text is used as a secondary criteria to sort messages with the same number of objects. + // Note: The list of object names is used as a secondary criteria to sort messages with the same number of objects. messageIndex := make([]string, 0, len(messageObjMapForKind)) for m := range messageObjMapForKind { messageIndex = append(messageIndex, m) @@ -367,7 +367,7 @@ func aggregateMessages(conditions []ConditionWithOwnerInfo, n *int, dropEmpty bo sort.SliceStable(messageIndex, func(i, j int) bool { return len(messageObjMapForKind[messageIndex[i]]) > len(messageObjMapForKind[messageIndex[j]]) || - (len(messageObjMapForKind[messageIndex[i]]) == len(messageObjMapForKind[messageIndex[j]]) && messageIndex[i] < messageIndex[j]) + (len(messageObjMapForKind[messageIndex[i]]) == len(messageObjMapForKind[messageIndex[j]]) && strings.Join(messageObjMapForKind[messageIndex[i]], ",") < strings.Join(messageObjMapForKind[messageIndex[j]], ",")) }) // Pick the first n messages, decrement n. @@ -382,35 +382,51 @@ func aggregateMessages(conditions []ConditionWithOwnerInfo, n *int, dropEmpty bo continue } - msg := m + msg := "" allObjects := messageObjMapForKind[m] sort.Strings(allObjects) switch { case len(allObjects) == 0: // This should never happen, entry in the map exists only when an object reports a message. 
case len(allObjects) == 1: - msg += fmt.Sprintf(" from %s %s", kind, strings.Join(allObjects, ", ")) + msg += fmt.Sprintf("* %s %s:", kind, strings.Join(allObjects, ", ")) case len(allObjects) <= 3: - msg += fmt.Sprintf(" from %s %s", kindPlural, strings.Join(allObjects, ", ")) + msg += fmt.Sprintf("* %s %s:", kindPlural, strings.Join(allObjects, ", ")) default: - msg += fmt.Sprintf(" from %s %s and %d more", kindPlural, strings.Join(allObjects[:3], ", "), len(allObjects)-3) + msg += fmt.Sprintf("* %s %s, ... (%d more):", kindPlural, strings.Join(allObjects[:3], ", "), len(allObjects)-3) } + msg += indentIfMultiline(m) messages = append(messages, msg) *n-- } if other == 1 { - messages = append(messages, fmt.Sprintf("%d %s %s", other, kind, otherMessage)) + messages = append(messages, fmt.Sprintf("And %d %s %s", other, kind, otherMessage)) } if other > 1 { - messages = append(messages, fmt.Sprintf("%d %s %s", other, kindPlural, otherMessage)) + messages = append(messages, fmt.Sprintf("And %d %s %s", other, kindPlural, otherMessage)) } } return messages } +func indentIfMultiline(m string) string { + msg := "" + if strings.Contains(m, "\n") || strings.HasPrefix(m, "* ") { + msg += "\n" + lines := strings.Split(m, "\n") + for i, l := range lines { + lines[i] = " " + l + } + msg += strings.Join(lines, "\n") + } else { + msg += " " + m + } + return msg +} + // getConditionsWithOwnerInfo return all the conditions from an object each one with the corresponding ConditionOwnerInfo. 
func getConditionsWithOwnerInfo(obj Getter) []ConditionWithOwnerInfo { ret := make([]ConditionWithOwnerInfo, 0, 10) diff --git a/util/conditions/v1beta2/merge_strategies_test.go b/util/conditions/v1beta2/merge_strategies_test.go index 37bc94536454..7847973d3879 100644 --- a/util/conditions/v1beta2/merge_strategies_test.go +++ b/util/conditions/v1beta2/merge_strategies_test.go @@ -32,9 +32,9 @@ func TestAggregateMessages(t *testing.T) { // NOTE: objects are intentionally not in order so we can validate they are sorted by name {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj02"}, Condition: metav1.Condition{Type: "A", Message: "Message-1", Status: metav1.ConditionFalse}}, {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj01"}, Condition: metav1.Condition{Type: "A", Message: "Message-1", Status: metav1.ConditionFalse}}, - {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj04"}, Condition: metav1.Condition{Type: "A", Message: "Message-2", Status: metav1.ConditionFalse}}, - {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj03"}, Condition: metav1.Condition{Type: "A", Message: "Message-2", Status: metav1.ConditionFalse}}, - {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj06"}, Condition: metav1.Condition{Type: "A", Message: "Message-3", Status: metav1.ConditionFalse}}, + {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj04"}, Condition: metav1.Condition{Type: "A", Message: "* Message-2", Status: metav1.ConditionFalse}}, + {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj03"}, Condition: metav1.Condition{Type: "A", Message: "* Message-2", Status: metav1.ConditionFalse}}, + {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj06"}, Condition: metav1.Condition{Type: "A", Message: "* Message-3A\n* Message-3B", Status: metav1.ConditionFalse}}, {OwnerResource: ConditionOwnerInfo{Kind: 
"MachineDeployment", Name: "obj05"}, Condition: metav1.Condition{Type: "A", Message: "Message-1", Status: metav1.ConditionFalse}}, {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj08"}, Condition: metav1.Condition{Type: "A", Message: "Message-1", Status: metav1.ConditionFalse}}, {OwnerResource: ConditionOwnerInfo{Kind: "MachineDeployment", Name: "obj07"}, Condition: metav1.Condition{Type: "A", Message: "Message-4", Status: metav1.ConditionFalse}}, @@ -49,11 +49,14 @@ func TestAggregateMessages(t *testing.T) { g.Expect(n).To(Equal(0)) g.Expect(messages).To(Equal([]string{ - "Message-1 from MachineDeployments obj01, obj02, obj05 and 2 more", // MachineDeployments obj08, obj09 - "Message-2 from MachineDeployments obj03, obj04", - "Message-3 from MachineDeployment obj06", - "2 MachineDeployments with other issues", // MachineDeployments obj07 (Message-4), obj10 (Message-5) - "2 MachineSets with other issues", // MachineSet obj11, obj12 (Message-1) + "* MachineDeployments obj01, obj02, obj05, ... 
(2 more): Message-1", // MachineDeployments obj08, obj09 + "* MachineDeployments obj03, obj04:\n" + + " * Message-2", + "* MachineDeployment obj06:\n" + + " * Message-3A\n" + + " * Message-3B", + "And 2 MachineDeployments with other issues", // MachineDeployments obj07 (Message-4), obj10 (Message-5) + "And 2 MachineSets with other issues", // MachineSet obj11, obj12 (Message-1) })) } diff --git a/util/conditions/v1beta2/mirror.go b/util/conditions/v1beta2/mirror.go index 2ef8b939dd8f..4db71476f3a5 100644 --- a/util/conditions/v1beta2/mirror.go +++ b/util/conditions/v1beta2/mirror.go @@ -60,29 +60,23 @@ func (o *MirrorOptions) ApplyOptions(opts []MirrorOption) *MirrorOptions { func NewMirrorCondition(sourceObj Getter, sourceConditionType string, opts ...MirrorOption) *metav1.Condition { condition := Get(sourceObj, sourceConditionType) - return newMirrorCondition(sourceObj, condition, sourceConditionType, opts) + return newMirrorCondition(condition, sourceConditionType, opts) } -func newMirrorCondition(sourceObj any, sourceCondition *metav1.Condition, sourceConditionType string, opts []MirrorOption) *metav1.Condition { +func newMirrorCondition(sourceCondition *metav1.Condition, sourceConditionType string, opts []MirrorOption) *metav1.Condition { mirrorOpt := &MirrorOptions{ targetConditionType: sourceConditionType, } mirrorOpt.ApplyOptions(opts) - conditionOwner := getConditionOwnerInfo(sourceObj) - if sourceCondition != nil { - message := "" - if sourceCondition.Message != "" { - message = fmt.Sprintf("%s (from %s)", sourceCondition.Message, conditionOwner.Kind) - } return &metav1.Condition{ Type: mirrorOpt.targetConditionType, Status: sourceCondition.Status, // NOTE: we are preserving the original transition time (when the underlying condition changed) LastTransitionTime: sourceCondition.LastTransitionTime, Reason: sourceCondition.Reason, - Message: message, + Message: sourceCondition.Message, // NOTE: ObservedGeneration will be set when this condition is added to 
an object by calling Set // (also preserving ObservedGeneration from the source object will be confusing when the mirror conditions shows up in the target object). } @@ -102,7 +96,7 @@ func newMirrorCondition(sourceObj any, sourceCondition *metav1.Condition, source Type: mirrorOpt.targetConditionType, Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, - Message: fmt.Sprintf("Condition %s not yet reported from %s", sourceConditionType, conditionOwner.Kind), + Message: fmt.Sprintf("Condition %s not yet reported", sourceConditionType), // NOTE: LastTransitionTime and ObservedGeneration will be set when this condition is added to an object by calling Set. } } @@ -123,7 +117,7 @@ func SetMirrorConditionFromUnstructured(sourceObj runtime.Unstructured, targetOb return err } - Set(targetObj, *newMirrorCondition(sourceObj, condition, sourceConditionType, opts)) + Set(targetObj, *newMirrorCondition(condition, sourceConditionType, opts)) return nil } diff --git a/util/conditions/v1beta2/mirror_test.go b/util/conditions/v1beta2/mirror_test.go index cdd82fc1a83d..bb6b14bc2c64 100644 --- a/util/conditions/v1beta2/mirror_test.go +++ b/util/conditions/v1beta2/mirror_test.go @@ -41,7 +41,7 @@ func TestMirrorStatusCondition(t *testing.T) { }, conditionType: "Ready", options: []MirrorOption{}, - want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! (from Phase3Obj)", LastTransitionTime: now}, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good!", LastTransitionTime: now}, }, { name: "Mirror a condition with target type", @@ -50,7 +50,7 @@ func TestMirrorStatusCondition(t *testing.T) { }, conditionType: "Ready", options: []MirrorOption{TargetConditionType("SomethingReady")}, - want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! 
(from Phase3Obj)", LastTransitionTime: now}, + want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good!", LastTransitionTime: now}, }, { name: "Mirror a condition with empty message", @@ -66,14 +66,14 @@ func TestMirrorStatusCondition(t *testing.T) { conditions: []metav1.Condition{}, conditionType: "Ready", options: []MirrorOption{}, - want: metav1.Condition{Type: "Ready", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj"}, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported"}, }, { name: "Mirror a condition not yet reported with target type", conditions: []metav1.Condition{}, conditionType: "Ready", options: []MirrorOption{TargetConditionType("SomethingReady")}, - want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj"}, + want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported"}, }, { name: "Mirror a condition not yet reported with a fallback condition", diff --git a/util/conditions/v1beta2/summary.go b/util/conditions/v1beta2/summary.go index 799788e39be6..6ca07a5e89f1 100644 --- a/util/conditions/v1beta2/summary.go +++ b/util/conditions/v1beta2/summary.go @@ -17,8 +17,6 @@ limitations under the License. 
package v1beta2 import ( - "fmt" - "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" @@ -128,7 +126,7 @@ func NewSummaryCondition(sourceObj Getter, targetConditionType string, opts ...S Type: c, Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, - Message: fmt.Sprintf("Condition %s not yet reported", c), + Message: "Condition not yet reported", // NOTE: LastTransitionTime and ObservedGeneration are not relevant for merge. }, }) diff --git a/util/conditions/v1beta2/summary_test.go b/util/conditions/v1beta2/summary_test.go index e81bd5255595..256dd5e00993 100644 --- a/util/conditions/v1beta2/summary_test.go +++ b/util/conditions/v1beta2/summary_test.go @@ -49,7 +49,7 @@ func TestSummary(t *testing.T) { Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionFalse, // False because there is one issue Reason: "Reason-!C", // Picking the reason from the only existing issue - Message: "!C: Message-!C", // messages from all the issues & unknown conditions (info dropped) + Message: "* !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -63,9 +63,9 @@ func TestSummary(t *testing.T) { options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there is one issue - Reason: "Reason-!C", // Picking the reason from the only existing issue - Message: "!C: No additional info provided", // messages from all the issues & unknown conditions (info dropped); since message is empty, a default one is added + Status: metav1.ConditionFalse, // False because there is one issue + Reason: "Reason-!C", // Picking the reason from the only existing issue + Message: "* !C: No additional info provided", // messages from all the issues & unknown conditions (info dropped); since message is empty, a default one is added }, }, 
{ @@ -78,10 +78,30 @@ func TestSummary(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there are many issues - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "B: Message-B; !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there are many issues + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* B: Message-B\n" + + "* !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) + }, + }, + { + name: "More than one issue, some with multiline messages", + conditions: []metav1.Condition{ + {Type: "B", Status: metav1.ConditionFalse, Reason: "Reason-B", Message: "Message-B"}, // issue + {Type: "A", Status: metav1.ConditionTrue, Reason: "Reason-A", Message: "Message-A"}, // info + {Type: "!C", Status: metav1.ConditionTrue, Reason: "Reason-!C", Message: "* Message-!C1\n* Message-!C2"}, // issue + }, + conditionType: clusterv1.AvailableV1Beta2Condition, + options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, + want: &metav1.Condition{ + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there are many issues + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* B: Message-B\n" + + "* !C:\n" + + " * Message-!C1\n" + + " * Message-!C2", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -94,10 +114,12 @@ func TestSummary(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, want: 
&metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because there are many issues - Reason: MultipleIssuesReportedReason, // Using a generic reason - Message: "B: Message-B; !C: Message-!C; A: Message-A", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionFalse, // False because there are many issues + Reason: MultipleIssuesReportedReason, // Using a generic reason + Message: "* B: Message-B\n" + + "* !C: Message-!C\n" + + "* A: Message-A", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -113,7 +135,7 @@ func TestSummary(t *testing.T) { Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionUnknown, // Unknown because there is one unknown Reason: "Reason-!C", // Picking the reason from the only existing unknown - Message: "!C: Message-!C", // messages from all the issues & unknown conditions (info dropped) + Message: "* !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -126,10 +148,11 @@ func TestSummary(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, // Unknown because there are many unknown - Reason: MultipleUnknownReportedReason, // Using a generic reason - Message: "B: Message-B; !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, // Unknown because there are many unknown + Reason: MultipleUnknownReportedReason, // Using a generic reason + Message: "* B: Message-B\n" + + "* !C: Message-!C", // messages from all the issues & unknown conditions (info dropped) }, }, @@ -143,10 +166,11 @@ func TestSummary(t 
*testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, CustomMergeStrategy{newDefaultMergeStrategy(true, sets.New("!C"))}}, want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionTrue, // True because there are many info - Reason: MultipleInfoReportedReason, // Using a generic reason - Message: "B: Message-B; !C: Message-!C", // messages from all the info conditions (empty messages are dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionTrue, // True because there are many info + Reason: MultipleInfoReportedReason, // Using a generic reason + Message: "* B: Message-B\n" + + "* !C: Message-!C", // messages from all the info conditions (empty messages are dropped) }, }, { @@ -158,10 +182,11 @@ func TestSummary(t *testing.T) { conditionType: clusterv1.AvailableV1Beta2Condition, options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}}, // B and !C are required! 
want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, // Unknown because there more than one unknown - Reason: MultipleUnknownReportedReason, // Using a generic reason - Message: "B: Condition B not yet reported; !C: Condition !C not yet reported", // messages from all the issues & unknown conditions (info dropped) + Type: clusterv1.AvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, // Unknown because there more than one unknown + Reason: MultipleUnknownReportedReason, // Using a generic reason + Message: "* B: Condition not yet reported\n" + + "* !C: Condition not yet reported", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -174,9 +199,9 @@ func TestSummary(t *testing.T) { options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, IgnoreTypesIfMissing{"B"}}, // B and !C are required! want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, // Unknown because there more than one unknown - Reason: NotYetReportedReason, // Picking the reason from the only existing issue, which is a default missing condition added for !C - Message: "!C: Condition !C not yet reported", // messages from all the issues & unknown conditions (info dropped) + Status: metav1.ConditionUnknown, // Unknown because there more than one unknown + Reason: NotYetReportedReason, // Picking the reason from the only existing issue, which is a default missing condition added for !C + Message: "* !C: Condition not yet reported", // messages from all the issues & unknown conditions (info dropped) }, }, { @@ -191,7 +216,7 @@ func TestSummary(t *testing.T) { Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, // True because B and !C are ignored Reason: "Reason-A", // Picking the reason from A, the only existing info - Message: "A: Message-A", // messages from A, the only existing info + Message: "* A: 
Message-A", // messages from A, the only existing info }, }, { @@ -207,7 +232,7 @@ func TestSummary(t *testing.T) { Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, // True because there are many info Reason: MultipleInfoReportedReason, // Using a generic reason - Message: "B: Message-B", // messages from all the info conditions (empty messages are dropped) + Message: "* B: Message-B", // messages from all the info conditions (empty messages are dropped) }, }, { @@ -235,9 +260,9 @@ func TestSummary(t *testing.T) { }, // OverrideCondition replaces the same condition from the SourceObject want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionFalse, // False because !C is an issue - Reason: "Reason-C-additional", // Picking the reason from the additional condition - Message: "!C: Message-C-additional", // Picking the message from the additional condition (info dropped) + Status: metav1.ConditionFalse, // False because !C is an issue + Reason: "Reason-C-additional", // Picking the reason from the additional condition + Message: "* !C: Message-C-additional", // Picking the message from the additional condition (info dropped) }, }, {