Skip to content

Commit

Permalink
More comments
Browse files Browse the repository at this point in the history
  • Loading branch information
fabriziopandini committed Nov 7, 2024
1 parent db202e3 commit 985e8d2
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 45 deletions.
9 changes: 4 additions & 5 deletions controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

// KubeadmControlPlane's Available condition and corresponding reasons that will be used in v1Beta2 API version.
const (
// KubeadmControlPlaneAvailableV1Beta2Condition is true if KubeadmControlPlane not deleted, `CertificatesAvailable` is true,
// at least one Machine with Kubernetes API server, scheduler and controller manager healthy,
// and etcd has enough operational members to meet quorum requirements.
// KubeadmControlPlaneAvailableV1Beta2Condition is true if KubeadmControlPlane is not deleted, `CertificatesAvailable` is true,
// at least one Machine with healthy control plane components, and etcd has enough operational members to meet quorum requirements.
// More specifically, considering how kubeadm layouts components:
// - Kubernetes API server, scheduler and controller manager health is inferred by the status of
// the corresponding Pods hosted on each machine.
// - In case of managed etcd, also an healthy etcd Pod and an healthy etcd member must exist on the same
// - In case of managed etcd, also a healthy etcd Pod and a healthy etcd member must exist on the same
// machine with the healthy Kubernetes API server, scheduler and controller manager, otherwise the k8s control
// plane cannot be considered operational (if etcd is not operational on machine, most likely also API server,
// plane cannot be considered operational (if etcd is not operational on a machine, most likely also API server,
// scheduler and controller manager on the same machine will be impacted).
// - In case of external etcd, KCP cannot make any assumption on etcd status, so all the etcd checks are skipped.
KubeadmControlPlaneAvailableV1Beta2Condition = clusterv1.AvailableV1Beta2Condition
Expand Down
10 changes: 5 additions & 5 deletions controlplane/kubeadm/internal/workload_cluster_conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,13 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, nodes *corev1.NodeLis
// Instead, surface there is a machine without etcd member on kcp's' Available condition
// only if the machine is not deleting and the node exists by more than two minutes
// (this prevents the condition to flick during scale up operations).
// Note: Two minutes is an arbitrary interval where we expect the system to detect the new etcd member on the machine.
// Note: Two minutes is the time after which we expect the system to detect the new etcd member on the machine.
if machine.DeletionTimestamp.IsZero() {
oldNode := true
oldNode := false
if nodes != nil {
for _, node := range nodes.Items {
if machine.Status.NodeRef.Name == node.Name && time.Since(node.CreationTimestamp.Time) <= 2*time.Minute {
oldNode = false
if machine.Status.NodeRef.Name == node.Name && time.Since(node.CreationTimestamp.Time) > 2*time.Minute {
oldNode = true
}
}
}
Expand All @@ -386,7 +386,7 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, nodes *corev1.NodeLis
}
}
if !found {
// Surface there is an etcd member without a machine on into the EtcdClusterHealthy condition on kcp.
// Surface there is an etcd member without a machine into the EtcdClusterHealthy condition on kcp.
name := member.Name
if name == "" {
name = fmt.Sprintf("%d (Name not yet assigned)", member.ID)
Expand Down
97 changes: 62 additions & 35 deletions controlplane/kubeadm/internal/workload_cluster_conditions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
expectedKCPV1Beta2Condition *metav1.Condition
expectedMachineConditions map[string]clusterv1.Conditions
expectedMachineV1Beta2Conditions map[string][]metav1.Condition
expectedEtcdMembers []*etcd.Member
expectedEtcdMembers []string
expectedEtcdMembersAgreeOnMemberList bool
expectedEtcdMembersAgreeOnClusterID bool
expectedEtcdMembersAndMachinesAreMatching bool
Expand Down Expand Up @@ -82,9 +82,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the etcd member"},
},
},
expectedEtcdMembersAgreeOnMemberList: false, // without reading notes, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading notes, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading notes, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // without reading nodes, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading nodes, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading nodes, we can not make assumptions.
},
{
name: "If there are provisioning machines, a node without machine should be ignored in v1beta1, reported in v1beta2",
Expand All @@ -111,9 +111,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Node does not exist"},
},
},
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions.
},
{
name: "If there are no provisioning machines, a node without machine should be reported as False condition at KCP level",
Expand All @@ -130,9 +130,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason,
Message: "Control plane Node n1 does not have a corresponding Machine",
},
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions.
},
{
name: "failure creating the etcd client should report unknown condition",
Expand Down Expand Up @@ -164,9 +164,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to connect to the etcd Pod on the n1 Node: failed to get client for node"},
},
},
expectedEtcdMembersAgreeOnMemberList: false, // failure in reading members, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // failure in reading members, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // failure in reading members, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // failure in reading members, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // failure in reading members, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // failure in reading members, we can not make assumptions.
},
{
name: "etcd client reporting status errors should be reflected into a false condition",
Expand Down Expand Up @@ -203,9 +203,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports errors: some errors"},
},
},
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions.
},
{
name: "failure listing members should report false condition in v1beta1, unknown in v1beta2",
Expand Down Expand Up @@ -242,9 +242,9 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get answer from the etcd member on the n1 Node: failed to get list of members for etcd cluster: failed to list members"},
},
},
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can make assumptions.
expectedEtcdMembersAgreeOnMemberList: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAgreeOnClusterID: false, // without reading members, we can not make assumptions.
expectedEtcdMembersAndMachinesAreMatching: false, // without reading members, we can not make assumptions.
},
{
name: "an etcd member with alarms should report false condition",
Expand Down Expand Up @@ -290,6 +290,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports alarms: NOSPACE"},
},
},
expectedEtcdMembers: []string{"n1"},
expectedEtcdMembersAgreeOnMemberList: true,
expectedEtcdMembersAgreeOnClusterID: true,
expectedEtcdMembersAndMachinesAreMatching: true,
Expand Down Expand Up @@ -375,6 +376,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd member has cluster ID 2, but all previously seen etcd members have cluster ID 1"},
},
},
expectedEtcdMembers: []string{"n1", "n2"},
expectedEtcdMembersAgreeOnMemberList: true,
expectedEtcdMembersAgreeOnClusterID: false,
expectedEtcdMembersAndMachinesAreMatching: false,
Expand Down Expand Up @@ -460,6 +462,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "The etcd member hosted on this Machine reports the cluster is composed by [n2 n3], but all previously seen etcd members are reporting [n1 n2]"},
},
},
expectedEtcdMembers: []string{"n1", "n2"},
expectedEtcdMembersAgreeOnMemberList: false,
expectedEtcdMembersAgreeOnClusterID: true,
expectedEtcdMembersAndMachinesAreMatching: false,
Expand Down Expand Up @@ -527,6 +530,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd doesn't have an etcd member for Node n2"},
},
},
expectedEtcdMembers: []string{"n1"},
expectedEtcdMembersAgreeOnMemberList: true,
expectedEtcdMembersAgreeOnClusterID: true,
expectedEtcdMembersAndMachinesAreMatching: false,
Expand Down Expand Up @@ -611,6 +615,7 @@ func TestUpdateEtcdConditions(t *testing.T) {
{Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""},
},
},
expectedEtcdMembers: []string{"n1", "n2"},
expectedEtcdMembersAgreeOnMemberList: true,
expectedEtcdMembersAgreeOnClusterID: true,
expectedEtcdMembersAndMachinesAreMatching: true,
Expand Down Expand Up @@ -668,6 +673,12 @@ func TestUpdateEtcdConditions(t *testing.T) {
g.Expect(controlPane.EtcdMembersAgreeOnMemberList).To(Equal(tt.expectedEtcdMembersAgreeOnMemberList), "EtcdMembersAgreeOnMemberList does not match")
g.Expect(controlPane.EtcdMembersAgreeOnClusterID).To(Equal(tt.expectedEtcdMembersAgreeOnClusterID), "EtcdMembersAgreeOnClusterID does not match")
g.Expect(controlPane.EtcdMembersAndMachinesAreMatching).To(Equal(tt.expectedEtcdMembersAndMachinesAreMatching), "EtcdMembersAndMachinesAreMatching does not match")

var membersNames []string
for _, m := range controlPane.EtcdMembers {
membersNames = append(membersNames, m.Name)
}
g.Expect(membersNames).To(Equal(tt.expectedEtcdMembers))
})
}
}
Expand Down Expand Up @@ -1672,7 +1683,6 @@ func TestCompareMachinesAndMembers(t *testing.T) {
controlPlane *ControlPlane
nodes *corev1.NodeList
members []*etcd.Member
kcpErrors []string
expectMembersAndMachinesAreMatching bool
expectKCPErrors []string
}{
Expand All @@ -1684,7 +1694,6 @@ func TestCompareMachinesAndMembers(t *testing.T) {
},
members: nil,
nodes: nil,
kcpErrors: nil,
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
Expand All @@ -1696,7 +1705,6 @@ func TestCompareMachinesAndMembers(t *testing.T) {
},
members: nil,
nodes: nil,
kcpErrors: nil,
expectMembersAndMachinesAreMatching: false,
expectKCPErrors: nil,
},
Expand All @@ -1713,51 +1721,52 @@ func TestCompareMachinesAndMembers(t *testing.T) {
{Name: "m1"},
{Name: "m2"},
},
kcpErrors: nil,
nodes: nil,
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
{
name: "false if there is a machine without a member",
name: "true if there is a machine without a member but at least a machine is still provisioning",
controlPlane: &ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{},
Machines: collections.FromMachines(
fakeMachine("m1", withNodeRef("m1")),
fakeMachine("m2", withNodeRef("m2")),
fakeMachine("m3"), // m3 is still provisioning
),
},
members: []*etcd.Member{
// m1 is missing
{Name: "m1"},
{Name: "m2"},
// m3 is missing
},
kcpErrors: nil,
nodes: nil,
expectMembersAndMachinesAreMatching: false,
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
{
name: "true if there is a machine without a member but at least a machine is still provisioning",
name: "true if there is a machine without a member but node on this machine does not exist yet",
controlPlane: &ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{},
Machines: collections.FromMachines(
fakeMachine("m1", withNodeRef("m1")),
fakeMachine("m2", withNodeRef("m2")),
fakeMachine("m3"), // m3 is still provisioning
fakeMachine("m3", withNodeRef("m3")),
),
},
members: []*etcd.Member{
{Name: "m1"},
{Name: "m2"},
// m3 is missing
},
kcpErrors: nil,
nodes: nil,
nodes: &corev1.NodeList{Items: []corev1.Node{
// m3 is missing
}},
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
{
name: "true if there is a machine without a member bu node on this machine has been just created",
name: "true if there is a machine without a member but node on this machine has been just created",
controlPlane: &ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{},
Machines: collections.FromMachines(
Expand All @@ -1774,7 +1783,27 @@ func TestCompareMachinesAndMembers(t *testing.T) {
nodes: &corev1.NodeList{Items: []corev1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "m3", CreationTimestamp: metav1.Time{Time: time.Now().Add(-110 * time.Second)}}}, // m3 is just provisioned
}},
kcpErrors: nil,
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
{
name: "false if there is a machine without a member and node on this machine is old",
controlPlane: &ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{},
Machines: collections.FromMachines(
fakeMachine("m1", withNodeRef("m1")),
fakeMachine("m2", withNodeRef("m2")),
fakeMachine("m3", withNodeRef("m3")),
),
},
members: []*etcd.Member{
{Name: "m1"},
{Name: "m2"},
// m3 is missing
},
nodes: &corev1.NodeList{Items: []corev1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "m3", CreationTimestamp: metav1.Time{Time: time.Now().Add(10 * time.Minute)}}}, // m3 is old
}},
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: nil,
},
Expand All @@ -1791,7 +1820,6 @@ func TestCompareMachinesAndMembers(t *testing.T) {
{Name: "m1"},
{Name: "m2"},
},
kcpErrors: nil,
nodes: nil,
expectMembersAndMachinesAreMatching: false,
expectKCPErrors: []string{"Etcd member m2 does not have a corresponding Machine"},
Expand All @@ -1809,7 +1837,6 @@ func TestCompareMachinesAndMembers(t *testing.T) {
{Name: "m1"},
{Name: "m2"},
},
kcpErrors: nil,
nodes: nil,
expectMembersAndMachinesAreMatching: true,
expectKCPErrors: []string{"Etcd member m2 does not have a corresponding Machine"},
Expand Down

0 comments on commit 985e8d2

Please sign in to comment.