Skip to content

Commit

Permalink
feat: enable the CRP controller to process DiffReported condition (#1017
Browse files Browse the repository at this point in the history
)
michaelawyu authored Jan 23, 2025

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent ac690ac commit 62f2fa2
Showing 7 changed files with 3,352 additions and 768 deletions.
10 changes: 10 additions & 0 deletions apis/placement/v1beta1/binding_types.go
Original file line number Diff line number Diff line change
@@ -174,6 +174,16 @@ const (
// - "False" means not all the resources are available in the target cluster yet.
// - "Unknown" means we haven't finished the apply yet so that we cannot check the resource availability.
ResourceBindingAvailable ResourceBindingConditionType = "Available"

// ResourceBindingDiffReported indicates whether Fleet has successfully reported configuration
// differences between the hub cluster and a member cluster for the given resources.
//
// It can have the following condition statuses:
// * True: Fleet has successfully reported configuration differences for all resources.
// * False: Fleet has not yet reported configuration differences for some resources, or an
// error has occurred.
// * Unknown: Fleet has not finished processing the diff reporting yet.
ResourceBindingDiffReported ResourceBindingConditionType = "DiffReported"
)

// ClusterResourceBindingList is a collection of ClusterResourceBinding.
22 changes: 22 additions & 0 deletions apis/placement/v1beta1/clusterresourceplacement_types.go
Original file line number Diff line number Diff line change
@@ -1142,6 +1142,17 @@ const (
// array.
// - "Unknown" means we haven't finished the apply yet so that we cannot check the resource availability.
ClusterResourcePlacementAvailableConditionType ClusterResourcePlacementConditionType = "ClusterResourcePlacementAvailable"

// ClusterResourcePlacementDiffReportedConditionType indicates whether Fleet has reported
// configuration differences between the desired states of resources as kept in the hub cluster
// and the current states on all the member clusters.
//
// It can have the following condition statuses:
// * True: Fleet has reported complete sets of configuration differences on all member clusters.
// * False: Fleet has not yet reported complete sets of configuration differences on some member
// clusters, or an error has occurred.
// * Unknown: Fleet has not finished processing the diff reporting yet.
ClusterResourcePlacementDiffReportedConditionType ClusterResourcePlacementConditionType = "ClusterResourcePlacementDiffReported"
)

// ResourcePlacementConditionType defines a specific condition of a resource placement.
@@ -1197,6 +1208,17 @@ const (
// - "False" means some of them are not available yet.
// - "Unknown" means we haven't finished the apply yet so that we cannot check the resource availability.
ResourcesAvailableConditionType ResourcePlacementConditionType = "Available"

// ResourcesDiffReportedConditionType indicates whether Fleet has reported configuration
// differences between the desired states of resources as kept in the hub cluster and the
// current states on the selected member cluster.
//
// It can have the following condition statuses:
// * True: Fleet has reported the complete set of configuration differences on the member cluster.
// * False: Fleet has not yet reported the complete set of configuration differences on the
// member cluster, or an error has occurred.
// * Unknown: Fleet has not finished processing the diff reporting yet.
ResourcesDiffReportedConditionType ResourcePlacementConditionType = "DiffReported"
)

// PlacementType identifies the type of placement.
62 changes: 54 additions & 8 deletions pkg/controllers/clusterresourceplacement/controller.go
Original file line number Diff line number Diff line change
@@ -212,12 +212,12 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster
}
}

// There is no need to check if the CRP is available or not.
// If the available condition is true, it means the rollout is completed.
// Rollout is considered to be completed when all the expected condition types are set to the
// True status.
if isRolloutCompleted(crp) {
if !isRolloutCompleted(oldCRP) {
klog.V(2).InfoS("Placement rollout has finished and resources are available", "clusterResourcePlacement", crpKObj, "generation", crp.Generation)
r.Recorder.Event(crp, corev1.EventTypeNormal, "PlacementRolloutCompleted", "Resources are available in the selected clusters")
klog.V(2).InfoS("Placement has finished the rollout process and reached the desired status", "clusterResourcePlacement", crpKObj, "generation", crp.Generation)
r.Recorder.Event(crp, corev1.EventTypeNormal, "PlacementRolloutCompleted", "Placement has finished the rollout process and reached the desired status")
}
// We don't need to requeue any request now by watching the binding changes
return ctrl.Result{}, nil
@@ -888,8 +888,13 @@ func parseResourceGroupHashFromAnnotation(s *fleetv1beta1.ClusterResourceSnapsho
}

// setPlacementStatus populates the CRP status and returns whether there is a cluster scheduled by the scheduler.
func (r *Reconciler) setPlacementStatus(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, selectedResourceIDs []fleetv1beta1.ResourceIdentifier,
latestSchedulingPolicySnapshot *fleetv1beta1.ClusterSchedulingPolicySnapshot, latestResourceSnapshot *fleetv1beta1.ClusterResourceSnapshot) (bool, error) {
func (r *Reconciler) setPlacementStatus(
ctx context.Context,
crp *fleetv1beta1.ClusterResourcePlacement,
selectedResourceIDs []fleetv1beta1.ResourceIdentifier,
latestSchedulingPolicySnapshot *fleetv1beta1.ClusterSchedulingPolicySnapshot,
latestResourceSnapshot *fleetv1beta1.ClusterResourceSnapshot,
) (bool, error) {
crp.Status.SelectedResources = selectedResourceIDs
scheduledCondition := buildScheduledCondition(crp, latestSchedulingPolicySnapshot)
crp.SetConditions(scheduledCondition)
@@ -913,7 +918,47 @@ func (r *Reconciler) setPlacementStatus(ctx context.Context, crp *fleetv1beta1.C
return false, nil
}

return r.setResourceConditions(ctx, crp, latestSchedulingPolicySnapshot, latestResourceSnapshot)
// Classify cluster decisions; find out clusters that have been selected and
// have not been selected.
selected, unselected := classifyClusterDecisions(latestSchedulingPolicySnapshot.Status.ClusterDecisions)
// Calculate the number of clusters that should have been selected yet cannot be, due to
// scheduling constraints.
failedToScheduleClusterCount := calculateFailedToScheduleClusterCount(crp, selected, unselected)

// Prepare the resource placement status (status per cluster) in the CRP status.
allRPS := make([]fleetv1beta1.ResourcePlacementStatus, 0, len(latestSchedulingPolicySnapshot.Status.ClusterDecisions))

// For clusters that have been selected, set the resource placement status based on the
// respective resource binding status for each of them.
expectedCondTypes := determineExpectedCRPAndResourcePlacementStatusCondType(crp)
allRPS, rpsSetCondTypeCounter, err := r.appendScheduledResourcePlacementStatuses(
ctx, allRPS, selected, expectedCondTypes, crp, latestSchedulingPolicySnapshot, latestResourceSnapshot)
if err != nil {
return false, err
}

// For clusters that failed to get scheduled, set a resource placement status with the
// failed to schedule condition for each of them.
allRPS = appendFailedToScheduleResourcePlacementStatuses(allRPS, unselected, failedToScheduleClusterCount, crp)

crp.Status.PlacementStatuses = allRPS

// Prepare the conditions for the CRP object itself.

if len(selected) == 0 {
// There is no selected cluster at all. It could be that there is no matching cluster
// given the current scheduling policy; there remains a corner case as well where a cluster
// has been selected before (with resources being possibly applied), but has now
// left the fleet. To address this corner case, Fleet here will remove all lingering
// conditions (any condition type other than CRPScheduled).

// Note that the scheduled condition has been set earlier in this method.
crp.Status.Conditions = []metav1.Condition{*crp.GetCondition(string(fleetv1beta1.ClusterResourcePlacementScheduledConditionType))}
return false, nil
}

setCRPConditions(crp, allRPS, rpsSetCondTypeCounter, expectedCondTypes)
return true, nil
}

func buildScheduledCondition(crp *fleetv1beta1.ClusterResourcePlacement, latestSchedulingPolicySnapshot *fleetv1beta1.ClusterSchedulingPolicySnapshot) metav1.Condition {
@@ -974,7 +1019,8 @@ func isRolloutCompleted(crp *fleetv1beta1.ClusterResourcePlacement) bool {
return false
}

for i := condition.RolloutStartedCondition; i < condition.TotalCondition; i++ {
expectedCondTypes := determineExpectedCRPAndResourcePlacementStatusCondType(crp)
for _, i := range expectedCondTypes {
if !condition.IsConditionStatusTrue(crp.GetCondition(string(i.ClusterResourcePlacementConditionType())), crp.Generation) {
return false
}
359 changes: 221 additions & 138 deletions pkg/controllers/clusterresourceplacement/placement_status.go

Large diffs are not rendered by default.

3,587 changes: 2,970 additions & 617 deletions pkg/controllers/clusterresourceplacement/placement_status_test.go

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -109,7 +109,7 @@ func TestFindPlacementsSelectedDeletedResV1Alpha1(t *testing.T) {
Status: fleetv1alpha1.ClusterResourcePlacementStatus{
SelectedResources: []fleetv1alpha1.ResourceIdentifier{
{
Group: "abd",
Group: "xyz",
Name: "not-deleted",
Namespace: "bar",
},
@@ -229,7 +229,7 @@ func TestFindPlacementsSelectedDeletedResV1Beta11(t *testing.T) {
Status: placementv1beta1.ClusterResourcePlacementStatus{
SelectedResources: []placementv1beta1.ResourceIdentifier{
{
Group: "abd",
Group: "xyz",
Name: "not-deleted",
Namespace: "bar",
},
76 changes: 73 additions & 3 deletions pkg/utils/condition/condition.go
Original file line number Diff line number Diff line change
@@ -68,6 +68,18 @@ const (

// AvailableReason is the reason string of placement condition if the selected resources are available.
AvailableReason = "ResourceAvailable"

// DiffReportedStatusUnknownReason is the reason string of the DiffReported condition when the
// diff reporting has just started and its status is not yet known.
DiffReportedStatusUnknownReason = "DiffReportingPending"

// DiffReportedStatusFalseReason is the reason string of the DiffReported condition when the
// diff reporting has not been fully completed.
DiffReportedStatusFalseReason = "DiffReportingIncompleteOrFailed"

// DiffReportedStatusTrueReason is the reason string of the DiffReported condition when the
// diff reporting has been fully completed.
DiffReportedStatusTrueReason = "DiffReportingCompleted"
)

// A group of condition reason string which is used to populate the placement condition per cluster.
@@ -242,25 +254,51 @@ func IsConditionStatusFalse(cond *metav1.Condition, latestGeneration int64) bool
// ResourceCondition is all the resource related condition, for example, scheduled condition is not included.
type ResourceCondition int

// The following conditions are in ordered.
// Once the placement is scheduled, it will be divided into following stages.
// Used to populate the CRP conditions.
// The full set of condition types that Fleet will populate on CRPs (the CRP itself and the
// resource placement status per cluster) and cluster resource bindings.
//
// - RolloutStarted, Overridden and WorkSynchronized apply to all objects;
// - Applied and Available apply only when the apply strategy in use is of the ClientSideApply
// or ServerSideApply type;
// - DiffReported applies only when the apply strategy in use is of the ReportDiff type;
// - Total is an end marker (not used).
//
// The numeric value of each constant is also used as an index into the positional lookup
// tables built by the ResourceCondition methods (e.g. EventReasonForTrue), so the order of
// the constants and the order of the entries in those tables must be kept in sync.
const (
// RolloutStartedCondition is the first condition type in the sequence (value 0).
RolloutStartedCondition ResourceCondition = iota
// OverriddenCondition applies to all objects, regardless of the apply strategy in use.
OverriddenCondition
// WorkSynchronizedCondition applies to all objects, regardless of the apply strategy in use.
WorkSynchronizedCondition
// AppliedCondition applies only with the ClientSideApply and ServerSideApply strategies.
AppliedCondition
// AvailableCondition applies only with the ClientSideApply and ServerSideApply strategies.
AvailableCondition
// DiffReportedCondition applies only with the ReportDiff apply strategy.
DiffReportedCondition
// TotalCondition is an end marker rather than a real condition type; EventReasonForTrue has
// no table entry for it.
TotalCondition
)

// CondTypesForClientSideServerSideApplyStrategies is the set of condition types, in the
// sequential order Fleet populates them, that is used when the apply strategy in use is of the
// ClientSideApply or ServerSideApply type.
var CondTypesForClientSideServerSideApplyStrategies = []ResourceCondition{
	RolloutStartedCondition,
	OverriddenCondition,
	WorkSynchronizedCondition,
	AppliedCondition,
	AvailableCondition,
}

// CondTypesForReportDiffApplyStrategy is the set of condition types, in the sequential order
// Fleet populates them, that is used when the apply strategy in use is of the ReportDiff type.
var CondTypesForReportDiffApplyStrategy = []ResourceCondition{
	RolloutStartedCondition,
	OverriddenCondition,
	WorkSynchronizedCondition,
	DiffReportedCondition,
}

// EventReasonForTrue returns the event reason string that corresponds to the condition type c.
//
// The table below is positional: the entry at index i belongs to the ResourceCondition whose
// numeric value is i. Indexing with a value that has no entry in the table causes a runtime
// panic.
func (c ResourceCondition) EventReasonForTrue() string {
	reasonsByCondition := []string{
		"PlacementRolloutStarted",
		"PlacementOverriddenSucceeded",
		"PlacementWorkSynchronized",
		"PlacementApplied",
		"PlacementAvailable",
		"PlacementDiffReported",
	}
	return reasonsByCondition[c]
}

@@ -271,6 +309,7 @@ func (c ResourceCondition) EventMessageForTrue() string {
"Work(s) have been created or updated successfully for the selected cluster(s)",
"Resources have been applied to the selected cluster(s)",
"Resources are available on the selected cluster(s)",
"Configuration differences have been reported on the selected cluster(s)",
}[c]
}

@@ -282,6 +321,7 @@ func (c ResourceCondition) ResourcePlacementConditionType() fleetv1beta1.Resourc
fleetv1beta1.ResourceWorkSynchronizedConditionType,
fleetv1beta1.ResourcesAppliedConditionType,
fleetv1beta1.ResourcesAvailableConditionType,
fleetv1beta1.ResourcesDiffReportedConditionType,
}[c]
}

@@ -293,6 +333,7 @@ func (c ResourceCondition) ResourceBindingConditionType() fleetv1beta1.ResourceB
fleetv1beta1.ResourceBindingWorkSynchronized,
fleetv1beta1.ResourceBindingApplied,
fleetv1beta1.ResourceBindingAvailable,
fleetv1beta1.ResourceBindingDiffReported,
}[c]
}

@@ -304,6 +345,7 @@ func (c ResourceCondition) ClusterResourcePlacementConditionType() fleetv1beta1.
fleetv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType,
fleetv1beta1.ClusterResourcePlacementAppliedConditionType,
fleetv1beta1.ClusterResourcePlacementAvailableConditionType,
fleetv1beta1.ClusterResourcePlacementDiffReportedConditionType,
}[c]
}

@@ -345,6 +387,13 @@ func (c ResourceCondition) UnknownResourceConditionPerCluster(generation int64)
Message: "The availability of the selected resources is unknown yet ",
ObservedGeneration: generation,
},
{
Status: metav1.ConditionUnknown,
Type: string(fleetv1beta1.ResourcesDiffReportedConditionType),
Reason: DiffReportedStatusUnknownReason,
Message: "Diff reporting has just started; its status is not yet to be known",
ObservedGeneration: generation,
},
}[c]
}

@@ -386,6 +435,13 @@ func (c ResourceCondition) UnknownClusterResourcePlacementCondition(generation i
Message: fmt.Sprintf("There are still %d cluster(s) in the process of checking the availability of the selected resources", clusterCount),
ObservedGeneration: generation,
},
{
Status: metav1.ConditionUnknown,
Type: string(fleetv1beta1.ClusterResourcePlacementDiffReportedConditionType),
Reason: DiffReportedStatusUnknownReason,
Message: fmt.Sprintf("There are still %d cluster(s) in the process of checking for configuration differences", clusterCount),
ObservedGeneration: generation,
},
}[c]
}

@@ -427,6 +483,13 @@ func (c ResourceCondition) FalseClusterResourcePlacementCondition(generation int
Message: fmt.Sprintf("The selected resources in %d cluster(s) are still not available yet", clusterCount),
ObservedGeneration: generation,
},
{
Status: metav1.ConditionFalse,
Type: string(fleetv1beta1.ClusterResourcePlacementDiffReportedConditionType),
Reason: DiffReportedStatusFalseReason,
Message: fmt.Sprintf("Diff reporting in %d clusters is still in progress or has failed", clusterCount),
ObservedGeneration: generation,
},
}[c]
}

@@ -468,6 +531,13 @@ func (c ResourceCondition) TrueClusterResourcePlacementCondition(generation int6
Message: fmt.Sprintf("The selected resources in %d cluster(s) are available now", clusterCount),
ObservedGeneration: generation,
},
{
Status: metav1.ConditionTrue,
Type: string(fleetv1beta1.ClusterResourcePlacementDiffReportedConditionType),
Reason: DiffReportedStatusTrueReason,
Message: fmt.Sprintf("Diff reporting in %d cluster(s) has been completed", clusterCount),
ObservedGeneration: generation,
},
}[c]
}

0 comments on commit 62f2fa2

Please sign in to comment.