Skip to content

Commit

Permalink
feat: copy failed manifest from binding
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhiying Lin committed Jun 17, 2024
1 parent a9c1d6c commit d99d2e6
Show file tree
Hide file tree
Showing 6 changed files with 597 additions and 1,009 deletions.
53 changes: 1 addition & 52 deletions pkg/controllers/clusterresourceplacement/placement_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
"go.goms.io/fleet/pkg/utils"
"go.goms.io/fleet/pkg/utils/condition"
"go.goms.io/fleet/pkg/utils/controller"
)
Expand Down Expand Up @@ -260,9 +259,7 @@ func (r *Reconciler) setResourcePlacementStatusPerCluster(ctx context.Context,
}
case condition.AppliedCondition, condition.AvailableCondition:
if bindingCond.Status == metav1.ConditionFalse {
if err := r.setFailedPlacementsPerCluster(ctx, crp, binding, status); err != nil {
return nil, err
}
status.FailedPlacements = binding.Status.FailedPlacements
}
}
cond := metav1.Condition{
Expand Down Expand Up @@ -300,51 +297,3 @@ func (r *Reconciler) setResourcePlacementStatusPerCluster(ctx context.Context,
meta.SetStatusCondition(&status.Conditions, condition.RolloutStartedCondition.UnknownResourceConditionPerCluster(crp.Generation))
return []metav1.ConditionStatus{metav1.ConditionUnknown}, nil
}

// setFailedPlacementsPerCluster lists the works that belong to the given binding in the
// member cluster namespace derived from status.ClusterName, collects the failed resource
// placements reported for those works, and stores at most
// controller.MaxFailedResourcePlacementLimit of them in status.FailedPlacements.
//
// It returns an API server error when the works cannot be listed, and an expected-behavior
// error when none of the listed works reports a failed manifest.
//
// TODO, instead of crp looking for the failed manifests from the works, the work generator will populate in the binding
// in addition to the conditions to solve the inconsistency data between bindings and works, which is also more efficient.
// Note, today there is no data about the mapping between the binding generation and work generation.
func (r *Reconciler) setFailedPlacementsPerCluster(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, binding *fleetv1beta1.ClusterResourceBinding, status *fleetv1beta1.ResourcePlacementStatus) error {
	namespaceMatcher := client.InNamespace(fmt.Sprintf(utils.NamespaceNameFormat, status.ClusterName))
	workLabelMatcher := client.MatchingLabels{
		fleetv1beta1.CRPTrackingLabel:   crp.Name,
		fleetv1beta1.ParentBindingLabel: binding.Name,
	}
	workList := &fleetv1beta1.WorkList{}
	crpKObj := klog.KObj(crp)
	bindingKObj := klog.KObj(binding)
	if err := r.Client.List(ctx, workList, workLabelMatcher, namespaceMatcher); err != nil {
		klog.ErrorS(err, "Failed to list all the work associated with the clusterResourcePlacement", "clusterResourcePlacement", crpKObj, "clusterResourceBinding", bindingKObj, "clusterName", status.ClusterName)
		return controller.NewAPIServerError(true, err)
	}
	klog.V(2).InfoS("Listed works to find the failed placements", "clusterResourcePlacement", crpKObj, "clusterResourceBinding", bindingKObj, "clusterName", status.ClusterName, "numberOfWorks", len(workList.Items))

	failedResourcePlacements := make([]fleetv1beta1.FailedResourcePlacement, 0, controller.MaxFailedResourcePlacementLimit) // preallocate the memory
	for i := range workList.Items {
		// Take the address of the list element instead of copying the whole Work struct.
		work := &workList.Items[i]
		if work.DeletionTimestamp != nil {
			klog.V(2).InfoS("Ignoring the deleting work", "clusterResourcePlacement", crpKObj, "clusterResourceBinding", bindingKObj, "work", klog.KObj(work))
			continue // ignore the deleting work
		}
		failedManifests := controller.ExtractFailedResourcePlacementsFromWork(work)
		// Only keep as many entries as still fit under the limit, so the slice never grows
		// beyond its preallocated capacity and no trailing truncation is needed.
		if room := controller.MaxFailedResourcePlacementLimit - len(failedResourcePlacements); len(failedManifests) > room {
			failedManifests = failedManifests[:room]
		}
		failedResourcePlacements = append(failedResourcePlacements, failedManifests...)
		if len(failedResourcePlacements) >= controller.MaxFailedResourcePlacementLimit {
			// The limit is reached; the remaining works cannot contribute anything.
			break
		}
	}

	if len(failedResourcePlacements) == 0 {
		err := fmt.Errorf("there are no works (total number %v) with failed manifest condition which is not matched with the binding status: %v", len(workList.Items), binding.Status.Conditions)
		klog.ErrorS(err, "No failed manifests are found for the resource", "clusterResourcePlacement", crpKObj, "clusterResourceBinding", bindingKObj, "clusterName", status.ClusterName)
		// There will be a case that, the binding is just updated when we query the works.
		// So that the works have been updated and the cached binding condition is out of date.
		// We requeue the request to try again.
		return controller.NewExpectedBehaviorError(err)
	}

	status.FailedPlacements = failedResourcePlacements
	klog.V(2).InfoS("Populated failed manifests", "clusterResourcePlacement", crpKObj, "clusterName", status.ClusterName, "numberOfFailedPlacements", len(failedResourcePlacements))
	return nil
}
Loading

0 comments on commit d99d2e6

Please sign in to comment.