Merge pull request #5621 from ipsum-0320/Implement-group-score-calc-base
Implement calc group score for divided to assign replicas evenly
karmada-bot authored Oct 26, 2024
2 parents 53afbbc + aa8cbda commit 1cd75f4
Showing 1 changed file with 150 additions and 12 deletions.
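In brief: a zone/region/provider group's Score was previously the single highest cluster score in the group; with this change it becomes a weighted combination of attainable replica capacity and average cluster score, roughly

    Score = min(sumAvailableReplica, targetReplica) * weightUnit + avg(cluster.Score)

with weightUnit = 1000, where the average runs over all clusters when capacity falls short of targetReplica, and over only the examined clusters otherwise. Capacity therefore dominates the ordering, and cluster score only breaks ties.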
162 changes: 150 additions & 12 deletions pkg/scheduler/core/spreadconstraint/group_clusters.go
@@ -17,6 +17,8 @@ limitations under the License.
package spreadconstraint

import (
"math"

"k8s.io/utils/ptr"

clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
@@ -40,7 +42,7 @@ type GroupClustersInfo struct {
// ProviderInfo indicates the provider information
type ProviderInfo struct {
Name string
Score int64 // the highest score in all clusters of the provider
Score int64 // the comprehensive score across all clusters of the provider
AvailableReplicas int64

// Regions under this provider
@@ -54,7 +56,7 @@ type ProviderInfo struct {
// RegionInfo indicates the region information
type RegionInfo struct {
Name string
Score int64 // the highest score in all clusters of the region
Score int64 // the comprehensive score across all clusters of the region
AvailableReplicas int64

// Zones under this region
@@ -66,7 +68,7 @@ type RegionInfo struct {
// ZoneInfo indicates the zone information
type ZoneInfo struct {
Name string
Score int64 // the highest score in all clusters of the zone
Score int64 // the comprehensive score across all clusters of the zone
AvailableReplicas int64

// Clusters under this zone, sorted by cluster.Score descending.
@@ -109,9 +111,9 @@ func groupClustersBasedTopology(
}
groupClustersInfo.calAvailableReplicasFunc = calAvailableReplicasFunc
groupClustersInfo.generateClustersInfo(clustersScore, rbSpec)
groupClustersInfo.generateZoneInfo(spreadConstraints)
groupClustersInfo.generateRegionInfo(spreadConstraints)
groupClustersInfo.generateProviderInfo(spreadConstraints)
groupClustersInfo.generateZoneInfo(spreadConstraints, rbSpec)
groupClustersInfo.generateRegionInfo(spreadConstraints, rbSpec)
groupClustersInfo.generateProviderInfo(spreadConstraints, rbSpec)

return groupClustersInfo
}
@@ -128,6 +130,121 @@ func groupClustersIgnoringTopology(
return groupClustersInfo
}

// weightUnit is used to minimize the impact of avg(cluster.Score).
// Multiplying by weightUnit makes one unit of targetReplica worth 1000, while avg(cluster.Score) stays in [0, 100].
// Thus, when sorting by Group Score, targetReplica is considered first; Score is only considered when the weighted parts are equal.
const weightUnit int64 = 1000

func (info *GroupClustersInfo) calcGroupScore(
clusters []ClusterDetailInfo,
rbSpec *workv1alpha2.ResourceBindingSpec,
minGroups int) int64 {
// If the replica scheduling type is Divided, the score is calculated as follows.
float64MinGroups := float64(minGroups)
targetReplica := int64(math.Ceil(float64(rbSpec.Replicas) / float64MinGroups))

// Get the minGroups of the Cluster spread constraint, if one is set.
var clusterMinGroups int
if rbSpec.Placement != nil && rbSpec.Placement.SpreadConstraints != nil {
for _, sc := range rbSpec.Placement.SpreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldCluster {
clusterMinGroups = sc.MinGroups
}
}
}

// If the minGroups of Cluster is less than the minGroups of Group, set it to the minGroups of Group.
if clusterMinGroups < minGroups {
clusterMinGroups = minGroups
}
int64ClusterMinGroups := int64(clusterMinGroups)

// clusters have been sorted by cluster.Score in descending order;
// when cluster.Score is equal, cluster.AvailableReplicas is in ascending order.
var sumAvailableReplica int64
var sumScore int64
var validClusters int64
for _, cluster := range clusters {
sumAvailableReplica += cluster.AvailableReplicas
sumScore += cluster.Score
validClusters++
if validClusters >= int64ClusterMinGroups && sumAvailableReplica >= targetReplica {
break
}
}

// cluster.Score is 0 or 100. To minimize the impact of Score,
// scale targetReplica by weightUnit (1000). This way,
// when sorting by Group Score, targetReplica is considered first,
// and Score is only considered when the weighted parts are equal.

// Here is an example: rbSpec.Replicas == 100, Group.minGroups == 2, and Cluster.minGroups == 1.
// Thus targetReplica is 50 and int64ClusterMinGroups == 2, because int64ClusterMinGroups == max(Group.minGroups, Cluster.minGroups).

// Consider Group 1, which has five clusters, as follows.
// |-------------------|---------|---------|---------|---------|---------|
// | clusterName       | member1 | member2 | member3 | member4 | member5 |
// |-------------------|---------|---------|---------|---------|---------|
// | score             | 100     | 100     | 100     | 100     | 100     |
// |-------------------|---------|---------|---------|---------|---------|
// | AvailableReplicas | 10      | 10      | 10      | 10      | 5       |
// |-------------------|---------|---------|---------|---------|---------|

// Consider Group 2, which has four clusters, as follows.
// |-------------------|---------|---------|---------|---------|
// | clusterName       | member1 | member2 | member3 | member4 |
// |-------------------|---------|---------|---------|---------|
// | score             | 0       | 0       | 0       | 0       |
// |-------------------|---------|---------|---------|---------|
// | AvailableReplicas | 40      | 30      | 10      | 10      |
// |-------------------|---------|---------|---------|---------|

// As we would expect, Group 2 is the better choice: it has more available
// replica capacity and so meets the replica allocation requirement, even
// though Group 1 has better cluster balance.
// Our algorithm agrees: Group 2's Score is higher than Group 1's.

// Group1's Score = 45 * 1000 + 100 = 45100
// Group2's Score = 50 * 1000 + 0 = 50000

// Here is another example: targetReplica is again 50 and int64ClusterMinGroups == 2.
// The difference is that both Groups now meet the requirement for available replica capacity.

// Consider Group 1, which has five clusters, as follows.
// |-------------------|---------|---------|---------|---------|---------|
// | clusterName       | member1 | member2 | member3 | member4 | member5 |
// |-------------------|---------|---------|---------|---------|---------|
// | score             | 100     | 100     | 100     | 100     | 100     |
// |-------------------|---------|---------|---------|---------|---------|
// | AvailableReplicas | 40      | 40      | 10      | 10      | 5       |
// |-------------------|---------|---------|---------|---------|---------|

// Consider Group 2, which has four clusters, as follows.
// |-------------------|---------|---------|---------|---------|
// | clusterName       | member1 | member2 | member3 | member4 |
// |-------------------|---------|---------|---------|---------|
// | score             | 0       | 0       | 0       | 0       |
// |-------------------|---------|---------|---------|---------|
// | AvailableReplicas | 100     | 100     | 10      | 10      |
// |-------------------|---------|---------|---------|---------|

// As we would expect, Group 1 is now the better choice: both Groups can
// meet the replica allocation requirement, but Group 1 has better cluster
// balance (even though Group 2 has more available replicas).
// Our algorithm agrees: Group 1's Score is higher than Group 2's.

// Group1's Score = 50 * 1000 + 100 = 50100
// Group2's Score = 50 * 1000 + 0 = 50000

if sumAvailableReplica < targetReplica {
sumAvailableReplica = sumAvailableReplica * weightUnit
return sumAvailableReplica + sumScore/int64(len(clusters))
}

targetReplica = targetReplica * weightUnit
return targetReplica + sumScore/validClusters
}
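
A minimal in-package sketch (not part of this commit) that replays the first worked example above. The ClusterDetailInfo field names and the fmt import are assumptions inferred from their use in this file; the expected values are the ones derived in the comments:

func ExampleGroupClustersInfo_calcGroupScore() {
	info := &GroupClustersInfo{}
	// No Placement is set, so clusterMinGroups falls back to minGroups (2).
	rbSpec := &workv1alpha2.ResourceBindingSpec{Replicas: 100}

	group1 := []ClusterDetailInfo{
		{Name: "member1", Score: 100, AvailableReplicas: 10},
		{Name: "member2", Score: 100, AvailableReplicas: 10},
		{Name: "member3", Score: 100, AvailableReplicas: 10},
		{Name: "member4", Score: 100, AvailableReplicas: 10},
		{Name: "member5", Score: 100, AvailableReplicas: 5},
	}
	group2 := []ClusterDetailInfo{
		{Name: "member1", Score: 0, AvailableReplicas: 40},
		{Name: "member2", Score: 0, AvailableReplicas: 30},
		{Name: "member3", Score: 0, AvailableReplicas: 10},
		{Name: "member4", Score: 0, AvailableReplicas: 10},
	}

	fmt.Println(info.calcGroupScore(group1, rbSpec, 2)) // capacity 45 < 50: 45*1000 + avg score 100
	fmt.Println(info.calcGroupScore(group2, rbSpec, 2)) // capacity reached at 2 clusters: 50*1000 + 0
	// Output:
	// 45100
	// 50000
}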

func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.ClusterScoreList, rbSpec *workv1alpha2.ResourceBindingSpec) {
var clusters []*clusterv1alpha1.Cluster
for _, clusterScore := range clustersScore {
@@ -153,7 +270,7 @@ func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.Clus
})
}

func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldZone) {
return
}
@@ -178,13 +295,20 @@ func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alph
}
}

var minGroups int
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldZone {
minGroups = sc.MinGroups
}
}

for zone, zoneInfo := range info.Zones {
zoneInfo.Score = zoneInfo.Clusters[0].Score
zoneInfo.Score = info.calcGroupScore(zoneInfo.Clusters, rbSpec, minGroups)
info.Zones[zone] = zoneInfo
}
}

func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldRegion) {
return
}
@@ -212,13 +336,20 @@ func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1al
info.Regions[region] = regionInfo
}

var minGroups int
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldRegion {
minGroups = sc.MinGroups
}
}

for region, regionInfo := range info.Regions {
regionInfo.Score = regionInfo.Clusters[0].Score
regionInfo.Score = info.calcGroupScore(regionInfo.Clusters, rbSpec, minGroups)
info.Regions[region] = regionInfo
}
}

func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldProvider) {
return
}
@@ -252,8 +383,15 @@ func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1
info.Providers[provider] = providerInfo
}

var minGroups int
for _, sc := range spreadConstraints {
if sc.SpreadByField == policyv1alpha1.SpreadByFieldProvider {
minGroups = sc.MinGroups
}
}

for provider, providerInfo := range info.Providers {
providerInfo.Score = providerInfo.Clusters[0].Score
providerInfo.Score = info.calcGroupScore(providerInfo.Clusters, rbSpec, minGroups)
info.Providers[provider] = providerInfo
}
}
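
The minGroups lookup is repeated verbatim for zones, regions, and providers above; a hypothetical helper (not part of this commit; the name and the policyv1alpha1.SpreadFieldValue parameter type are assumptions) could fold the three loops into one:

// minGroupsFor returns the MinGroups of the last spread constraint whose
// SpreadByField matches the given field, or 0 if none is set; the
// last-match-wins behavior mirrors the loops above.
func minGroupsFor(spreadConstraints []policyv1alpha1.SpreadConstraint, field policyv1alpha1.SpreadFieldValue) int {
	var minGroups int
	for _, sc := range spreadConstraints {
		if sc.SpreadByField == field {
			minGroups = sc.MinGroups
		}
	}
	return minGroups
}

Each generator would then call, for example, minGroupsFor(spreadConstraints, policyv1alpha1.SpreadByFieldZone).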
