Skip to content

Commit

Permalink
fix: bug cannot use the disk which has failed replica
Browse files Browse the repository at this point in the history
We should not exclude the disk which has failed replica in 2 cases:
1. The caller is trying to reuse the failed replica, this disk is a
   valid candidate
2. The failed replica is no longer reusable, this disk is a valid candidate

longhorn-10210

Signed-off-by: Phan Le <[email protected]>
(cherry picked from commit 82ada80)
  • Loading branch information
PhanLe1010 authored and derekbit committed Jan 19, 2025
1 parent e296fd5 commit f12325b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
12 changes: 10 additions & 2 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
}
multiError.Append(errors)
}
diskCandidates = filterDisksWithMatchingReplicas(diskCandidates, replicas, diskSoftAntiAffinity)
diskCandidates = filterDisksWithMatchingReplicas(diskCandidates, replicas, diskSoftAntiAffinity, ignoreFailedReplicas)
return diskCandidates, multiError
}

Expand Down Expand Up @@ -441,9 +441,17 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk
// filterDisksWithMatchingReplicas filters the input disks map and returns only the disks that have the fewest matching
// replicas. If diskSoftAntiAffinity is false, it only returns disks that have no matching replicas.
func filterDisksWithMatchingReplicas(disks map[string]*Disk, replicas map[string]*longhorn.Replica,
diskSoftAntiAffinity bool) map[string]*Disk {
diskSoftAntiAffinity, ignoreFailedReplicas bool) map[string]*Disk {
replicasCountPerDisk := map[string]int{}
for _, r := range replicas {
if r.Spec.FailedAt != "" {
if ignoreFailedReplicas {
continue
}
if !IsPotentiallyReusableReplica(r) {
continue // This replica can never be used again, so it does not count in scheduling decisions.
}
}
replicasCountPerDisk[r.Spec.DiskID]++
}

Expand Down
4 changes: 3 additions & 1 deletion scheduler/replica_scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
inputDiskUUIDs []string
inputReplicas map[string]*longhorn.Replica
diskSoftAntiAffinity bool
ignoreFailedReplicas bool

expectDiskUUIDs []string
}
Expand Down Expand Up @@ -1417,6 +1418,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
replica4.Name: replica4,
}
tc.diskSoftAntiAffinity = true
tc.ignoreFailedReplicas = false
tc.expectDiskUUIDs = []string{diskUUID5} // Only disk5 has no matching replica.
tests["only schedule to disk without matching replica"] = tc

Expand All @@ -1426,7 +1428,7 @@ func (s *TestSuite) TestFilterDisksWithMatchingReplicas(c *C) {
for _, UUID := range tc.inputDiskUUIDs {
inputDisks[UUID] = &Disk{}
}
outputDiskUUIDs := filterDisksWithMatchingReplicas(inputDisks, tc.inputReplicas, tc.diskSoftAntiAffinity)
outputDiskUUIDs := filterDisksWithMatchingReplicas(inputDisks, tc.inputReplicas, tc.diskSoftAntiAffinity, tc.ignoreFailedReplicas)
c.Assert(len(outputDiskUUIDs), Equals, len(tc.expectDiskUUIDs))
for _, UUID := range tc.expectDiskUUIDs {
_, ok := outputDiskUUIDs[UUID]
Expand Down

0 comments on commit f12325b

Please sign in to comment.