Skip to content

Commit

Permalink
scheduler: change reservation event message format (#2090)
Browse files Browse the repository at this point in the history
Signed-off-by: 佑祎 <[email protected]>
  • Loading branch information
zwzhang0107 authored Jun 7, 2024
1 parent f472c94 commit b208bdf
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 48 deletions.
53 changes: 19 additions & 34 deletions pkg/scheduler/frameworkext/eventhandlers/reservation_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,12 @@ func generatePodEventOnReservationLevel(errorMsg string) (string, bool) {
// for reservation total item
reserveTotalRe := regexp.MustCompile("^([0-9]+) Reservation\\(s\\) matched owner total$")

// for node related item
reserveNodeDetailRe := regexp.MustCompile("^([0-9]+ Reservation\\(s\\)) for node reason that (.*)$")

// for reservation detail item
reserveDetailRe := regexp.MustCompile("^([0-9]+) Reservation\\(s\\) .*$")

// for affinity item of node level
affinityPatterns := []string{
"^([0-9]+) node\\(s\\) (didn't match pod topology spread constraints \\(missing required label\\))",
"^([0-9]+) node\\(s\\) (didn't match pod topology spread constraints)",
"^([0-9]+) node\\(s\\) (didn't satisfy existing pods anti-affinity rules)",
"^([0-9]+) node\\(s\\) (didn't match pod affinity rules)",
"^([0-9]+) node\\(s\\) (didn't match pod anti-affinity rules)",
}
affinityDetailRe := regexp.MustCompile(strings.Join(affinityPatterns, "|"))

for _, item := range detailSplit {
trimItem := strings.TrimSpace(item)
totalStr := reserveTotalRe.FindAllStringSubmatch(trimItem, -1)
Expand All @@ -167,35 +160,27 @@ func generatePodEventOnReservationLevel(errorMsg string) (string, bool) {
if total, err = strconv.ParseInt(totalStr[0][1], 10, 64); err != nil {
return "", false
}
} else if reserveDetailRe.MatchString(trimItem) {
// not total item, append to details, e.g. " 1 Reservation(s) ..."

// for 1 Reservation(s) Insufficient nvidia, replace nvidia with nvidia.com/gpu
// TODO support other extend resource fields like kubernetes.io/batch-cpu
itemReplaced := strings.Replace(trimItem, "nvidia", "nvidia.com/gpu", -1)
resultDetails = append(resultDetails, itemReplaced)
} else {
// other node items, record affinity errors on reservation level as:
// "at least 3 didn't match pod topology spread constraints Reservation(s)"
affinityDetailsSubMatch := affinityDetailRe.FindAllStringSubmatch(trimItem, -1)
if len(affinityDetailsSubMatch) == 0 {
} else if reserveNodeDetailRe.MatchString(trimItem) {
// node related item, e.g. "2 Reservation(s) for node reason that didn't match pod affinity rules"
reserveNodeSubMatch := reserveNodeDetailRe.FindStringSubmatch(trimItem)
if len(reserveNodeSubMatch) <= 1 {
continue
}
for _, submatch := range affinityDetailsSubMatch {
if len(submatch) <= 1 {
// expect: ["2 Reservation(s)", "didn't match pod affinity rules"]
nodeReasonWords := make([]string, 0, len(reserveNodeSubMatch)-1)
for _, vv := range reserveNodeSubMatch[1:] {
if vv == "" {
continue
}
r := &strings.Builder{}
r.WriteString("at least ")
for _, vv := range submatch[1:] {
if vv == "" {
continue
}
r.WriteString(vv + " ")
}
r.WriteString("Reservation(s)")
resultDetails = append(resultDetails, r.String())
nodeReasonWords = append(nodeReasonWords, vv)
}
resultDetails = append(resultDetails, strings.Join(nodeReasonWords, " "))
} else if reserveDetailRe.MatchString(trimItem) {
// reservation-level item, append to details, e.g. " 1 Reservation(s) ..."
// for "1 Reservation(s) Insufficient nvidia", replace nvidia with nvidia.com/gpu
// TODO support other extended resource fields like kubernetes.io/batch-cpu
itemReplaced := strings.Replace(trimItem, "nvidia", "nvidia.com/gpu", -1)
resultDetails = append(resultDetails, itemReplaced)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1344,60 +1344,86 @@ func Test_generatePodEventOnReservationLevel(t *testing.T) {
},
{
name: "pod topology spread constraints missing required label errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints (missing required label)," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints (missing required label), " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints (missing required label), " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory, " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints (missing required label) Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints (missing required label), " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "pod topology spread constraints errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "satisfy existing pods anti-affinity rules, errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't satisfy existing pods anti-affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't satisfy existing pods anti-affinity rules," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't satisfy existing pods anti-affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't satisfy existing pods anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "match pod affinity rules errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "match pod anti-affinity rules errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod anti-affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod anti-affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "mix affinity errors of 'match pod topology spread constraints' and 'match pod affinity rules'",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints, " +
"1 node(s) didn't match pod affinity rules, " +
"1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints, " +
"1 Reservation(s) for node reason that didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints Reservation(s), " +
"at least 1 didn't match pod affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"1 Reservation(s) didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "reservation of node reason",
errorMsg: "0/5 nodes are available: 2 node(s) didn't match pod topology spread constraints, " +
"1 node(s) didn't match pod affinity rules, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints, " +
"2 Reservation(s) for node reason that didn't match pod affinity rules," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"2 Reservation(s) didn't match pod affinity rules, 2 Reservation(s) Insufficient cpu, " +
"1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "only gang errors",
errorMsg: "Gang \"default/demo-job-podgroup\" gets rejected due to member Pod \"demo-job-kfqfs\" is" +
Expand Down

0 comments on commit b208bdf

Please sign in to comment.