Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: change reservation event message format #2090

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 19 additions & 34 deletions pkg/scheduler/frameworkext/eventhandlers/reservation_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,12 @@ func generatePodEventOnReservationLevel(errorMsg string) (string, bool) {
// for reservation total item
reserveTotalRe := regexp.MustCompile("^([0-9]+) Reservation\\(s\\) matched owner total$")

// for node related item
reserveNodeDetailRe := regexp.MustCompile("^([0-9]+ Reservation\\(s\\)) for node reason that (.*)$")

// for reservation detail item
reserveDetailRe := regexp.MustCompile("^([0-9]+) Reservation\\(s\\) .*$")

// for affinity item of node level
affinityPatterns := []string{
"^([0-9]+) node\\(s\\) (didn't match pod topology spread constraints \\(missing required label\\))",
"^([0-9]+) node\\(s\\) (didn't match pod topology spread constraints)",
"^([0-9]+) node\\(s\\) (didn't satisfy existing pods anti-affinity rules)",
"^([0-9]+) node\\(s\\) (didn't match pod affinity rules)",
"^([0-9]+) node\\(s\\) (didn't match pod anti-affinity rules)",
}
affinityDetailRe := regexp.MustCompile(strings.Join(affinityPatterns, "|"))

for _, item := range detailSplit {
trimItem := strings.TrimSpace(item)
totalStr := reserveTotalRe.FindAllStringSubmatch(trimItem, -1)
Expand All @@ -167,35 +160,27 @@ func generatePodEventOnReservationLevel(errorMsg string) (string, bool) {
if total, err = strconv.ParseInt(totalStr[0][1], 10, 64); err != nil {
return "", false
}
} else if reserveDetailRe.MatchString(trimItem) {
// not total item, append to details, e.g. " 1 Reservation(s) ..."

// for 1 Reservation(s) Insufficient nvidia, replace nvidia with nvidia.com/gpu
// TODO support other extend resource fields like kubernetes.io/batch-cpu
itemReplaced := strings.Replace(trimItem, "nvidia", "nvidia.com/gpu", -1)
resultDetails = append(resultDetails, itemReplaced)
} else {
// other node items, record affinity errors on reservation level as:
// "at least 3 didn't match pod topology spread constraints Reservation(s)"
affinityDetailsSubMatch := affinityDetailRe.FindAllStringSubmatch(trimItem, -1)
if len(affinityDetailsSubMatch) == 0 {
} else if reserveNodeDetailRe.MatchString(trimItem) {
// node related item, e.g. "2 Reservation(s) for node reason that didn't match pod affinity rules"
reserveNodeSubMatch := reserveNodeDetailRe.FindStringSubmatch(trimItem)
if len(reserveNodeSubMatch) <= 1 {
continue
}
for _, submatch := range affinityDetailsSubMatch {
if len(submatch) <= 1 {
// expect: ["2 Reservation(s)", "didn't match pod affinity rules"]
nodeReasonWords := make([]string, 0, len(reserveNodeSubMatch)-1)
for _, vv := range reserveNodeSubMatch[1:] {
if vv == "" {
continue
}
r := &strings.Builder{}
r.WriteString("at least ")
for _, vv := range submatch[1:] {
if vv == "" {
continue
}
r.WriteString(vv + " ")
}
r.WriteString("Reservation(s)")
resultDetails = append(resultDetails, r.String())
nodeReasonWords = append(nodeReasonWords, vv)
}
resultDetails = append(resultDetails, strings.Join(nodeReasonWords, " "))
} else if reserveDetailRe.MatchString(trimItem) {
// item about the reservation itself, append to details, e.g. " 1 Reservation(s) ..."
// e.g. for "1 Reservation(s) Insufficient nvidia", replace nvidia with nvidia.com/gpu
// TODO support other extended resource fields like kubernetes.io/batch-cpu
itemReplaced := strings.Replace(trimItem, "nvidia", "nvidia.com/gpu", -1)
resultDetails = append(resultDetails, itemReplaced)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1344,60 +1344,86 @@ func Test_generatePodEventOnReservationLevel(t *testing.T) {
},
{
name: "pod topology spread constraints missing required label errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints (missing required label)," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints (missing required label), " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints (missing required label), " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory, " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints (missing required label) Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints (missing required label), " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "pod topology spread constraints errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "satisfy existing pods anti-affinity rules, errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't satisfy existing pods anti-affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't satisfy existing pods anti-affinity rules," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't satisfy existing pods anti-affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't satisfy existing pods anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "match pod affinity rules errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "match pod anti-affinity rules errors",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod anti-affinity rules," +
"1 Insufficient cpu, 1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient cpu, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod anti-affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod anti-affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "mix affinity errors of 'match pod topology spread constraints' and 'match pod affinity rules'",
errorMsg: "0/5 nodes are available: 3 node(s) didn't match pod topology spread constraints, " +
"1 node(s) didn't match pod affinity rules, " +
"1 Insufficient memory, 2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints, " +
"1 Reservation(s) for node reason that didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: at least 3 didn't match pod topology spread constraints Reservation(s), " +
"at least 1 didn't match pod affinity rules Reservation(s), " +
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"1 Reservation(s) didn't match pod affinity rules, " +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "reservation of node reason",
errorMsg: "0/5 nodes are available: 2 node(s) didn't match pod topology spread constraints, " +
"1 node(s) didn't match pod affinity rules, 1 Insufficient memory, " +
"3 Reservation(s) for node reason that didn't match pod topology spread constraints, " +
"2 Reservation(s) for node reason that didn't match pod affinity rules," +
"2 Reservation(s) Insufficient cpu, 1 Reservation(s) Insufficient memory. " +
"8 Reservation(s) matched owner total.",
wantMsg: "0/8 reservations are available: 3 Reservation(s) didn't match pod topology spread constraints, " +
"2 Reservation(s) didn't match pod affinity rules, 2 Reservation(s) Insufficient cpu, " +
"1 Reservation(s) Insufficient memory.",
wantIsReserve: true,
},
{
name: "only gang errors",
errorMsg: "Gang \"default/demo-job-podgroup\" gets rejected due to member Pod \"demo-job-kfqfs\" is" +
Expand Down