From a3eab7cf7ffdf9d7678efcccdc82e32998e6b161 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Wed, 12 Jun 2024 16:55:58 +0200 Subject: [PATCH 01/14] Take amount of used reservations into account for free machine count. --- cmd/metal-api/internal/service/partition-service.go | 3 +-- cmd/metal-api/internal/service/partition-service_test.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 7d11615d7..5c2d32057 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -466,10 +466,9 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque size := sizesByID[cap.Size] for _, reservation := range size.Reservations.ForPartition(pc.ID) { - reservation := reservation - cap.Reservations += reservation.Amount cap.UsedReservations += min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) + cap.Free -= cap.UsedReservations } } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index a54d80ce7..f0f5604eb 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -287,7 +287,7 @@ func TestPartitionCapacity(t *testing.T) { c := result[0].ServerCapacities[0] require.Equal(t, "1", c.Size) require.Equal(t, 5, c.Total) - require.Equal(t, 0, c.Free) + require.Equal(t, -1, c.Free) require.Equal(t, 3, c.Reservations) require.Equal(t, 1, c.UsedReservations) } From 908cbd828e9f7ee0a94ae3a1f2f99e69d8ddc902 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Mon, 29 Jul 2024 15:41:09 +0200 Subject: [PATCH 02/14] Update. --- cmd/metal-api/internal/issues/types.go | 12 - cmd/metal-api/internal/metal/machine.go | 5 + .../internal/service/partition-service.go | 28 +- .../service/partition-service_test.go | 321 +++++++++++++++--- .../internal/service/v1/partition.go | 41 ++- 5 files changed, 327 insertions(+), 80 deletions(-) diff --git a/cmd/metal-api/internal/issues/types.go b/cmd/metal-api/internal/issues/types.go index db05f05b1..2ff13ad0e 100644 --- a/cmd/metal-api/internal/issues/types.go +++ b/cmd/metal-api/internal/issues/types.go @@ -24,18 +24,6 @@ func AllIssueTypes() []Type { } } -func NotAllocatableIssueTypes() []Type { - return []Type{ - TypeNoPartition, - TypeLivelinessDead, - TypeLivelinessUnknown, - TypeLivelinessNotAvailable, - TypeFailedMachineReclaim, - TypeCrashLoop, - TypeNoEventContainer, - } -} - func NewIssueFromType(t Type) (issue, error) { switch t { case TypeNoPartition: diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index 0f6eabfa3..bd3fc8847 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -12,6 +12,7 @@ import ( "github.com/dustin/go-humanize" mn "github.com/metal-stack/metal-lib/pkg/net" + "github.com/metal-stack/metal-lib/pkg/pointer" "github.com/samber/lo" ) @@ -132,6 +133,10 @@ func (m *Machine) IsFirewall() bool { return false } +func (m *Machine) IsWaiting(ec ProvisioningEventContainer) bool { + return m.Waiting && ProvisioningEventWaiting == pointer.FirstOrZero(ec.Events).Event +} + // A MachineAllocation stores the data which are only present for allocated machines. type MachineAllocation struct { Creator string `rethinkdb:"creator" json:"creator"` diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 5c2d32057..6010c899e 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -10,7 +10,6 @@ import ( "github.com/metal-stack/metal-api/cmd/metal-api/internal/issues" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "github.com/metal-stack/metal-lib/auditing" - "github.com/metal-stack/metal-lib/pkg/pointer" v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" @@ -378,7 +377,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque machinesWithIssues, err := issues.Find(&issues.Config{ Machines: ms, EventContainers: ecs, - Only: issues.NotAllocatableIssueTypes(), + Only: issues.AllIssueTypes(), }) if err != nil { return nil, fmt.Errorf("unable to calculate machine issues: %w", err) @@ -436,24 +435,23 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque cap.Total++ - if m.Allocation != nil { - cap.Allocated++ - continue - } - if _, ok := machinesWithIssues[m.ID]; ok { cap.Faulty++ cap.FaultyMachines = append(cap.FaultyMachines, m.ID) - continue } - if m.State.Value == metal.AvailableState && metal.ProvisioningEventWaiting == pointer.FirstOrZero(ec.Events).Event { - cap.Free++ - continue + switch { + case m.Allocation != nil: + cap.Allocated++ + case m.IsWaiting(ec): + cap.Waiting++ + if m.State.Value == metal.AvailableState { + cap.Free++ + } + default: + cap.Other++ + cap.OtherMachines = append(cap.OtherMachines, m.ID) } - - cap.Other++ - cap.OtherMachines = append(cap.OtherMachines, m.ID) } res := []v1.PartitionCapacity{} @@ -468,7 +466,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque for _, reservation := range size.Reservations.ForPartition(pc.ID) { cap.Reservations += reservation.Amount cap.UsedReservations += min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) - cap.Free -= cap.UsedReservations + cap.Free -= cap.Reservations } } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index f0f5604eb..74bc19f4d 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -7,8 +7,11 @@ import ( "log/slog" "net/http" "net/http/httptest" + "slices" "testing" + "time" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" r "gopkg.in/rethinkdb/rethinkdb-go.v6" @@ -18,6 +21,7 @@ import ( v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" "github.com/metal-stack/metal-api/cmd/metal-api/internal/testdata" "github.com/metal-stack/metal-lib/httperrors" + "github.com/metal-stack/metal-lib/pkg/pointer" "github.com/stretchr/testify/require" ) @@ -244,50 +248,281 @@ func TestUpdatePartition(t *testing.T) { require.Equal(t, downloadableFile, *result.PartitionBootConfiguration.ImageURL) } -func TestPartitionCapacity(t *testing.T) { - ds, mock := datastore.InitMockDB(t) +// func TestPartitionCapacity(t *testing.T) { +// ds, mock := datastore.InitMockDB(t) + +// ecs := []metal.ProvisioningEventContainer{} +// for _, m := range testdata.TestMachines { +// m := m +// ecs = append(ecs, metal.ProvisioningEventContainer{ +// Base: m.Base, +// }) +// } +// mock.On(r.DB("mockdb").Table("event")).Return(ecs, nil) + +// testdata.InitMockDBData(mock) +// log := slog.Default() + +// service := NewPartition(log, ds, &nopTopicCreator{}) +// container := restful.NewContainer().Add(service) + +// pcRequest := &v1.PartitionCapacityRequest{} +// js, err := json.Marshal(pcRequest) +// require.NoError(t, err) +// body := bytes.NewBuffer(js) + +// req := httptest.NewRequest("POST", "/v1/partition/capacity", body) +// req.Header.Add("Content-Type", "application/json") +// container = injectAdmin(log, container, req) +// w := httptest.NewRecorder() +// container.ServeHTTP(w, req) + +// resp := w.Result() +// defer resp.Body.Close() +// require.Equal(t, http.StatusOK, resp.StatusCode, w.Body.String()) +// var result []v1.PartitionCapacity +// err = json.NewDecoder(resp.Body).Decode(&result) + +// require.NoError(t, err) +// require.Len(t, result, 1) +// require.Equal(t, testdata.Partition1.ID, result[0].ID) +// require.NotNil(t, result[0].ServerCapacities) +// require.Len(t, result[0].ServerCapacities, 1) +// c := result[0].ServerCapacities[0] +// require.Equal(t, "1", c.Size) +// require.Equal(t, 5, c.Total) +// require.Equal(t, -1, c.Free) +// require.Equal(t, 3, c.Reservations) +// require.Equal(t, 1, c.UsedReservations) +// } - ecs := []metal.ProvisioningEventContainer{} - for _, m := range testdata.TestMachines { - m := m - ecs = append(ecs, metal.ProvisioningEventContainer{ - Base: m.Base, +func TestPartitionCapacity(t *testing.T) { + var ( + mockMachines = func(mock *r.Mock, reservationCount int, ms ...metal.Machine) { + var ( + sizes metal.Sizes + events metal.ProvisioningEventContainers + partitions metal.Partitions + ) + + for _, m := range ms { + ec := metal.ProvisioningEventContainer{Base: metal.Base{ID: m.ID}, Liveliness: metal.MachineLivelinessAlive} + if m.Waiting { + ec.Events = append(ec.Events, metal.ProvisioningEvent{ + Event: metal.ProvisioningEventWaiting, + }) + } + events = append(events, ec) + if !slices.ContainsFunc(sizes, func(s metal.Size) bool { + return s.ID == m.SizeID + }) { + s := metal.Size{Base: metal.Base{ID: m.SizeID}} + if reservationCount > 0 { + s.Reservations = append(s.Reservations, metal.Reservation{ + Amount: reservationCount, + ProjectID: pointer.SafeDeref(m.Allocation).Project, + PartitionIDs: []string{m.PartitionID}, + }) + } + sizes = append(sizes, s) + } + if !slices.ContainsFunc(partitions, func(p metal.Partition) bool { + return p.ID == m.PartitionID + }) { + partitions = append(partitions, metal.Partition{Base: metal.Base{ID: m.PartitionID}}) + } + } + + mock.On(r.DB("mockdb").Table("machine")).Return(ms, nil) + mock.On(r.DB("mockdb").Table("event")).Return(events, nil) + mock.On(r.DB("mockdb").Table("partition")).Return(partitions, nil) + mock.On(r.DB("mockdb").Table("size")).Return(sizes, nil) + } + + machineTpl = func(id, partition, size, project string) metal.Machine { + m := metal.Machine{ + Base: metal.Base{ID: id}, + PartitionID: partition, + SizeID: size, + IPMI: metal.IPMI{ // required for healthy machine state + Address: "1.2.3.4", + MacAddress: "aa:bb:00", + LastUpdated: time.Now().Add(-1 * time.Minute), + }, + } + if project != "" { + m.Allocation = &metal.MachineAllocation{ + Project: project, + } + } + return m + } + ) + + tests := []struct { + name string + mockFn func(mock *r.Mock) + want []*v1.PartitionCapacity + }{ + { + name: "one allocated machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + mockMachines(mock, 0, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Allocated: 1, + }, + }, + }, + }, + }, + { + name: "one faulty, allocated machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m1.IPMI.Address = "" + mockMachines(mock, 0, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Faulty: 1, + Allocated: 1, + FaultyMachines: []string{"1"}, + }, + }, + }, + }, + }, + { + name: "one waiting machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + mockMachines(mock, 0, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + }, + }, + }, + }, + }, + { + name: "one free machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + m1.State.Value = metal.AvailableState + mockMachines(mock, 0, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + }, + }, + }, + }, + }, + { + name: "one machine rebooting", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = false + mockMachines(mock, 0, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Other: 1, + OtherMachines: []string{"1"}, + }, + }, + }, + }, + }, + { + name: "machine reserved", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + mockMachines(mock, 1, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 0, + Reservations: 1, + UsedReservations: 0, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var ( + ds, mock = datastore.InitMockDB(t) + body = &v1.PartitionCapacityRequest{} + ws = NewPartition(slog.Default(), ds, nil) + ) + + if tt.mockFn != nil { + tt.mockFn(mock) + } + + code, got := genericWebRequest[[]*v1.PartitionCapacity](t, ws, testViewUser, body, "POST", "/v1/partition/capacity") + assert.Equal(t, http.StatusOK, code) + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } }) } - mock.On(r.DB("mockdb").Table("event")).Return(ecs, nil) - - testdata.InitMockDBData(mock) - log := slog.Default() - - service := NewPartition(log, ds, &nopTopicCreator{}) - container := restful.NewContainer().Add(service) - - pcRequest := &v1.PartitionCapacityRequest{} - js, err := json.Marshal(pcRequest) - require.NoError(t, err) - body := bytes.NewBuffer(js) - - req := httptest.NewRequest("POST", "/v1/partition/capacity", body) - req.Header.Add("Content-Type", "application/json") - container = injectAdmin(log, container, req) - w := httptest.NewRecorder() - container.ServeHTTP(w, req) - - resp := w.Result() - defer resp.Body.Close() - require.Equal(t, http.StatusOK, resp.StatusCode, w.Body.String()) - var result []v1.PartitionCapacity - err = json.NewDecoder(resp.Body).Decode(&result) - - require.NoError(t, err) - require.Len(t, result, 1) - require.Equal(t, testdata.Partition1.ID, result[0].ID) - require.NotNil(t, result[0].ServerCapacities) - require.Len(t, result[0].ServerCapacities, 1) - c := result[0].ServerCapacities[0] - require.Equal(t, "1", c.Size) - require.Equal(t, 5, c.Total) - require.Equal(t, -1, c.Free) - require.Equal(t, 3, c.Reservations) - require.Equal(t, 1, c.UsedReservations) } diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index beed5d2fc..f5d186488 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -49,17 +49,38 @@ type PartitionCapacity struct { ServerCapacities ServerCapacities `json:"servers" description:"servers available in this partition"` } +// ServerCapacity holds the machine capacity of a partition of a specific size. +// The amount of allocated, waiting and other machines sum up to the total amount of machines. type ServerCapacity struct { - Size string `json:"size" description:"the size of the server"` - Total int `json:"total" description:"total amount of servers with this size"` - Free int `json:"free" description:"free servers with this size"` - Allocated int `json:"allocated" description:"allocated servers with this size"` - Reservations int `json:"reservations" description:"the amount of reservations for this size"` - UsedReservations int `json:"usedreservations" description:"the amount of used reservations for this size"` - Faulty int `json:"faulty" description:"servers with issues with this size"` - FaultyMachines []string `json:"faultymachines" description:"servers with issues with this size"` - Other int `json:"other" description:"servers neither free, allocated or faulty with this size"` - OtherMachines []string `json:"othermachines" description:"servers neither free, allocated or faulty with this size"` + // Size is the size id correlating to all counts in this server capacity. + Size string `json:"size" description:"the size of the machine"` + + // Total is the total amount of machines for this size. + Total int `json:"total" description:"total amount of machines with this size"` + + // Allocated is the amount of machines that are currently allocated. + Allocated int `json:"allocated" description:"allocated machines with this size"` + // Waiting is the amount of machines that are currently available for allocation. + Waiting int `json:"waiting" description:"machines waiting for allocation with this size"` + // Other is the amount of machines that are neither allocated nor in the pool of available machines because they are currently in another provisioning state. + Other int `json:"other" description:"machines neither allocated or waiting with this size"` + // OtherMachines contains the machine IDs for machines that were classified into "Other". + OtherMachines []string `json:"othermachines" description:"machine ids neither allocated or waiting with this size"` + + // Faulty is the amount of machines that are neither allocated nor in the pool of available machines because they report an error. + Faulty int `json:"faulty" description:"machines with issues with this size"` + // FaultyMachines contains the machine IDs for machines that were classified into "Faulty". + FaultyMachines []string `json:"faultymachines" description:"machine ids with issues with this size"` + + // Reservations is the amount of reservations made for this size. + Reservations int `json:"reservations" description:"the amount of reservations for this size"` + // UsedReservations is the amount of reservations already used up for this size. + UsedReservations int `json:"usedreservations" description:"the amount of used reservations for this size"` + + // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. + // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. + // This can also be a negative number indicating that this machine size is "overbooked", i.e. there are more reservations than waiting machines. + Free int `json:"free" description:"free machines with this size"` } func NewPartitionResponse(p *metal.Partition) *PartitionResponse { From 73a8276d50f337309dc278e5b0aecdb9b9cbae7c Mon Sep 17 00:00:00 2001 From: Gerrit Date: Mon, 29 Jul 2024 15:42:08 +0200 Subject: [PATCH 03/14] Cleanup old test. --- .../service/partition-service_test.go | 48 ------------------- 1 file changed, 48 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 74bc19f4d..608212889 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -248,54 +248,6 @@ func TestUpdatePartition(t *testing.T) { require.Equal(t, downloadableFile, *result.PartitionBootConfiguration.ImageURL) } -// func TestPartitionCapacity(t *testing.T) { -// ds, mock := datastore.InitMockDB(t) - -// ecs := []metal.ProvisioningEventContainer{} -// for _, m := range testdata.TestMachines { -// m := m -// ecs = append(ecs, metal.ProvisioningEventContainer{ -// Base: m.Base, -// }) -// } -// mock.On(r.DB("mockdb").Table("event")).Return(ecs, nil) - -// testdata.InitMockDBData(mock) -// log := slog.Default() - -// service := NewPartition(log, ds, &nopTopicCreator{}) -// container := restful.NewContainer().Add(service) - -// pcRequest := &v1.PartitionCapacityRequest{} -// js, err := json.Marshal(pcRequest) -// require.NoError(t, err) -// body := bytes.NewBuffer(js) - -// req := httptest.NewRequest("POST", "/v1/partition/capacity", body) -// req.Header.Add("Content-Type", "application/json") -// container = injectAdmin(log, container, req) -// w := httptest.NewRecorder() -// container.ServeHTTP(w, req) - -// resp := w.Result() -// defer resp.Body.Close() -// require.Equal(t, http.StatusOK, resp.StatusCode, w.Body.String()) -// var result []v1.PartitionCapacity -// err = json.NewDecoder(resp.Body).Decode(&result) - -// require.NoError(t, err) -// require.Len(t, result, 1) -// require.Equal(t, testdata.Partition1.ID, result[0].ID) -// require.NotNil(t, result[0].ServerCapacities) -// require.Len(t, result[0].ServerCapacities, 1) -// c := result[0].ServerCapacities[0] -// require.Equal(t, "1", c.Size) -// require.Equal(t, 5, c.Total) -// require.Equal(t, -1, c.Free) -// require.Equal(t, 3, c.Reservations) -// require.Equal(t, 1, c.UsedReservations) -// } - func TestPartitionCapacity(t *testing.T) { var ( mockMachines = func(mock *r.Mock, reservationCount int, ms ...metal.Machine) { From f9cae9f0d94e51c7f6a774cc91e0ca0537cac3e8 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Mon, 29 Jul 2024 15:54:26 +0200 Subject: [PATCH 04/14] Fix. --- .../internal/service/partition-service.go | 5 ++-- .../service/partition-service_test.go | 29 ++++++++++--------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 6010c899e..3663606f0 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -465,8 +465,9 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque for _, reservation := range size.Reservations.ForPartition(pc.ID) { cap.Reservations += reservation.Amount - cap.UsedReservations += min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) - cap.Free -= cap.Reservations + used := min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) + cap.UsedReservations += used + cap.Free -= reservation.Amount - used } } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 608212889..226595443 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -250,7 +250,7 @@ func TestUpdatePartition(t *testing.T) { func TestPartitionCapacity(t *testing.T) { var ( - mockMachines = func(mock *r.Mock, reservationCount int, ms ...metal.Machine) { + mockMachines = func(mock *r.Mock, reservation *metal.Reservation, ms ...metal.Machine) { var ( sizes metal.Sizes events metal.ProvisioningEventContainers @@ -269,12 +269,8 @@ func TestPartitionCapacity(t *testing.T) { return s.ID == m.SizeID }) { s := metal.Size{Base: metal.Base{ID: m.SizeID}} - if reservationCount > 0 { - s.Reservations = append(s.Reservations, metal.Reservation{ - Amount: reservationCount, - ProjectID: pointer.SafeDeref(m.Allocation).Project, - PartitionIDs: []string{m.PartitionID}, - }) + if reservation != nil { + s.Reservations = append(s.Reservations, *reservation) } sizes = append(sizes, s) } @@ -320,7 +316,7 @@ func TestPartitionCapacity(t *testing.T) { name: "one allocated machine", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") - mockMachines(mock, 0, m1) + mockMachines(mock, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -342,7 +338,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") m1.IPMI.Address = "" - mockMachines(mock, 0, m1) + mockMachines(mock, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -366,7 +362,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true - mockMachines(mock, 0, m1) + mockMachines(mock, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -390,7 +386,7 @@ func TestPartitionCapacity(t *testing.T) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true m1.State.Value = metal.AvailableState - mockMachines(mock, 0, m1) + mockMachines(mock, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -413,7 +409,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = false - mockMachines(mock, 0, m1) + mockMachines(mock, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -436,7 +432,14 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true - mockMachines(mock, 1, m1) + + reservation := metal.Reservation{ + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + } + + mockMachines(mock, &reservation, m1) }, want: []*v1.PartitionCapacity{ { From 11ee943463f771df8e825ec7e97fc4afbae4f0c8 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 08:54:14 +0200 Subject: [PATCH 05/14] More tests. --- .../service/partition-service_test.go | 194 ++++++++++++++++-- 1 file changed, 182 insertions(+), 12 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 226595443..52f56dc7b 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -250,7 +250,7 @@ func TestUpdatePartition(t *testing.T) { func TestPartitionCapacity(t *testing.T) { var ( - mockMachines = func(mock *r.Mock, reservation *metal.Reservation, ms ...metal.Machine) { + mockMachines = func(mock *r.Mock, reservations []metal.Reservation, ms ...metal.Machine) { var ( sizes metal.Sizes events metal.ProvisioningEventContainers @@ -269,9 +269,6 @@ func TestPartitionCapacity(t *testing.T) { return s.ID == m.SizeID }) { s := metal.Size{Base: metal.Base{ID: m.SizeID}} - if reservation != nil { - s.Reservations = append(s.Reservations, *reservation) - } sizes = append(sizes, s) } if !slices.ContainsFunc(partitions, func(p metal.Partition) bool { @@ -281,6 +278,12 @@ func TestPartitionCapacity(t *testing.T) { } } + if len(reservations) > 0 { + for i := range sizes { + sizes[i].Reservations = append(sizes[i].Reservations, reservations...) + } + } + mock.On(r.DB("mockdb").Table("machine")).Return(ms, nil) mock.On(r.DB("mockdb").Table("event")).Return(events, nil) mock.On(r.DB("mockdb").Table("partition")).Return(partitions, nil) @@ -293,8 +296,8 @@ func TestPartitionCapacity(t *testing.T) { PartitionID: partition, SizeID: size, IPMI: metal.IPMI{ // required for healthy machine state - Address: "1.2.3.4", - MacAddress: "aa:bb:00", + Address: "1.2.3." + id, + MacAddress: "aa:bb:0" + id, LastUpdated: time.Now().Add(-1 * time.Minute), }, } @@ -333,6 +336,28 @@ func TestPartitionCapacity(t *testing.T) { }, }, }, + { + name: "two allocated machines", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + mockMachines(mock, nil, m1, m2) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 2, + Allocated: 2, + }, + }, + }, + }, + }, { name: "one faulty, allocated machine", mockFn: func(mock *r.Mock) { @@ -380,6 +405,31 @@ func TestPartitionCapacity(t *testing.T) { }, }, }, + { + name: "one waiting, one allocated machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + mockMachines(mock, nil, m1, m2) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 2, + Allocated: 1, + Waiting: 1, + Free: 1, + }, + }, + }, + }, + }, { name: "one free machine", mockFn: func(mock *r.Mock) { @@ -428,18 +478,20 @@ func TestPartitionCapacity(t *testing.T) { }, }, { - name: "machine reserved", + name: "reserved machine does not count as free", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true - reservation := metal.Reservation{ - Amount: 1, - ProjectID: "project-123", - PartitionIDs: []string{"partition-a"}, + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, } - mockMachines(mock, &reservation, m1) + mockMachines(mock, reservations, m1) }, want: []*v1.PartitionCapacity{ { @@ -459,6 +511,124 @@ func TestPartitionCapacity(t *testing.T) { }, }, }, + { + name: "overbooked partition", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + { + Amount: 2, + ProjectID: "project-456", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: -2, + Reservations: 3, + UsedReservations: 0, + }, + }, + }, + }, + }, + { + name: "reservations already used up", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 2, + UsedReservations: 2, + }, + }, + }, + }, + }, + { + name: "other partition size reservation has no influence", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-b"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 2, + UsedReservations: 2, + }, + }, + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 533a4ce3912d06c5b49ea00e876b9585f0994e42 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 09:00:14 +0200 Subject: [PATCH 06/14] One more test. --- .../service/partition-service_test.go | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 52f56dc7b..0fb031c06 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -551,7 +551,7 @@ func TestPartitionCapacity(t *testing.T) { }, }, { - name: "reservations already used up", + name: "reservations already used up (edge)", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") m2 := machineTpl("2", "partition-a", "size-a", "project-123") @@ -587,6 +587,43 @@ func TestPartitionCapacity(t *testing.T) { }, }, }, + { + name: "reservations already used up", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 1, + UsedReservations: 1, + }, + }, + }, + }, + }, { name: "other partition size reservation has no influence", mockFn: func(mock *r.Mock) { From e52d3492b70e81894d320932aab31931806cb84b Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 09:10:49 +0200 Subject: [PATCH 07/14] Spec. --- .../internal/service/partition-service.go | 1 - spec/metal-api.json | 24 ++++++++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 3663606f0..32157c761 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -377,7 +377,6 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque machinesWithIssues, err := issues.Find(&issues.Config{ Machines: ms, EventContainers: ecs, - Only: issues.AllIssueTypes(), }) if err != nil { return nil, fmt.Errorf("unable to calculate machine issues: %w", err) diff --git a/spec/metal-api.json b/spec/metal-api.json index 163f5f1cb..e00d14dee 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4392,34 +4392,34 @@ "v1.ServerCapacity": { "properties": { "allocated": { - "description": "allocated servers with this size", + "description": "allocated machines with this size", "format": "int32", "type": "integer" }, "faulty": { - "description": "servers with issues with this size", + "description": "machines with issues with this size", "format": "int32", "type": "integer" }, "faultymachines": { - "description": "servers with issues with this size", + "description": "machine ids with issues with this size", "items": { "type": "string" }, "type": "array" }, "free": { - "description": "free servers with this size", + "description": "free machines with this size", "format": "int32", "type": "integer" }, "other": { - "description": "servers neither free, allocated or faulty with this size", + "description": "machines neither allocated or waiting with this size", "format": "int32", "type": "integer" }, "othermachines": { - "description": "servers neither free, allocated or faulty with this size", + "description": "machine ids neither allocated or waiting with this size", "items": { "type": "string" }, @@ -4431,11 +4431,11 @@ "type": "integer" }, "size": { - "description": "the size of the server", + "description": "the size of the machine", "type": "string" }, "total": { - "description": "total amount of servers with this size", + "description": "total amount of machines with this size", "format": "int32", "type": "integer" }, @@ -4443,6 +4443,11 @@ "description": "the amount of used reservations for this size", "format": "int32", "type": "integer" + }, + "waiting": { + "description": "machines waiting for allocation with this size", + "format": "int32", + "type": "integer" } }, "required": [ @@ -4455,7 +4460,8 @@ "reservations", "size", "total", - "usedreservations" + "usedreservations", + "waiting" ] }, "v1.SizeConstraint": { From 5167137dc523073d0e75cd5a954806bab88aed49 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 09:12:16 +0200 Subject: [PATCH 08/14] Spelling. --- cmd/metal-api/internal/service/v1/partition.go | 4 ++-- spec/metal-api.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index f5d186488..bf3f2f8c7 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -63,9 +63,9 @@ type ServerCapacity struct { // Waiting is the amount of machines that are currently available for allocation. Waiting int `json:"waiting" description:"machines waiting for allocation with this size"` // Other is the amount of machines that are neither allocated nor in the pool of available machines because they are currently in another provisioning state. - Other int `json:"other" description:"machines neither allocated or waiting with this size"` + Other int `json:"other" description:"machines neither allocated nor waiting with this size"` // OtherMachines contains the machine IDs for machines that were classified into "Other". - OtherMachines []string `json:"othermachines" description:"machine ids neither allocated or waiting with this size"` + OtherMachines []string `json:"othermachines" description:"machine ids neither allocated nor waiting with this size"` // Faulty is the amount of machines that are neither allocated nor in the pool of available machines because they report an error. Faulty int `json:"faulty" description:"machines with issues with this size"` diff --git a/spec/metal-api.json b/spec/metal-api.json index e00d14dee..04e06e880 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4414,12 +4414,12 @@ "type": "integer" }, "other": { - "description": "machines neither allocated or waiting with this size", + "description": "machines neither allocated nor waiting with this size", "format": "int32", "type": "integer" }, "othermachines": { - "description": "machine ids neither allocated or waiting with this size", + "description": "machine ids neither allocated nor waiting with this size", "items": { "type": "string" }, From c0a2edbab2b84e489dd285ed8a10a39c619e5555 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 09:13:41 +0200 Subject: [PATCH 09/14] Omit empty. --- .../internal/service/v1/partition.go | 20 +++++++++---------- spec/metal-api.json | 12 +---------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index bf3f2f8c7..eb12d12a5 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -56,31 +56,31 @@ type ServerCapacity struct { Size string `json:"size" description:"the size of the machine"` // Total is the total amount of machines for this size. - Total int `json:"total" description:"total amount of machines with this size"` + Total int `json:"total,omitempty" description:"total amount of machines with this size"` // Allocated is the amount of machines that are currently allocated. - Allocated int `json:"allocated" description:"allocated machines with this size"` + Allocated int `json:"allocated,omitempty" description:"allocated machines with this size"` // Waiting is the amount of machines that are currently available for allocation. - Waiting int `json:"waiting" description:"machines waiting for allocation with this size"` + Waiting int `json:"waiting,omitempty" description:"machines waiting for allocation with this size"` // Other is the amount of machines that are neither allocated nor in the pool of available machines because they are currently in another provisioning state. - Other int `json:"other" description:"machines neither allocated nor waiting with this size"` + Other int `json:"other,omitempty" description:"machines neither allocated nor waiting with this size"` // OtherMachines contains the machine IDs for machines that were classified into "Other". - OtherMachines []string `json:"othermachines" description:"machine ids neither allocated nor waiting with this size"` + OtherMachines []string `json:"othermachines,omitempty" description:"machine ids neither allocated nor waiting with this size"` // Faulty is the amount of machines that are neither allocated nor in the pool of available machines because they report an error. - Faulty int `json:"faulty" description:"machines with issues with this size"` + Faulty int `json:"faulty,omitempty" description:"machines with issues with this size"` // FaultyMachines contains the machine IDs for machines that were classified into "Faulty". - FaultyMachines []string `json:"faultymachines" description:"machine ids with issues with this size"` + FaultyMachines []string `json:"faultymachines,omitempty" description:"machine ids with issues with this size"` // Reservations is the amount of reservations made for this size. - Reservations int `json:"reservations" description:"the amount of reservations for this size"` + Reservations int `json:"reservations,omitempty" description:"the amount of reservations for this size"` // UsedReservations is the amount of reservations already used up for this size. - UsedReservations int `json:"usedreservations" description:"the amount of used reservations for this size"` + UsedReservations int `json:"usedreservations,omitempty" description:"the amount of used reservations for this size"` // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. // This can also be a negative number indicating that this machine size is "overbooked", i.e. there are more reservations than waiting machines. - Free int `json:"free" description:"free machines with this size"` + Free int `json:"free,omitempty" description:"free machines with this size"` } func NewPartitionResponse(p *metal.Partition) *PartitionResponse { diff --git a/spec/metal-api.json b/spec/metal-api.json index 04e06e880..d8a9b54d0 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4451,17 +4451,7 @@ } }, "required": [ - "allocated", - "faulty", - "faultymachines", - "free", - "other", - "othermachines", - "reservations", - "size", - "total", - "usedreservations", - "waiting" + "size" ] }, "v1.SizeConstraint": { From 7836c3c25f565f0cfac041ab32f73b0cb1cefe35 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 09:19:20 +0200 Subject: [PATCH 10/14] Try to make it nicer to read. --- cmd/metal-api/internal/metal/machine.go | 4 ++++ cmd/metal-api/internal/service/partition-service.go | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index bd3fc8847..2131cdcae 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -137,6 +137,10 @@ func (m *Machine) IsWaiting(ec ProvisioningEventContainer) bool { return m.Waiting && ProvisioningEventWaiting == pointer.FirstOrZero(ec.Events).Event } +func (m *Machine) IsAvailable() bool { + return m.State.Value == AvailableState +} + // A MachineAllocation stores the data which are only present for allocated machines. type MachineAllocation struct { Creator string `rethinkdb:"creator" json:"creator"` diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 32157c761..bc3922f7e 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -444,7 +444,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque cap.Allocated++ case m.IsWaiting(ec): cap.Waiting++ - if m.State.Value == metal.AvailableState { + if m.IsAvailable() { cap.Free++ } default: @@ -463,10 +463,11 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque size := sizesByID[cap.Size] for _, reservation := range size.Reservations.ForPartition(pc.ID) { + usedReservations := min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) + cap.Reservations += reservation.Amount - used := min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) - cap.UsedReservations += used - cap.Free -= reservation.Amount - used + cap.UsedReservations += usedReservations + cap.Free -= reservation.Amount - usedReservations } } From ca25e22cef82f6779fd105302f80cd32e0d05661 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 14:15:02 +0200 Subject: [PATCH 11/14] No last event error. --- cmd/metal-api/internal/service/partition-service.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index bc3922f7e..a70ae048f 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -377,6 +377,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque machinesWithIssues, err := issues.Find(&issues.Config{ Machines: ms, EventContainers: ecs, + Omit: []issues.Type{issues.TypeLastEventError}, }) if err != nil { return nil, fmt.Errorf("unable to calculate machine issues: %w", err) From 359f0c3db33fe48401c4766552c035933b13469a Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 15:44:59 +0200 Subject: [PATCH 12/14] Hopefully even better. --- cmd/metal-api/internal/metal/machine.go | 9 ----- .../internal/service/partition-service.go | 17 +++++++-- .../service/partition-service_test.go | 38 +++++++++++++------ .../internal/service/v1/partition.go | 31 ++++++++------- spec/metal-api.json | 20 +++++++--- 5 files changed, 73 insertions(+), 42 deletions(-) diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index 2131cdcae..0f6eabfa3 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -12,7 +12,6 @@ import ( "github.com/dustin/go-humanize" mn "github.com/metal-stack/metal-lib/pkg/net" - "github.com/metal-stack/metal-lib/pkg/pointer" "github.com/samber/lo" ) @@ -133,14 +132,6 @@ func (m *Machine) IsFirewall() bool { return false } -func (m *Machine) IsWaiting(ec ProvisioningEventContainer) bool { - return m.Waiting && ProvisioningEventWaiting == pointer.FirstOrZero(ec.Events).Event -} - -func (m *Machine) IsAvailable() bool { - return m.State.Value == AvailableState -} - // A MachineAllocation stores the data which are only present for allocated machines. type MachineAllocation struct { Creator string `rethinkdb:"creator" json:"creator"` diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index a70ae048f..f17627520 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -10,6 +10,7 @@ import ( "github.com/metal-stack/metal-api/cmd/metal-api/internal/issues" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "github.com/metal-stack/metal-lib/auditing" + "github.com/metal-stack/metal-lib/pkg/pointer" v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" @@ -440,14 +441,22 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque cap.FaultyMachines = append(cap.FaultyMachines, m.ID) } + // allocation dependent counts switch { case m.Allocation != nil: cap.Allocated++ - case m.IsWaiting(ec): + case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState: + cap.Free++ + default: + cap.Unavailable++ + } + + // provisioning state dependent counts + switch pointer.FirstOrZero(ec.Events).Event { //nolint:exhaustive + case metal.ProvisioningEventPhonedHome: + cap.PhonedHome++ + case metal.ProvisioningEventWaiting: cap.Waiting++ - if m.IsAvailable() { - cap.Free++ - } default: cap.Other++ cap.OtherMachines = append(cap.OtherMachines, m.ID) diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 0fb031c06..87478e153 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -264,6 +264,11 @@ func TestPartitionCapacity(t *testing.T) { Event: metal.ProvisioningEventWaiting, }) } + if m.Allocation != nil { + ec.Events = append(ec.Events, metal.ProvisioningEvent{ + Event: metal.ProvisioningEventPhonedHome, + }) + } events = append(events, ec) if !slices.ContainsFunc(sizes, func(s metal.Size) bool { return s.ID == m.SizeID @@ -300,6 +305,9 @@ func TestPartitionCapacity(t *testing.T) { MacAddress: "aa:bb:0" + id, LastUpdated: time.Now().Add(-1 * time.Minute), }, + State: metal.MachineState{ + Value: metal.AvailableState, + }, } if project != "" { m.Allocation = &metal.MachineAllocation{ @@ -328,9 +336,10 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 1, - Allocated: 1, + Size: "size-a", + Total: 1, + PhonedHome: 1, + Allocated: 1, }, }, }, @@ -350,9 +359,10 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 2, - Allocated: 2, + Size: "size-a", + Total: 2, + PhonedHome: 2, + Allocated: 2, }, }, }, @@ -374,6 +384,7 @@ func TestPartitionCapacity(t *testing.T) { { Size: "size-a", Total: 1, + PhonedHome: 1, Faulty: 1, Allocated: 1, FaultyMachines: []string{"1"}, @@ -420,11 +431,12 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 2, - Allocated: 1, - Waiting: 1, - Free: 1, + Size: "size-a", + Total: 2, + Allocated: 1, + Waiting: 1, + PhonedHome: 1, + Free: 1, }, }, }, @@ -471,6 +483,7 @@ func TestPartitionCapacity(t *testing.T) { Size: "size-a", Total: 1, Other: 1, + Unavailable: 1, OtherMachines: []string{"1"}, }, }, @@ -582,6 +595,7 @@ func TestPartitionCapacity(t *testing.T) { Free: 1, Reservations: 2, UsedReservations: 2, + PhonedHome: 2, }, }, }, @@ -619,6 +633,7 @@ func TestPartitionCapacity(t *testing.T) { Free: 1, Reservations: 1, UsedReservations: 1, + PhonedHome: 2, }, }, }, @@ -661,6 +676,7 @@ func TestPartitionCapacity(t *testing.T) { Free: 1, Reservations: 2, UsedReservations: 2, + PhonedHome: 2, }, }, }, diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index eb12d12a5..4201af69b 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -56,17 +56,27 @@ type ServerCapacity struct { Size string `json:"size" description:"the size of the machine"` // Total is the total amount of machines for this size. - Total int `json:"total,omitempty" description:"total amount of machines with this size"` - - // Allocated is the amount of machines that are currently allocated. - Allocated int `json:"allocated,omitempty" description:"allocated machines with this size"` - // Waiting is the amount of machines that are currently available for allocation. - Waiting int `json:"waiting,omitempty" description:"machines waiting for allocation with this size"` - // Other is the amount of machines that are neither allocated nor in the pool of available machines because they are currently in another provisioning state. - Other int `json:"other,omitempty" description:"machines neither allocated nor waiting with this size"` + Total int `json:"total,omitempty" description:"total amount of machines with size"` + + // PhonedHome is the amount of machines that are currently in the provisioning state "phoned home". + PhonedHome int `json:"phoned_home,omitempty" description:"machines in phoned home provisioning state"` + // Waiting is the amount of machines that are currently in the provisioning state "waiting". + Waiting int `json:"waiting,omitempty" description:"machines in waiting provisioning state"` + // Other is the amount of machines that are neither in the provisioning state waiting nor in phoned home but in another provisioning state. + Other int `json:"other,omitempty" description:"machines neither phoned home nor waiting but in another provisioning state"` // OtherMachines contains the machine IDs for machines that were classified into "Other". OtherMachines []string `json:"othermachines,omitempty" description:"machine ids neither allocated nor waiting with this size"` + // Allocated is the amount of machines that are currently allocated. + Allocated int `json:"allocated,omitempty" description:"allocated machines"` + // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. + // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. + // This can also be a negative number indicating that this machine size is "overbooked", i.e. there are more reservations than waiting machines. + Free int `json:"free,omitempty" description:"free machines with this size (freely allocatable)"` + // Unavailable is the amount of machine in a partition that are currently not allocatable because they are not waiting or + // not in the machine state "available", e.g. locked or reserved. + Unavailable int `json:"unavailable,omitempty" description:"unavailable machines with this size"` + // Faulty is the amount of machines that are neither allocated nor in the pool of available machines because they report an error. Faulty int `json:"faulty,omitempty" description:"machines with issues with this size"` // FaultyMachines contains the machine IDs for machines that were classified into "Faulty". @@ -76,11 +86,6 @@ type ServerCapacity struct { Reservations int `json:"reservations,omitempty" description:"the amount of reservations for this size"` // UsedReservations is the amount of reservations already used up for this size. UsedReservations int `json:"usedreservations,omitempty" description:"the amount of used reservations for this size"` - - // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. - // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. - // This can also be a negative number indicating that this machine size is "overbooked", i.e. there are more reservations than waiting machines. - Free int `json:"free,omitempty" description:"free machines with this size"` } func NewPartitionResponse(p *metal.Partition) *PartitionResponse { diff --git a/spec/metal-api.json b/spec/metal-api.json index d8a9b54d0..01bfeff7e 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4392,7 +4392,7 @@ "v1.ServerCapacity": { "properties": { "allocated": { - "description": "allocated machines with this size", + "description": "allocated machines", "format": "int32", "type": "integer" }, @@ -4409,12 +4409,12 @@ "type": "array" }, "free": { - "description": "free machines with this size", + "description": "free machines with this size (freely allocatable)", "format": "int32", "type": "integer" }, "other": { - "description": "machines neither allocated nor waiting with this size", + "description": "machines neither phoned home nor waiting but in another provisioning state", "format": "int32", "type": "integer" }, @@ -4425,6 +4425,11 @@ }, "type": "array" }, + "phoned_home": { + "description": "machines in phoned home provisioning state", + "format": "int32", + "type": "integer" + }, "reservations": { "description": "the amount of reservations for this size", "format": "int32", @@ -4435,7 +4440,12 @@ "type": "string" }, "total": { - "description": "total amount of machines with this size", + "description": "total amount of machines with size", + "format": "int32", + "type": "integer" + }, + "unavailable": { + "description": "unavailable machines with this size", "format": "int32", "type": "integer" }, @@ -4445,7 +4455,7 @@ "type": "integer" }, "waiting": { - "description": "machines waiting for allocation with this size", + "description": "machines in waiting provisioning state", "format": "int32", "type": "integer" } From 7298452967644a90301e6ed4530b89c731696fae Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 30 Jul 2024 16:20:21 +0200 Subject: [PATCH 13/14] Add comment. --- cmd/metal-api/internal/service/partition-service.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index f17627520..1644dbcef 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -446,6 +446,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque case m.Allocation != nil: cap.Allocated++ case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState: + // the free machine count considers the same aspects as the query for electing the machine candidate! cap.Free++ default: cap.Unavailable++ From e0a9956b5028c3b8162cfbc7cc17ab5134cc153b Mon Sep 17 00:00:00 2001 From: Gerrit Date: Wed, 7 Aug 2024 13:28:20 +0200 Subject: [PATCH 14/14] Cap free at 0. --- cmd/metal-api/internal/service/partition-service.go | 1 + cmd/metal-api/internal/service/partition-service_test.go | 4 ++-- cmd/metal-api/internal/service/v1/partition.go | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 1644dbcef..c5d081f67 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -479,6 +479,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque cap.Reservations += reservation.Amount cap.UsedReservations += usedReservations cap.Free -= reservation.Amount - usedReservations + cap.Free = max(cap.Free, 0) } } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 87478e153..e1d821a74 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -525,7 +525,7 @@ func TestPartitionCapacity(t *testing.T) { }, }, { - name: "overbooked partition", + name: "overbooked partition, free count capped at 0", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true @@ -555,7 +555,7 @@ func TestPartitionCapacity(t *testing.T) { Size: "size-a", Total: 1, Waiting: 1, - Free: -2, + Free: 0, Reservations: 3, UsedReservations: 0, }, diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index 4201af69b..fc9b8c3a8 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -71,7 +71,6 @@ type ServerCapacity struct { Allocated int `json:"allocated,omitempty" description:"allocated machines"` // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. - // This can also be a negative number indicating that this machine size is "overbooked", i.e. there are more reservations than waiting machines. Free int `json:"free,omitempty" description:"free machines with this size (freely allocatable)"` // Unavailable is the amount of machine in a partition that are currently not allocatable because they are not waiting or // not in the machine state "available", e.g. locked or reserved.