Commit d73b246

feat(scheduler): support optional cpu constraints, make sort by guest count optional
1 parent 71d2f82 commit d73b246

11 files changed: +277 −135 lines changed

api/v1alpha1/proxmoxcluster_types.go

Lines changed: 20 additions & 0 deletions
@@ -78,6 +78,15 @@ type SchedulerHints struct {
 	// By default 100% of a node's memory will be used for allocation.
 	// +optional
 	MemoryAdjustment *uint64 `json:"memoryAdjustment,omitempty"`
+
+	// Like MemoryAdjustment, but for CPU resources.
+	// Defaults to 0 (disabled), as CPU is a compressible resource.
+	// +optional
+	CPUAdjustment *uint64 `json:"cpuAdjustment,omitempty"`
+
+	// +optional
+	// +kubebuilder:default=true
+	PreferLowerGuestCount bool `json:"preferLowerGuestCount,omitempty"`
 }

 // GetMemoryAdjustment returns the memory adjustment percentage to use within the scheduler.
@@ -91,6 +100,17 @@ func (sh *SchedulerHints) GetMemoryAdjustment() uint64 {
 	return memoryAdjustment
 }

+// GetCPUAdjustment returns the cpu adjustment percentage to use within the scheduler.
+func (sh *SchedulerHints) GetCPUAdjustment() uint64 {
+	cpuAdjustment := uint64(0)
+
+	if sh != nil {
+		cpuAdjustment = ptr.Deref(sh.CPUAdjustment, 0)
+	}
+
+	return cpuAdjustment
+}
+
 // ProxmoxClusterStatus defines the observed state of ProxmoxCluster.
 type ProxmoxClusterStatus struct {
 	// Ready indicates that the cluster is ready.
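To make the new accessor's semantics concrete, here is a minimal standalone sketch, not part of the commit: the types are re-declared outside the real api/v1alpha1 package, and ptr is k8s.io/utils/ptr as imported by the file above. The getter is nil-safe and treats an unset CPUAdjustment as 0, i.e. CPU checks disabled:

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

// Local stand-in for the SchedulerHints type added above.
type SchedulerHints struct {
	MemoryAdjustment      *uint64
	CPUAdjustment         *uint64
	PreferLowerGuestCount bool
}

// Same shape as the accessor in the diff: safe on a nil receiver,
// returning 0 (disabled) when CPUAdjustment is unset.
func (sh *SchedulerHints) GetCPUAdjustment() uint64 {
	if sh == nil {
		return 0
	}
	return ptr.Deref(sh.CPUAdjustment, 0)
}

func main() {
	var noHints *SchedulerHints
	fmt.Println(noHints.GetCPUAdjustment())             // 0: nil receiver is fine
	fmt.Println((&SchedulerHints{}).GetCPUAdjustment()) // 0: CPU constraint disabled by default

	hints := &SchedulerHints{CPUAdjustment: ptr.To(uint64(300))}
	fmt.Println(hints.GetCPUAdjustment()) // 300: allow allocating up to 3x the node's cores
}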

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default.

config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml

Lines changed: 8 additions & 0 deletions
@@ -137,6 +137,11 @@ spec:
                 to a node's resources, to allow for overprovisioning or to ensure
                 a node will always have a safety buffer.
               properties:
+                cpuAdjustment:
+                  description: Like MemoryAdjustment, but for CPU resources. Defaults
+                    to 0 (disabled), as CPU is a compressible resource.
+                  format: int64
+                  type: integer
                 memoryAdjustment:
                   description: MemoryAdjustment allows to adjust a node's memory
                     by a given percentage. For example, setting it to 300 allows
@@ -146,6 +151,9 @@ spec:
                     default 100% of a node's memory will be used for allocation.
                   format: int64
                   type: integer
+                preferLowerGuestCount:
+                  default: true
+                  type: boolean
               type: object
             required:
             - dnsServers

internal/service/scheduler/vmscheduler.go

Lines changed: 65 additions & 38 deletions
@@ -29,17 +29,16 @@ import (
 	"sigs.k8s.io/cluster-api/util"
 )

-// InsufficientMemoryError is used when the scheduler cannot assign a VM to a node because it would
-// exceed the node's memory limit.
-type InsufficientMemoryError struct {
-	node      string
-	available uint64
-	requested uint64
+// InsufficientResourcesError is used when the scheduler cannot assign a VM to a node because no node
+// would be able to provide the requested resources.
+type InsufficientResourcesError struct {
+	requestedMemory uint64
+	requestedCores  uint64
 }

-func (err InsufficientMemoryError) Error() string {
-	return fmt.Sprintf("cannot reserve %dB of memory on node %s: %dB available memory left",
-		err.requested, err.node, err.available)
+func (err InsufficientResourcesError) Error() string {
+	return fmt.Sprintf("cannot reserve %dB of memory and/or %d vCores in cluster",
+		err.requestedMemory, err.requestedCores)
 }

 // ScheduleVM decides which node to a ProxmoxMachine should be scheduled on.
@@ -64,69 +63,92 @@ func selectNode(
 	allowedNodes []string,
 	schedulerHints *infrav1.SchedulerHints,
 ) (string, error) {
-	byMemory := make(sortByAvailableMemory, len(allowedNodes))
-	for i, nodeName := range allowedNodes {
-		mem, err := client.GetReservableMemoryBytes(ctx, nodeName, schedulerHints.GetMemoryAdjustment())
+	var nodes []nodeInfo
+
+	requestedMemory := uint64(machine.Spec.MemoryMiB) * 1024 * 1024 // convert to bytes
+	requestedCores := uint64(machine.Spec.NumCores)
+
+	for _, nodeName := range allowedNodes {
+		mem, cpu, err := client.GetReservableResources(
+			ctx,
+			nodeName,
+			schedulerHints.GetMemoryAdjustment(),
+			schedulerHints.GetCPUAdjustment(),
+		)
 		if err != nil {
 			return "", err
 		}
-		byMemory[i] = nodeInfo{Name: nodeName, AvailableMemory: mem}
-	}

-	sort.Sort(byMemory)
+		// if MemoryAdjustment is explicitly set to 0 (zero), pretend we have enough mem for the guest
+		if schedulerHints.GetMemoryAdjustment() == 0 {
+			mem = requestedMemory
+		}
+		// if CPUAdjustment is explicitly set to 0 (zero), pretend we have enough cpu for the guest
+		if schedulerHints.GetCPUAdjustment() == 0 {
+			cpu = requestedCores
+		}

-	requestedMemory := uint64(machine.Spec.MemoryMiB) * 1024 * 1024 // convert to bytes
-	if requestedMemory > byMemory[0].AvailableMemory {
-		// no more space on the node with the highest amount of available memory
-		return "", InsufficientMemoryError{
-			node:      byMemory[0].Name,
-			available: byMemory[0].AvailableMemory,
-			requested: requestedMemory,
+		node := nodeInfo{Name: nodeName, AvailableMemory: mem, AvailableCPU: cpu}
+		if node.AvailableMemory >= requestedMemory && node.AvailableCPU >= requestedCores {
+			nodes = append(nodes, node)
 		}
 	}

+	if len(nodes) == 0 {
+		return "", InsufficientResourcesError{requestedMemory, requestedCores}
+	}
+
+	// Sort nodes by free memory and then free CPU in descending order
+	byResources := make(sortByResources, len(nodes))
+	copy(byResources, nodes)
+	sort.Sort(byResources)
+
+	decision := byResources[0].Name
+
 	// count the existing vms per node
 	nodeCounter := make(map[string]int)
 	for _, nl := range locations {
 		nodeCounter[nl.Node]++
 	}

-	for i, info := range byMemory {
+	for i, info := range byResources {
 		info.ScheduledVMs = nodeCounter[info.Name]
-		byMemory[i] = info
+		byResources[i] = info
 	}

-	byReplicas := make(sortByReplicas, len(byMemory))
-	copy(byReplicas, byMemory)
+	byReplicas := make(sortByReplicas, len(byResources))
+	copy(byReplicas, byResources)

 	sort.Sort(byReplicas)

-	decision := byMemory[0].Name
-	if requestedMemory < byReplicas[0].AvailableMemory {
-		// distribute round-robin when memory allows it
+	// if memory allocation allows it, pick the node with the least amount of guests
+	if schedulerHints.PreferLowerGuestCount {
 		decision = byReplicas[0].Name
 	}

 	if logger := logr.FromContextOrDiscard(ctx); logger.V(4).Enabled() {
 		// only construct values when message should actually be logged
 		logger.Info("Scheduler decision",
 			"byReplicas", byReplicas.String(),
-			"byMemory", byMemory.String(),
+			"byResources", byResources.String(),
 			"requestedMemory", requestedMemory,
+			"requestedCores", requestedCores,
 			"resultNode", decision,
+			"schedulerHints", schedulerHints,
 		)
 	}

 	return decision, nil
 }

 type resourceClient interface {
-	GetReservableMemoryBytes(context.Context, string, uint64) (uint64, error)
+	GetReservableResources(context.Context, string, uint64, uint64) (uint64, uint64, error)
 }

 type nodeInfo struct {
 	Name            string `json:"node"`
 	AvailableMemory uint64 `json:"mem"`
+	AvailableCPU    uint64 `json:"cpu"`
 	ScheduledVMs    int    `json:"vms"`
 }

@@ -143,16 +165,21 @@ func (a sortByReplicas) String() string {
 	return string(o)
 }

-type sortByAvailableMemory []nodeInfo
+type sortByResources []nodeInfo
+
+func (a sortByResources) Len() int      { return len(a) }
+func (a sortByResources) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a sortByResources) Less(i, j int) bool {
+	// Compare by free memory and free CPU in descending order
+	if a[i].AvailableMemory != a[j].AvailableMemory {
+		return a[i].AvailableMemory > a[j].AvailableMemory
+	}

-func (a sortByAvailableMemory) Len() int      { return len(a) }
-func (a sortByAvailableMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a sortByAvailableMemory) Less(i, j int) bool {
-	// more available memory = lower index
-	return a[i].AvailableMemory > a[j].AvailableMemory
+	// If free memory is equal, sort by free CPU in descending order
+	return a[i].AvailableCPU > a[j].AvailableCPU || (a[i].AvailableCPU == a[j].AvailableCPU && a[i].ScheduledVMs < a[j].ScheduledVMs)
 }

-func (a sortByAvailableMemory) String() string {
+func (a sortByResources) String() string {
 	o, _ := json.Marshal(a)
 	return string(o)
 }
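To see the new comparator's ordering end to end, here is a self-contained sketch, not part of the commit, with nodeInfo re-declared locally so it runs outside the module. Nodes sort by free memory descending, then free CPU descending, then scheduled guest count ascending:

package main

import (
	"fmt"
	"sort"
)

type nodeInfo struct {
	Name            string
	AvailableMemory uint64
	AvailableCPU    uint64
	ScheduledVMs    int
}

type sortByResources []nodeInfo

func (a sortByResources) Len() int      { return len(a) }
func (a sortByResources) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sortByResources) Less(i, j int) bool {
	if a[i].AvailableMemory != a[j].AvailableMemory {
		return a[i].AvailableMemory > a[j].AvailableMemory // most free memory first
	}
	// on equal memory: more free CPU first, then fewer scheduled guests first
	return a[i].AvailableCPU > a[j].AvailableCPU ||
		(a[i].AvailableCPU == a[j].AvailableCPU && a[i].ScheduledVMs < a[j].ScheduledVMs)
}

func main() {
	nodes := sortByResources{
		{Name: "pve1", AvailableMemory: 32, AvailableCPU: 8, ScheduledVMs: 2},
		{Name: "pve2", AvailableMemory: 32, AvailableCPU: 16, ScheduledVMs: 5},
		{Name: "pve3", AvailableMemory: 64, AvailableCPU: 4, ScheduledVMs: 9},
	}
	sort.Sort(nodes)
	for _, n := range nodes {
		fmt.Println(n.Name) // pve3 (most memory), pve2 (more CPU), pve1
	}
}

Note that selectNode only uses this order directly when PreferLowerGuestCount is false; otherwise the subsequent byReplicas sort picks the final node.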

internal/service/scheduler/vmscheduler_test.go

Lines changed: 35 additions & 20 deletions
@@ -25,10 +25,10 @@ import (
 	"github.com/stretchr/testify/require"
 )

-type fakeResourceClient map[string]uint64
+type fakeResourceClient map[string]nodeInfo

-func (c fakeResourceClient) GetReservableMemoryBytes(_ context.Context, nodeName string, _ uint64) (uint64, error) {
-	return c[nodeName], nil
+func (c fakeResourceClient) GetReservableResources(_ context.Context, nodeName string, _ uint64, _ uint64) (uint64, uint64, error) {
+	return c[nodeName].AvailableMemory, c[nodeName].AvailableCPU, nil
 }

 func miBytes(in uint64) uint64 {
@@ -39,10 +39,18 @@ func TestSelectNode(t *testing.T) {
 	allowedNodes := []string{"pve1", "pve2", "pve3"}
 	var locations []infrav1.NodeLocation
 	const requestMiB = 8
-	availableMem := map[string]uint64{
-		"pve1": miBytes(20),
-		"pve2": miBytes(30),
-		"pve3": miBytes(15),
+	const requestCores = 2
+	cpuAdjustment := uint64(100)
+
+	schedulerHints := &infrav1.SchedulerHints{
+		// This defaults to true in our CRD
+		PreferLowerGuestCount: true,
+		CPUAdjustment:         &cpuAdjustment,
+	}
+	availableResources := map[string]nodeInfo{
+		"pve1": {AvailableMemory: miBytes(20), AvailableCPU: uint64(16)},
+		"pve2": {AvailableMemory: miBytes(30), AvailableCPU: uint64(16)},
+		"pve3": {AvailableMemory: miBytes(15), AvailableCPU: uint64(16)},
 	}

 	expectedNodes := []string{
@@ -57,40 +65,47 @@ func TestSelectNode(t *testing.T) {
 			proxmoxMachine := &infrav1.ProxmoxMachine{
 				Spec: infrav1.ProxmoxMachineSpec{
 					MemoryMiB: requestMiB,
+					NumCores:  requestCores,
 				},
 			}

-			client := fakeResourceClient(availableMem)
+			client := fakeResourceClient(availableResources)

-			node, err := selectNode(context.Background(), client, proxmoxMachine, locations, allowedNodes, &infrav1.SchedulerHints{})
+			node, err := selectNode(context.Background(), client, proxmoxMachine, locations, allowedNodes, schedulerHints)
 			require.NoError(t, err)
 			require.Equal(t, expectedNode, node)

-			require.Greater(t, availableMem[node], miBytes(requestMiB))
-			availableMem[node] -= miBytes(requestMiB)
+			require.Greater(t, availableResources[node].AvailableMemory, miBytes(requestMiB))
+			if entry, ok := availableResources[node]; ok {
+				entry.AvailableMemory -= miBytes(requestMiB)
+				entry.AvailableCPU -= requestCores
+				availableResources[node] = entry
+			}

 			locations = append(locations, infrav1.NodeLocation{Node: node})
 		})
 	}

-	t.Run("out of memory", func(t *testing.T) {
+	t.Run("out of resources", func(t *testing.T) {
 		proxmoxMachine := &infrav1.ProxmoxMachine{
 			Spec: infrav1.ProxmoxMachineSpec{
 				MemoryMiB: requestMiB,
+				NumCores:  requestCores,
 			},
 		}

-		client := fakeResourceClient(availableMem)
+		client := fakeResourceClient(availableResources)

-		node, err := selectNode(context.Background(), client, proxmoxMachine, locations, allowedNodes, &infrav1.SchedulerHints{})
-		require.ErrorAs(t, err, &InsufficientMemoryError{})
+		node, err := selectNode(context.Background(), client, proxmoxMachine, locations, allowedNodes, schedulerHints)
+		require.ErrorAs(t, err, &InsufficientResourcesError{})
 		require.Empty(t, node)

-		expectMem := map[string]uint64{
-			"pve1": miBytes(4), // 20 - 8 x 2
-			"pve2": miBytes(6), // 30 - 8 x 3
-			"pve3": miBytes(7), // 15 - 8 x 1
+		expectResources := map[string]nodeInfo{
+			"pve1": {AvailableMemory: miBytes(4), AvailableCPU: uint64(12)}, // 20 - 8 x 2
+			"pve2": {AvailableMemory: miBytes(6), AvailableCPU: uint64(10)}, // 30 - 8 x 3
+			"pve3": {AvailableMemory: miBytes(7), AvailableCPU: uint64(14)}, // 15 - 8 x 1
 		}
-		require.Equal(t, expectMem, availableMem)
+
+		require.Equal(t, expectResources, availableResources)
 	})
 }
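A side note on the entry/ok pattern the updated test uses: struct values stored in a Go map are not addressable, so an element cannot be mutated in place; it has to be copied out, modified, and written back. A tiny sketch, not part of the commit:

package main

import "fmt"

type nodeInfo struct {
	AvailableMemory uint64
	AvailableCPU    uint64
}

func main() {
	resources := map[string]nodeInfo{"pve1": {AvailableMemory: 64, AvailableCPU: 16}}

	// resources["pve1"].AvailableMemory -= 8 // does not compile: map element is not addressable

	if entry, ok := resources["pve1"]; ok {
		entry.AvailableMemory -= 8
		entry.AvailableCPU -= 2
		resources["pve1"] = entry
	}

	fmt.Println(resources["pve1"]) // {56 14}
}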

internal/service/vmservice/vm.go

Lines changed: 1 addition & 1 deletion
@@ -308,7 +308,7 @@ func createVM(ctx context.Context, scope *scope.MachineScope) (proxmox.VMCloneRe
 	var err error
 	options.Target, err = selectNextNode(ctx, scope)
 	if err != nil {
-		if errors.As(err, &scheduler.InsufficientMemoryError{}) {
+		if errors.As(err, &scheduler.InsufficientResourcesError{}) {
 			scope.SetFailureMessage(err)
 			scope.SetFailureReason(capierrors.InsufficientResourcesMachineError)
 		}
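Why errors.As matches here even though createVM sees a wrapped error: the scheduler returns InsufficientResourcesError by value, and errors.As unwraps the chain until it finds a value assignable to the type the target pointer points to. A minimal sketch mirroring the types above, not part of the commit:

package main

import (
	"errors"
	"fmt"
)

type InsufficientResourcesError struct {
	requestedMemory uint64
	requestedCores  uint64
}

func (err InsufficientResourcesError) Error() string {
	return fmt.Sprintf("cannot reserve %dB of memory and/or %d vCores in cluster",
		err.requestedMemory, err.requestedCores)
}

func main() {
	wrapped := fmt.Errorf("error: %w", InsufficientResourcesError{8 << 20, 2})

	// target is a pointer to the value type, so the wrapped value is found
	if errors.As(wrapped, &InsufficientResourcesError{}) {
		fmt.Println("insufficient resources:", wrapped)
	}
}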

internal/service/vmservice/vm_test.go

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ func TestEnsureVirtualMachine_CreateVM_SelectNode_InsufficientMemory(t *testing.
 	machineScope.InfraCluster.ProxmoxCluster.Spec.AllowedNodes = []string{"node1"}

 	selectNextNode = func(context.Context, *scope.MachineScope) (string, error) {
-		return "", fmt.Errorf("error: %w", scheduler.InsufficientMemoryError{})
+		return "", fmt.Errorf("error: %w", scheduler.InsufficientResourcesError{})
 	}
 	t.Cleanup(func() { selectNextNode = scheduler.ScheduleVM })

pkg/proxmox/client.go

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ type Client interface {

 	GetTask(ctx context.Context, upID string) (*proxmox.Task, error)

-	GetReservableMemoryBytes(ctx context.Context, nodeName string, nodeMemoryAdjustment uint64) (uint64, error)
+	GetReservableResources(ctx context.Context, nodeName string, nodeMemoryAdjustment uint64, nodeCPUAdjustment uint64) (uint64, uint64, error)

 	ResizeDisk(ctx context.Context, vm *proxmox.VirtualMachine, disk, size string) error
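The scheduler does not depend on this full Client interface; it declares its own one-method resourceClient (see vmscheduler.go above), which any full client satisfies structurally. A small sketch of that relationship, with stubClient as an illustrative stand-in rather than the real implementation:

package main

import (
	"context"
	"fmt"
)

// the scheduler's narrow view of the client, as declared in vmscheduler.go
type resourceClient interface {
	GetReservableResources(context.Context, string, uint64, uint64) (uint64, uint64, error)
}

// stand-in for a full Proxmox client implementation
type stubClient struct{}

func (stubClient) GetReservableResources(_ context.Context, _ string, _, _ uint64) (uint64, uint64, error) {
	return 64 << 30, 16, nil // pretend 64 GiB and 16 vCores are reservable
}

func main() {
	var rc resourceClient = stubClient{} // compiles because the method set matches
	mem, cpu, err := rc.GetReservableResources(context.Background(), "pve1", 100, 100)
	fmt.Println(mem, cpu, err)
}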