feat(scheduler): support optional cpu constraints and make sort by guest count optional #50
@@ -78,6 +78,15 @@ type SchedulerHints struct {
 	// By default 100% of a node's memory will be used for allocation.
 	// +optional
 	MemoryAdjustment *uint64 `json:"memoryAdjustment,omitempty"`
 
+	// Like MemoryAdjustment, but for CPU resources.
+	// Defaults to 0 (disabled), as CPU is a compressible resource.
+	// +optional
+	CPUAdjustment *uint64 `json:"cpuAdjustment,omitempty"`
+
+	// +optional
Review comment: Here we also need a description, and an explanation of why this field is needed.
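A possible wording for the missing description, matching what the new selectNode code below actually does (a suggestion only, not part of this diff):

	// PreferLowerGuestCount makes the scheduler prefer the allowed node that
	// currently hosts the fewest guests over the node with the most free
	// resources, as long as that node can satisfy the requested resources.
	// Defaults to true.
	// +optional
	// +kubebuilder:default=true
	PreferLowerGuestCount bool `json:"preferLowerGuestCount,omitempty"`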
+	// +kubebuilder:default=true
+	PreferLowerGuestCount bool `json:"preferLowerGuestCount,omitempty"`
 }
 
 // GetMemoryAdjustment returns the memory adjustment percentage to use within the scheduler.
@@ -91,6 +100,17 @@ func (sh *SchedulerHints) GetMemoryAdjustment() uint64 {
 	return memoryAdjustment
 }
 
+// GetCPUAdjustment returns the cpu adjustment percentage to use within the scheduler.
+func (sh *SchedulerHints) GetCPUAdjustment() uint64 {
+	cpuAdjustment := uint64(0)
+
+	if sh != nil {
+		cpuAdjustment = ptr.Deref(sh.CPUAdjustment, 0)
+	}
+
+	return cpuAdjustment
+}
+
 // ProxmoxClusterStatus defines the observed state of ProxmoxCluster.
 type ProxmoxClusterStatus struct {
 	// Ready indicates that the cluster is ready.
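For illustration, a minimal, self-contained sketch of how the new getter behaves. It uses a condensed copy of the SchedulerHints fields touched by this PR, assumes ptr is k8s.io/utils/ptr, and assumes the percentage semantics mirror MemoryAdjustment (100 meaning 100% of the node's cores):

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

// Condensed copy of the fields touched by this PR, for illustration only.
type SchedulerHints struct {
	MemoryAdjustment      *uint64
	CPUAdjustment         *uint64
	PreferLowerGuestCount bool
}

func (sh *SchedulerHints) GetCPUAdjustment() uint64 {
	cpuAdjustment := uint64(0)

	if sh != nil {
		cpuAdjustment = ptr.Deref(sh.CPUAdjustment, 0)
	}

	return cpuAdjustment
}

func main() {
	hints := &SchedulerHints{CPUAdjustment: ptr.To(uint64(200))}
	fmt.Println(hints.GetCPUAdjustment()) // 200: allow scheduling up to 200% of a node's cores

	var unset *SchedulerHints
	fmt.Println(unset.GetCPUAdjustment()) // 0: nil-safe, CPU constraint disabled (the default)
}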
Some generated files are not rendered by default.
@@ -29,17 +29,16 @@ import (
 	"sigs.k8s.io/cluster-api/util"
 )
 
-// InsufficientMemoryError is used when the scheduler cannot assign a VM to a node because it would
-// exceed the node's memory limit.
-type InsufficientMemoryError struct {
-	node      string
-	available uint64
-	requested uint64
+// InsufficientResourcesError is used when the scheduler cannot assign a VM to a node because no node
+// would be able to provide the requested resources.
+type InsufficientResourcesError struct {
Review comment: I guess it's better to separate the error by CPU and memory (see the sketch after this hunk).
+	requestedMemory uint64
+	requestedCores  uint64
 }
 
-func (err InsufficientMemoryError) Error() string {
-	return fmt.Sprintf("cannot reserve %dB of memory on node %s: %dB available memory left",
-		err.requested, err.node, err.available)
+func (err InsufficientResourcesError) Error() string {
+	return fmt.Sprintf("cannot reserve %dB of memory and/or %d vCores in cluster",
+		err.requestedMemory, err.requestedCores)
 }
 
 // ScheduleVM decides which node to a ProxmoxMachine should be scheduled on.
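Regarding the review comment above about splitting the error type: one possible shape, with hypothetical names that are not part of this PR (relies on the file's existing fmt import):

// InsufficientMemoryError and InsufficientCPUError are a hypothetical split of
// InsufficientResourcesError, one per resource, as suggested in the review.
type InsufficientMemoryError struct {
	requestedMemory uint64
}

func (err InsufficientMemoryError) Error() string {
	return fmt.Sprintf("cannot reserve %dB of memory in cluster", err.requestedMemory)
}

type InsufficientCPUError struct {
	requestedCores uint64
}

func (err InsufficientCPUError) Error() string {
	return fmt.Sprintf("cannot reserve %d vCores in cluster", err.requestedCores)
}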
@@ -64,69 +63,92 @@ func selectNode(
 	allowedNodes []string,
 	schedulerHints *infrav1.SchedulerHints,
 ) (string, error) {
-	byMemory := make(sortByAvailableMemory, len(allowedNodes))
-	for i, nodeName := range allowedNodes {
-		mem, err := client.GetReservableMemoryBytes(ctx, nodeName, schedulerHints.GetMemoryAdjustment())
+	var nodes []nodeInfo
+
+	requestedMemory := uint64(machine.Spec.MemoryMiB) * 1024 * 1024 // convert to bytes
+	requestedCores := uint64(machine.Spec.NumCores)
+
+	for _, nodeName := range allowedNodes {
+		mem, cpu, err := client.GetReservableResources(
+			ctx,
+			nodeName,
+			schedulerHints.GetMemoryAdjustment(),
+			schedulerHints.GetCPUAdjustment(),
+		)
 		if err != nil {
 			return "", err
 		}
-		byMemory[i] = nodeInfo{Name: nodeName, AvailableMemory: mem}
-	}
-
-	sort.Sort(byMemory)
-
-	requestedMemory := uint64(machine.Spec.MemoryMiB) * 1024 * 1024 // convert to bytes
-	if requestedMemory > byMemory[0].AvailableMemory {
-		// no more space on the node with the highest amount of available memory
-		return "", InsufficientMemoryError{
-			node:      byMemory[0].Name,
-			available: byMemory[0].AvailableMemory,
-			requested: requestedMemory,
-		}
-	}
+
+		// if MemoryAdjustment is explicitly set to 0 (zero), pretend we have enough mem for the guest
+		if schedulerHints.GetMemoryAdjustment() == 0 {
+			mem = requestedMemory
+		}
+		// if CPUAdjustment is explicitly set to 0 (zero), pretend we have enough cpu for the guest
+		if schedulerHints.GetCPUAdjustment() == 0 {
+			cpu = requestedCores
+		}
+
+		node := nodeInfo{Name: nodeName, AvailableMemory: mem, AvailableCPU: cpu}
+		if node.AvailableMemory >= requestedMemory && node.AvailableCPU >= requestedCores {
+			nodes = append(nodes, node)
+		}
+	}
+
+	if len(nodes) == 0 {
+		return "", InsufficientResourcesError{requestedMemory, requestedCores}
+	}
+
+	// Sort nodes by free memory and then free CPU in descending order
+	byResources := make(sortByResources, len(nodes))
+	copy(byResources, nodes)
+	sort.Sort(byResources)
+
+	decision := byResources[0].Name
 
 	// count the existing vms per node
 	nodeCounter := make(map[string]int)
 	for _, nl := range locations {
 		nodeCounter[nl.Node]++
 	}
 
-	for i, info := range byMemory {
+	for i, info := range byResources {
 		info.ScheduledVMs = nodeCounter[info.Name]
-		byMemory[i] = info
+		byResources[i] = info
 	}
 
-	byReplicas := make(sortByReplicas, len(byMemory))
-	copy(byReplicas, byMemory)
+	byReplicas := make(sortByReplicas, len(byResources))
+	copy(byReplicas, byResources)
 
 	sort.Sort(byReplicas)
 
-	decision := byMemory[0].Name
-	if requestedMemory < byReplicas[0].AvailableMemory {
-		// distribute round-robin when memory allows it
+	// if memory allocation allows it, pick the node with the least amount of guests
+	if schedulerHints.PreferLowerGuestCount {
 		decision = byReplicas[0].Name
 	}
 
 	if logger := logr.FromContextOrDiscard(ctx); logger.V(4).Enabled() {
 		// only construct values when message should actually be logged
 		logger.Info("Scheduler decision",
 			"byReplicas", byReplicas.String(),
-			"byMemory", byMemory.String(),
+			"byResources", byResources.String(),
 			"requestedMemory", requestedMemory,
+			"requestedCores", requestedCores,
 			"resultNode", decision,
 			"schedulerHints", schedulerHints,
 		)
 	}
 
 	return decision, nil
 }
 
 type resourceClient interface {
-	GetReservableMemoryBytes(context.Context, string, uint64) (uint64, error)
+	GetReservableResources(context.Context, string, uint64, uint64) (uint64, uint64, error)
 }
 
 type nodeInfo struct {
 	Name            string `json:"node"`
 	AvailableMemory uint64 `json:"mem"`
+	AvailableCPU    uint64 `json:"cpu"`
 	ScheduledVMs    int    `json:"vms"`
 }
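Since the resourceClient interface changes, a test double now only needs to implement GetReservableResources. A hypothetical fake, for illustration only (the repository's actual tests may use a different helper; relies on the file's existing context import):

// fakeResourceClient maps node names to canned reservable resources.
type fakeResourceClient map[string]struct {
	Mem uint64 // reservable memory in bytes
	CPU uint64 // reservable vCores
}

func (f fakeResourceClient) GetReservableResources(
	_ context.Context, nodeName string, _ uint64, _ uint64,
) (uint64, uint64, error) {
	r := f[nodeName]
	return r.Mem, r.CPU, nil
}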
@@ -143,16 +165,21 @@ func (a sortByReplicas) String() string {
 	return string(o)
 }
 
-type sortByAvailableMemory []nodeInfo
+type sortByResources []nodeInfo
 
-func (a sortByAvailableMemory) Len() int      { return len(a) }
-func (a sortByAvailableMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a sortByAvailableMemory) Less(i, j int) bool {
-	// more available memory = lower index
-	return a[i].AvailableMemory > a[j].AvailableMemory
+func (a sortByResources) Len() int      { return len(a) }
+func (a sortByResources) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a sortByResources) Less(i, j int) bool {
+	// Compare by free memory and free CPU in descending order
+	if a[i].AvailableMemory != a[j].AvailableMemory {
+		return a[i].AvailableMemory > a[j].AvailableMemory
+	}
+
+	// If free memory is equal, sort by free CPU in descending order
+	return a[i].AvailableCPU > a[j].AvailableCPU || (a[i].AvailableCPU == a[j].AvailableCPU && a[i].ScheduledVMs < a[j].ScheduledVMs)
 }
 
-func (a sortByAvailableMemory) String() string {
+func (a sortByResources) String() string {
 	o, _ := json.Marshal(a)
 	return string(o)
 }
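To make the new ordering concrete, a small self-contained sketch that copies nodeInfo and the comparator above and sorts three made-up nodes (names and numbers are invented):

package main

import (
	"fmt"
	"sort"
)

// Minimal copy of nodeInfo and the sortByResources comparator from this PR,
// for illustration only.
type nodeInfo struct {
	Name            string
	AvailableMemory uint64
	AvailableCPU    uint64
	ScheduledVMs    int
}

type sortByResources []nodeInfo

func (a sortByResources) Len() int      { return len(a) }
func (a sortByResources) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sortByResources) Less(i, j int) bool {
	if a[i].AvailableMemory != a[j].AvailableMemory {
		return a[i].AvailableMemory > a[j].AvailableMemory
	}
	return a[i].AvailableCPU > a[j].AvailableCPU ||
		(a[i].AvailableCPU == a[j].AvailableCPU && a[i].ScheduledVMs < a[j].ScheduledVMs)
}

func main() {
	nodes := sortByResources{
		{Name: "pve1", AvailableMemory: 8 << 30, AvailableCPU: 8, ScheduledVMs: 2},
		{Name: "pve2", AvailableMemory: 16 << 30, AvailableCPU: 4, ScheduledVMs: 5},
		{Name: "pve3", AvailableMemory: 8 << 30, AvailableCPU: 8, ScheduledVMs: 1},
	}
	sort.Sort(nodes)
	for _, n := range nodes {
		fmt.Println(n.Name)
	}
	// Prints: pve2 (most free memory), then pve3 (ties with pve1 on memory and
	// CPU, but hosts fewer guests), then pve1.
}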
Review comment: Please write a meaningful description here, and re-explain the adjustment term here as well.
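If this refers to the CPUAdjustment field, one possible wording that mirrors the existing MemoryAdjustment documentation (a suggestion only, not part of this diff; the percentage semantics are assumed to match MemoryAdjustment):

	// CPUAdjustment allows to adjust the number of vCores that can be scheduled
	// on a node, as a percentage of the node's physical cores.
	// A value of 100 allocates at most as many vCores as the node has cores,
	// while e.g. 300 allows overprovisioning up to 3 vCores per physical core.
	// Defaults to 0, which disables the CPU constraint entirely, since CPU is a
	// compressible resource.
	// +optional
	CPUAdjustment *uint64 `json:"cpuAdjustment,omitempty"`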