Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 242 additions & 0 deletions cluster-agent/internal/state/cpuPolicy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// SPDX-FileCopyrightText: (C) 2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

package state

import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/open-edge-platform/edge-node-agents/common/pkg/utils"
)

const bitSize = 32

var reservedCPUsPattern = regexp.MustCompile(`--reserved-cpus[=\s]+["']?([^"'\s]+)["']?`)

type CPU struct {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cpu section of the HW agent includes this already, if the cluster agent also needs to check the cpu topology, we should move this the common folder as a shared function and have cluster and hw agents call it from there instead

Sockets uint32
Topology *CPUTopology
}

type CPUTopology struct {
Sockets []*Socket
}

type Socket struct {
SocketID uint32
CoreGroups []*CoreGroup
}

type CoreGroup struct {
Type string
List []uint32
}

type cores struct {
CPU string
Socket string
MaxMhz string
}

// ResolveReservedCPUPolicy processes the install command and resolves reserved CPU policy keywords.
// If reserved-cpus is a literal value (e.g., "0-1", "16-31"), it passes through unchanged.
// If it's a policy keyword (auto, auto:pcore, auto:ecore), it computes the actual CPU set
// based on local system topology.
func ResolveReservedCPUPolicy(installCmd string) (string, error) {
matches := reservedCPUsPattern.FindStringSubmatch(installCmd)
if len(matches) < 2 {
return installCmd, nil
}

reservedValue := matches[1]
if !isCPUPolicy(reservedValue) {
log.Debugf("reserved-cpus value %q is literal, passing through unchanged", reservedValue)
return installCmd, nil
}

cpu, err := getCPUList()
if err != nil {
return "", fmt.Errorf("failed to get CPU information: %w", err)
}
if cpu == nil || cpu.Topology == nil || len(cpu.Topology.Sockets) == 0 {
return "", fmt.Errorf("invalid or empty CPU topology detected")
}

var firstPCoreList, eCoreList []uint32
for _, socket := range cpu.Topology.Sockets {
for _, coreGroup := range socket.CoreGroups {
if coreGroup.Type == "P-Core" && len(firstPCoreList) == 0 {
firstPCoreList = coreGroup.List
} else if coreGroup.Type == "E-Core" {
eCoreList = append(eCoreList, coreGroup.List...)
}
}
}

var cpuSet string
switch reservedValue {
case "auto", "auto:pcore":
if len(firstPCoreList) == 0 {
return "", fmt.Errorf("no P-cores detected in CPU topology")
}
cpuSet = formatCPUSet(firstPCoreList)
case "auto:ecore":
if len(eCoreList) > 0 {
cpuSet = formatCPUSet(eCoreList)
} else if len(firstPCoreList) > 0 {
log.Info("No E-cores detected, falling back to P-core reservation for auto:ecore policy")
cpuSet = formatCPUSet(firstPCoreList)
Comment on lines +89 to +91

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
} else if len(firstPCoreList) > 0 {
log.Info("No E-cores detected, falling back to P-core reservation for auto:ecore policy")
cpuSet = formatCPUSet(firstPCoreList)
} else if len(eCoreList) == 0 && len(firstPCoreList) > 0 {
log.Info("No E-cores detected, falling back to P-core reservation for auto:ecore policy")
cpuSet = formatCPUSet(firstPCoreList)

} else {
return "", fmt.Errorf("no P-cores detected in CPU topology")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is the E-core check stage, the error if this is reached should be unable to detect valid CPU topology or something similar

}
}

oldFlag := matches[0]
newFlag := fmt.Sprintf(`--reserved-cpus="%s"`, cpuSet)
modifiedCmd := strings.Replace(installCmd, oldFlag, newFlag, 1)

log.Infof("Resolved reserved CPU policy %q to %q", reservedValue, cpuSet)
return modifiedCmd, nil
}

// isCPUPolicy reports whether the given value is a recognized CPU policy keyword.
func isCPUPolicy(value string) bool {
return value == "auto" || value == "auto:pcore" || value == "auto:ecore"
}

// formatCPUSet converts a list of CPU IDs to kernel-acceptable range format.
// E.g., [0, 1, 2, 3] -> "0-3", [5] -> "5"
func formatCPUSet(cpuList []uint32) string {
if len(cpuList) == 0 {
return ""
}
first, last := cpuList[0], cpuList[len(cpuList)-1]
if first == last {
return fmt.Sprintf("%d", first)
}
return fmt.Sprintf("%d-%d", first, last)
}

// getCPUList collects CPU information from `lscpu` and processes the result to generate structured CPU data
// It returns with a CPU struct.
// This reuses the same logic as hardware-discovery-agent/internal/cpu/cpu.go
func getCPUList() (*CPU, error) {
dataBytes, err := utils.ReadFromCommand(nil, "lscpu")
if err != nil {
return &CPU{}, fmt.Errorf("failed to read data from command; error: %w", err)
}

lscpu := strings.Split(string(dataBytes), "\n")
var cpu CPU
var maxMhz string
for _, attribute := range lscpu {
attr := strings.TrimSpace(attribute)
if strings.HasPrefix(attr, "Socket(s)") {
socketStr := strings.TrimSpace(strings.TrimPrefix(attr, "Socket(s):"))
sockets, err := strconv.ParseUint(socketStr, 10, bitSize)
if err != nil {
continue
}
cpu.Sockets = uint32(sockets)
}
if strings.HasPrefix(attr, "CPU max MHz:") {
maxMhz = strings.TrimSpace(strings.TrimPrefix(attr, "CPU max MHz:"))
}
}

// If the number of sockets has been retrieved using the `lscpu` command
// above, we can determine how many P-Cores and E-Cores are enabled on the Edge Node.
if cpu.Sockets != 0 {
coreInfo := []*cores{}
coreDetails, err := utils.ReadFromCommand(nil, "lscpu", "--extended=CPU,SOCKET,MAXMHZ")
if err != nil {
return &cpu, fmt.Errorf("failed to read data from command; error: %w", err)
}
parseCoreDetails := strings.SplitAfter(string(coreDetails), "\n")

for _, coreData := range parseCoreDetails {
if strings.Contains(coreData, "CPU") || coreData == "" {
continue
}
var core cores
coreValues := strings.Fields(coreData)
core.CPU = coreValues[0]
core.Socket = coreValues[1]
coreMaxMhz := strings.Split(coreValues[2], "\n")
core.MaxMhz = coreMaxMhz[0]
coreInfo = append(coreInfo, &core)
}

cpuTopology := inferEPCores(cpu.Sockets, coreInfo, maxMhz)
cpu.Topology = cpuTopology
}

return &cpu, nil
}

// inferEPCores classifies cores as P-Core or E-Core based on frequency heuristic.
func inferEPCores(sockets uint32, coreInfo []*cores, coreMaxFreq string) *CPUTopology {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be evaluated on Panther Lake, which has three types of cores -- P, E, and LP. There may be more robust solutions. In particular, we should be able to determine whether a CPU is P or E/LP by looking at "/sys/devices/cpu_core/cpus" (P Cores) and "/sys/devices/cpu_atom/cpus" (E and LP cores). Then as a second stage you can differentiate between E and LP by looking at their max_frequency. This code also needs to be updated in hardware-discovery-agent.

socketInfo := []*Socket{}
for socketID := uint32(0); socketID < sockets; socketID++ {
// Determine a target max frequency for E Core detection based on the max frequency from lscpu.
maxCoreFreq, err := strconv.ParseUint(strings.TrimSuffix(coreMaxFreq, ".0000"), 10, 64)
if err != nil {
// If max frequency cannot be retrieved from lscpu, default to 0 so that all cores are considered P Cores.
maxCoreFreq = 0
}
eCoreTargetFreq := (3 * maxCoreFreq) / 4
socketDetails := getCoreGroupsPerSocket(socketID, coreInfo, eCoreTargetFreq)
socketInfo = append(socketInfo, socketDetails)
}
return &CPUTopology{Sockets: socketInfo}
}

// getCoreGroupsPerSocket groups cores by type (P-Core vs E-Core) for a single socket
func getCoreGroupsPerSocket(socketID uint32, coreInfo []*cores, coreMaxFreq uint64) *Socket {
pCoreList := make([]uint32, 0)
eCoreList := make([]uint32, 0)

for _, core := range coreInfo {
socket, err := strconv.ParseUint(core.Socket, 10, bitSize)
if err != nil {
socket = 0
}
if socket == uint64(socketID) {
cpu, err := strconv.ParseUint(core.CPU, 10, bitSize)
if err != nil {
continue
}
if core.MaxMhz == "-" {
// If max frequency is not found, default to P Core for cpu ID and continue
pCoreList = append(pCoreList, uint32(cpu))
continue
}
coreFreq, err := strconv.ParseUint(strings.TrimSuffix(core.MaxMhz, ".0000"), 10, 64)
if err != nil {
continue
}
if coreFreq <= coreMaxFreq {
eCoreList = append(eCoreList, uint32(cpu))
} else {
pCoreList = append(pCoreList, uint32(cpu))
}
}
}

coreGroups := []*CoreGroup{}
coreGroups = append(coreGroups, &CoreGroup{
Type: "P-Core",
List: pCoreList,
})
if len(eCoreList) > 0 {
coreGroups = append(coreGroups, &CoreGroup{
Type: "E-Core",
List: eCoreList,
})
}

return &Socket{SocketID: socketID, CoreGroups: coreGroups}
}
8 changes: 7 additions & 1 deletion cluster-agent/internal/state/installInProgress.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ type InstallInProgress struct {
func (s *InstallInProgress) Register() error {
log.Info("Start kubernetes engine installation script")

err := s.execute(s.sm.ctx, s.sm.installCmd)
resolvedCmd, err := ResolveReservedCPUPolicy(s.sm.installCmd)
if err != nil {
log.Warnf("Failed to resolve reserved CPU policy, proceeding with original command: %v", err)
resolvedCmd = s.sm.installCmd
}

err = s.execute(s.sm.ctx, resolvedCmd)
if err != nil {
s.sm.set(s.sm.inactive)
return err
Expand Down
Loading