Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/metrics/loader_legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ func isCollectableEvent(event EventDefinition, metadata Metadata) bool {
// - their corresponding device is not found
// - not in system-wide collection scope
if event.Device != "cpu" && event.Device != "" {
if !metadata.SupportsUncore {
slog.Debug("Uncore events not supported on target", slog.String("event", event.Name))
return false
}
if flagScope == scopeProcess || flagScope == scopeCgroup {
slog.Debug("Uncore events not supported in process or cgroup scope", slog.String("event", event.Name))
return false
Expand Down
8 changes: 8 additions & 0 deletions cmd/metrics/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
scriptKernelVersion = "kernel version"
scriptARMSlots = "arm slots"
scriptARMCPUID = "arm cpuid"
scriptPerfStatAMDUncoreProbe = "perf stat amd uncore probe"
)

// CommonMetadata -- common to all architectures
Expand Down Expand Up @@ -213,6 +214,13 @@ BEGIN {
Architectures: []string{cpus.X86Architecture},
Depends: []string{"perf"},
},
{
Name: scriptPerfStatAMDUncoreProbe,
ScriptTemplate: `perf stat -a -e "l3/event=0x4,umask=0xff,enallcores=0x1,enallslices=0x1,threadmask=0x3,name='l3_lookup_state.all_coherent_accesses_to_l3'/" sleep 1`,
Architectures: []string{cpus.X86Architecture},
Vendors: []string{cpus.AMDVendor},
Depends: []string{"perf"},
},
{
Name: scriptPerfStatFixedInstr,
ScriptTemplate: "perf stat -a -e '{{{.InstructionsList}}}' sleep 1",
Expand Down
38 changes: 38 additions & 0 deletions cmd/metrics/metadata_x86.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,18 @@ func (c *X86MetadataCollector) CollectMetadata(t target.Target, noRoot bool, noS
}
metadata.SupportsUncore = c.checkUncoreSupport(metadata.UncoreDeviceIDs, isAMDArchitecture)

// On AMD, probe that L3 uncore actually works (e.g. GCP VMs expose l3 in sysfs but perf cannot use it).
// If the probe fails, disable uncore so collection uses core-only events and still produces metrics.
if isAMDArchitecture && metadata.SupportsUncore {
if output, ok := scriptOutputs[scriptPerfStatAMDUncoreProbe]; ok {
if !getSupportsAMDUncore(output) {
slog.Warn("AMD L3 uncore probe failed, disabling uncore metrics", slog.String("stderr", output.Stderr))
metadata.SupportsUncore = false
removeUncoreDevices(metadata.UncoreDeviceIDs, "l3", "df")
}
}
}

return metadata, nil
}

Expand Down Expand Up @@ -229,6 +241,32 @@ func (c *X86MetadataCollector) checkUncoreSupport(uncoreDeviceIDs map[string][]i
return false
}

// getSupportsAMDUncore reports whether the AMD uncore probe script ran cleanly,
// i.e. the L3 PMU appears to be usable by perf.
// On some VMs (e.g. GCP AMD Turin), sysfs lists amd_l3 but perf cannot use it.
//
// The probe is treated as failed when any of the following holds:
//   - the script exited with a non-zero exit code
//   - stderr reports that the 'l3' PMU (or an event on it) cannot be found
//   - stderr reports an event syntax error that mentions l3
//   - stderr contains the "<not supported>" marker
func getSupportsAMDUncore(output script.ScriptOutput) bool {
	probeErr := output.Stderr
	switch {
	case output.Exitcode != 0:
		return false
	case strings.Contains(probeErr, "Unable to find PMU or event on a PMU of 'l3'"):
		return false
	case strings.Contains(probeErr, "event syntax error") && strings.Contains(probeErr, "l3"):
		return false
	case strings.Contains(probeErr, "<not supported>"):
		return false
	}
	// None of the known failure signatures matched.
	return true
}

// removeUncoreDevices deletes every entry of uncoreDeviceIDs whose key appears
// in deviceNames (used when the uncore probe fails). The map is modified in
// place; names that are not present in the map are ignored.
func removeUncoreDevices(uncoreDeviceIDs map[string][]int, deviceNames ...string) {
	for i := 0; i < len(deviceNames); i++ {
		delete(uncoreDeviceIDs, deviceNames[i])
	}
}

// --- x86-specific helper functions ---

// getUncoreDeviceIDs returns a map of device type to list of device indices.
Expand Down
Loading