Skip to content
Open
41 changes: 37 additions & 4 deletions e2e/validators.go
Original file line number Diff line number Diff line change
Expand Up @@ -2909,10 +2909,21 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) {
"collect-windows-logs.ps1 failed or did not produce a zip file")
}

// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known
// LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded.
// Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc).
// To add a new CVE mitigation, append the module name to the list below.
// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known LPE
// vulnerabilities (CVE-2026-31431 / DirtyFrag / Fragnesia: algif_aead, esp4, esp6, rxrpc)
// are handled correctly per OS:
//
// - Ubuntu / Mariner / AzureLinux OSGuard: full check — modprobe config entries are
// present, modules are NOT loaded, and modprobe refuses to load them.
// - AzureLinux 3.0 (regular, not OSGuard): assert ABSENCE of the four modprobe blacklist entries. AzL3 is
// descoped from the mitigation because kernel 6.6.139.1-1.azl3 and later fix all
// three CVEs upstream, AND customer workloads on AzL3 require those modules (the
// blacklist actively blocks legitimate use cases). Newly-built AzL3 VHDs therefore
// no longer ship the modprobe-CIS.conf entries, and E2E runs against freshly-built
// VHDs. See https://github.com/Azure/AKS/issues/5753.
//
// To add a new CVE mitigation, append the module name to BOTH lists below —
// the AzureLinux 3.0 absence-check list AND the default presence + load-refusal list.
func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) {
s.T.Helper()

Expand All @@ -2921,6 +2932,28 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) {
return
}

// AzureLinux 3.0 (regular, NOT OSGuard): kernel 6.6.139.1-1.azl3+ supersedes the modprobe
// blacklist and the bake-in has been removed because customers need those modules. Assert
// the blacklist entries are NOT present on freshly-built AzL3 VHDs. AzureLinux OSGuard is
// intentionally kept in-scope (falls through to the full presence + load-refusal check below).
if s.VHD.OS == config.OSAzureLinux && !s.VHD.Distro.IsAzureLinuxOSGuardDistro() {
script := strings.Join([]string{
`failed=0`,
`for mod in algif_aead esp4 esp6 rxrpc; do`,
` if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then`,
` echo "FAIL: ${mod} blacklist entry unexpectedly present on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)"`,
` failed=1`,
Comment thread
djsly marked this conversation as resolved.
` else`,
` echo "PASS: ${mod} blacklist correctly absent on AzureLinux 3.0"`,
` fi`,
`done`,
`exit $failed`,
}, "\n")
execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0,
"AzureLinux 3.0 modprobe blacklist should be absent (kernel fix 6.6.139.1-1.azl3+ supersedes; bake-in removed; no `install` or `blacklist` directive should remain)")
return
}

script := strings.Join([]string{
`failed=0`,
`for mod in algif_aead esp4 esp6 rxrpc; do`,
Expand Down
25 changes: 21 additions & 4 deletions parts/linux/cloud-init/artifacts/cse_main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,27 @@ EOF

logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD

# Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag).
# Applies to existing VHDs that don't yet have the modprobe-CIS.conf fix baked in.
# To add a new CVE mitigation, add a disableVulnerableKernelModule call below.
if isUbuntu "$OS" || isMarinerOrAzureLinux "$OS"; then
# Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia).
# Applied at CSE provisioning time on Ubuntu, Mariner/AzureLinux 2.0 (AzL2), and AzureLinux
# OSGuard. To add a new CVE mitigation, add a disableVulnerableKernelModule call below.
Comment on lines +322 to +324
#
# AzureLinux 3.0 (regular and Kata) is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy
Comment thread
djsly marked this conversation as resolved.
# Fail / DirtyFrag / Fragnesia upstream, so the runtime modprobe blacklist is no longer
# required. Newly-built AzL3 VHDs also no longer ship the four entries in modprobe-CIS.conf —
# customers reported the blacklist actively blocks legitimate workloads that use
# algif_aead / esp4 / esp6 / rxrpc on the patched kernel. Existing in-support AzL3 VHDs
# (built before this change) still have the bake-in until they are rolled; no CSE-time active
# removal is performed — customers will get the unblocked configuration on their next AzL3
# VHD upgrade. AzureLinux OSGuard (hardened secure-boot variant) is intentionally kept in
# scope as defense-in-depth: OSGuard workloads are security-sensitive and do not require
# the affected kernel modules.
#
# Mariner/AzureLinux 2.0 (AzL2) images are frozen (see FrozenCBLMarinerV2AndAzureLinuxV2SIGImageVersion=202512.06.0),
# so they cannot pick up new modprobe-CIS.conf entries for these 2026 CVEs via VHD refresh.
# Keep the CSE-time runtime apply enabled for AzL2/Mariner while those images remain supported.
# See https://github.com/Azure/AKS/issues/5753.
#
if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then
Comment thread
djsly marked this conversation as resolved.
disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)"
Comment on lines +337 to 343
disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)"
disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env shellspec

# Unit tests for disableVulnerableKernelModule() in cse_main.sh
# and the OS gate that selects which OS variants get the runtime apply.
Comment thread
djsly marked this conversation as resolved.

Describe 'disableVulnerableKernelModule()'
MODPROBE_DIR=""
Expand Down Expand Up @@ -70,3 +71,78 @@ Describe 'disableVulnerableKernelModule()'
The output should not include "unloaded"
End
End

# Tests the OS gate that decides whether to call disableVulnerableKernelModule
# at CSE provisioning time. Apply on: Ubuntu, Mariner/AzureLinux 2.0 (AzL2), AzureLinux OSGuard
# (defense-in-depth — hardened secure-boot variant intentionally retains the mitigation). Skip on:
# AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the upstream fix and
# customers reported the blacklist actively blocks legitimate workloads), ACL, Flatcar.
# See https://github.com/Azure/AKS/issues/5753.
Describe 'CVE kernel module mitigation OS gate'
  Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh"

  # gate prints "APPLY" when the current OS / OS_VARIANT / OS_VERSION combination
  # passes the mitigation condition, and "SKIP" otherwise.
  # Mirrors the condition in cse_main.sh basePrep — must be kept in sync.
  gate() {
    if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then
      echo "APPLY"
    else
      echo "SKIP"
    fi
  }

  It 'applies the mitigation on Ubuntu'
    OS="${UBUNTU_OS_NAME}"
    OS_VARIANT=""
    When call gate
    The output should equal "APPLY"
  End

  It 'applies the mitigation on AzureLinux 3.0 OSGuard — defense-in-depth retained'
    OS="${AZURELINUX_OS_NAME}"
    OS_VARIANT="${AZURELINUX_OSGUARD_OS_VARIANT}"
    # Pin the version so the APPLY result is attributable to the OSGuard clause,
    # not to the AzL2 version clause picking up an ambient OS_VERSION.
    OS_VERSION="3.0"
    When call gate
    The output should equal "APPLY"
  End

  It 'applies the mitigation on Mariner/AzureLinux 2.0 (AzL2) — VHDs are frozen so CSE-time apply is required'
    OS="${MARINER_OS_NAME}"
    OS_VARIANT=""
    OS_VERSION="2.0"
    When call gate
    The output should equal "APPLY"
  End

  It 'applies the mitigation on Mariner Kata (AzL2) — VHDs are frozen so CSE-time apply is required'
    OS="${MARINER_KATA_OS_NAME}"
    OS_VARIANT=""
    OS_VERSION="2.0"
    When call gate
    The output should equal "APPLY"
  End

  It 'skips on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)'
    OS="${AZURELINUX_OS_NAME}"
    OS_VARIANT=""
    # Explicitly exercise the 3.0 version value; without this the example would
    # depend on OS_VERSION happening to be unset in the test environment.
    OS_VERSION="3.0"
    When call gate
    The output should equal "SKIP"
  End

  It 'skips on AzureLinux 3.0 Kata (same kernel as AzL3 regular)'
    OS="${AZURELINUX_KATA_OS_NAME}"
    OS_VARIANT=""
    # Same reasoning as the AzL3 regular example: pin the version under test.
    OS_VERSION="3.0"
    When call gate
    The output should equal "SKIP"
  End

  It 'skips on ACL (Flatcar-based; never in scope)'
    OS="${ACL_OS_NAME}"
    OS_VARIANT=""
    When call gate
    The output should equal "SKIP"
  End

  It 'skips on Flatcar (never in scope)'
    OS="${FLATCAR_OS_NAME}"
    OS_VARIANT=""
    When call gate
    The output should equal "SKIP"
  End
End
15 changes: 14 additions & 1 deletion vhdbuilder/packer/packer_source.sh
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,20 @@ copyPackerFiles() {
cpAndMode $ETC_ISSUE_CONFIG_SRC $ETC_ISSUE_CONFIG_DEST 644
cpAndMode $ETC_ISSUE_NET_CONFIG_SRC $ETC_ISSUE_NET_CONFIG_DEST 644
cpAndMode $SSHD_CONFIG_SRC $SSHD_CONFIG_DEST 600
cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644
# CVE-2026-31431 (Copy Fail), DirtyFrag, Fragnesia mitigation: bake modprobe blacklist
# for algif_aead / esp4 / esp6 / rxrpc into the VHD.
#
# Skipped on AzureLinux 3.0 (non-OSGuard) because:
# 1. The upstream kernel fix in 6.6.139.1-1.azl3+ supersedes the modprobe blacklist.
# 2. Customer workloads on AzL3 require those kernel modules; the bake-in actively
# blocks legitimate use cases.
# Ubuntu and Mariner (AzL2) still get the bake-in — their kernels are not patched
# upstream yet. AzureLinux OSGuard also intentionally retains the bake-in (it is
# excluded from the skip condition below). See https://github.com/Azure/AKS/issues/5753.
if isAzureLinux "$OS" "$OS_VARIANT" && [ "${OS_VERSION}" = "3.0" ] && ! isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then
echo "Skipping modprobe-CIS.conf bake-in on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix; OSGuard intentionally retains the bake-in)"
else
cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644
fi
cpAndMode $PWQUALITY_CONF_SRC $PWQUALITY_CONF_DEST 600
cpAndMode $PAM_D_SU_SRC $PAM_D_SU_DEST 644
cpAndMode $PROFILE_D_PATH_SH_SRC $PROFILE_D_PATH_SH_DEST 755
Expand Down
31 changes: 29 additions & 2 deletions vhdbuilder/packer/test/linux-vhd-content-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1353,12 +1353,39 @@ testNfsServerService() {

# Verify all kernel modules with known LPE vulnerabilities are disabled.
# Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc).
# To add a new CVE mitigation, append the module to the loop below.
# To add a new CVE mitigation, append the module to BOTH loops below — the
# AzureLinux 3.0 absence loop AND the default presence + load-refusal loop.
#
# AzureLinux 3.0 is descoped: kernel 6.6.139.1-1.azl3+ fixes the CVEs upstream and
# the modprobe blacklist is NOT baked into newly-built AzL3 VHDs (customer workloads
# require those modules). On AzL3 we therefore assert the blacklist entries are
# ABSENT. Ubuntu and Mariner (AzL2) still assert presence + load-refusal.
testVulnerableKernelModulesDisabled() {
local os_sku="${1:-$OS_SKU}"
local os_version="${2:-$OS_VERSION}"
local test="testVulnerableKernelModulesDisabled"
echo "$test:Start"

local failed=0

if [ "$os_sku" = "AzureLinux" ] && [ "$os_version" = "3.0" ]; then
for mod in algif_aead esp4 esp6 rxrpc; do
if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then
err "$test" "${mod} blacklist entry unexpectedly present in /etc/modprobe.d/*.conf on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes; no 'install' or 'blacklist' directive should remain)"
failed=1
Comment thread
djsly marked this conversation as resolved.
else
echo "$test: ${mod} blacklist correctly absent on AzureLinux 3.0"
fi
done

if [ "$failed" -ne 0 ]; then
return 1
fi

echo "$test:Finish"
return 0
fi

for mod in algif_aead esp4 esp6 rxrpc; do
if ! grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then
err "$test" "${mod} disable rule not found in /etc/modprobe.d/*.conf"
Expand Down Expand Up @@ -2472,4 +2499,4 @@ testInspektorGadgetAssets
testPackageDownloadURLFallbackLogic
testFileOwnership $OS_SKU
testDiskQueueServiceIsActive
testVulnerableKernelModulesDisabled
testVulnerableKernelModulesDisabled $OS_SKU $OS_VERSION
Loading