diff --git a/e2e/validators.go b/e2e/validators.go index 36e674105bf..272cd33962f 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2909,10 +2909,21 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) { "collect-windows-logs.ps1 failed or did not produce a zip file") } -// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known -// LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded. -// Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). -// To add a new CVE mitigation, append the module name to the list below. +// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known LPE +// vulnerabilities (CVE-2026-31431 / DirtyFrag / Fragnesia: algif_aead, esp4, esp6, rxrpc) +// are handled correctly per OS: +// +// - Ubuntu / Mariner / AzureLinux OSGuard: full check — modprobe config entries are present, modules are +// NOT loaded, and modprobe refuses to load them. +// - AzureLinux 3.0 (regular, non-OSGuard): assert ABSENCE of the four modprobe blacklist entries. AzL3 is +// descoped from the mitigation because kernel 6.6.139.1-1.azl3 and later fix all +// three CVEs upstream, AND customer workloads on AzL3 require those modules (the +// blacklist actively blocks legitimate use cases). Newly-built AzL3 VHDs therefore +// no longer ship the modprobe-CIS.conf entries, and E2E runs against freshly-built +// VHDs. See https://github.com/Azure/AKS/issues/5753. +// +// To add a new CVE mitigation, append the module name to BOTH lists below — +// the AzureLinux 3.0 absence-check list AND the default presence + load-refusal list. 
func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { s.T.Helper() @@ -2921,6 +2932,28 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { return } + // AzureLinux 3.0 (regular, NOT OSGuard): kernel 6.6.139.1-1.azl3+ supersedes the modprobe + // blacklist and the bake-in has been removed because customers need those modules. Assert + // the blacklist entries are NOT present on freshly-built AzL3 VHDs. AzureLinux OSGuard is + // intentionally kept in-scope (falls through to the full presence + load-refusal check below). + if s.VHD.OS == config.OSAzureLinux && !s.VHD.Distro.IsAzureLinuxOSGuardDistro() { + script := strings.Join([]string{ + `failed=0`, + `for mod in algif_aead esp4 esp6 rxrpc; do`, + ` if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then`, + ` echo "FAIL: ${mod} blacklist entry unexpectedly present on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)"`, + ` failed=1`, + ` else`, + ` echo "PASS: ${mod} blacklist correctly absent on AzureLinux 3.0"`, + ` fi`, + `done`, + `exit $failed`, + }, "\n") + execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, + "AzureLinux 3.0 modprobe blacklist should be absent (kernel fix 6.6.139.1-1.azl3+ supersedes; bake-in removed; no `install` or `blacklist` directive should remain)") + return + } + script := strings.Join([]string{ `failed=0`, `for mod in algif_aead esp4 esp6 rxrpc; do`, diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index c6c1873fce1..24d55bde038 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -319,10 +319,27 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD - # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag). 
- # Applies to existing VHDs that don't yet have the modprobe-CIS.conf fix baked in. + # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). + # Applied at CSE provisioning time on Ubuntu, AzureLinux OSGuard, and AzureLinux 2.0 / Mariner. # To add a new CVE mitigation, add a disableVulnerableKernelModule call below. - if isUbuntu "$OS" || isMarinerOrAzureLinux "$OS"; then + # + # AzureLinux 3.0 (regular and Kata) is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy + # Fail / DirtyFrag / Fragnesia upstream, so the runtime modprobe blacklist is no longer + # required. Newly-built AzL3 VHDs also no longer ship the four entries in modprobe-CIS.conf — + # customers reported the blacklist actively blocks legitimate workloads that use + # algif_aead / esp4 / esp6 / rxrpc on the patched kernel. Existing in-support AzL3 VHDs + # (built before this change) still have the bake-in until they are rolled; no CSE-time active + # removal is performed — customers will get the unblocked configuration on their next AzL3 + # VHD upgrade. AzureLinux OSGuard (hardened secure-boot variant) is intentionally kept in + # scope as defense-in-depth: OSGuard workloads are security-sensitive and do not require + # the affected kernel modules. + # + # Mariner / AzureLinux 2.0 (AzL2) images are frozen (see FrozenCBLMarinerV2AndAzureLinuxV2SIGImageVersion=202512.06.0), + # so they cannot pick up new modprobe-CIS.conf entries for these 2026 CVEs via VHD refresh. + # Keep the CSE-time runtime apply enabled for AzL2/Mariner while those images remain supported. + # See https://github.com/Azure/AKS/issues/5753. 
+ # + if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 743980359f0..96d734d1899 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -1,6 +1,7 @@ #!/usr/bin/env shellspec # Unit tests for disableVulnerableKernelModule() in cse_main.sh +# and the OS gate that selects which OS variants get the runtime apply. Describe 'disableVulnerableKernelModule()' MODPROBE_DIR="" @@ -70,3 +71,78 @@ Describe 'disableVulnerableKernelModule()' The output should not include "unloaded" End End + +# Tests the OS gate that decides whether to call disableVulnerableKernelModule +# at CSE provisioning time. Apply on: Ubuntu, Mariner/AzureLinux 2.0 (AzL2), AzureLinux OSGuard +# (defense-in-depth — hardened secure-boot variant intentionally retains the mitigation). Skip on: +# AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the upstream fix and +# customers reported the blacklist actively blocks legitimate workloads), ACL, Flatcar. +# See https://github.com/Azure/AKS/issues/5753. +Describe 'CVE kernel module mitigation OS gate' + Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" + + gate() { + # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
+ if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then + echo "APPLY" + else + echo "SKIP" + fi + } + + It 'applies the mitigation on Ubuntu' + OS="${UBUNTU_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "APPLY" + End + + It 'applies the mitigation on AzureLinux 3.0 OSGuard — defense-in-depth retained' + OS="${AZURELINUX_OS_NAME}" + OS_VARIANT="${AZURELINUX_OSGUARD_OS_VARIANT}" + When call gate + The output should equal "APPLY" + End + + It 'applies the mitigation on Mariner/AzureLinux 2.0 (AzL2) — VHDs are frozen so CSE-time apply is required' + OS="${MARINER_OS_NAME}" + OS_VARIANT="" + OS_VERSION="2.0" + When call gate + The output should equal "APPLY" + End + It 'applies the mitigation on Mariner Kata (AzL2) — VHDs are frozen so CSE-time apply is required' + OS="${MARINER_KATA_OS_NAME}" + OS_VARIANT="" + OS_VERSION="2.0" + When call gate + The output should equal "APPLY" + End + It 'skips on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' + OS="${AZURELINUX_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips on AzureLinux 3.0 Kata (same kernel as AzL3 regular)' + OS="${AZURELINUX_KATA_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips on ACL (Flatcar-based; never in scope)' + OS="${ACL_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips on Flatcar (never in scope)' + OS="${FLATCAR_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End +End diff --git a/vhdbuilder/packer/packer_source.sh b/vhdbuilder/packer/packer_source.sh index 40e922672f4..3531239a427 100644 --- a/vhdbuilder/packer/packer_source.sh +++ b/vhdbuilder/packer/packer_source.sh @@ -423,7 +423,20 @@ copyPackerFiles() { cpAndMode $ETC_ISSUE_CONFIG_SRC $ETC_ISSUE_CONFIG_DEST 644 cpAndMode $ETC_ISSUE_NET_CONFIG_SRC 
$ETC_ISSUE_NET_CONFIG_DEST 644 cpAndMode $SSHD_CONFIG_SRC $SSHD_CONFIG_DEST 600 - cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644 + # CVE-2026-31431 (Copy Fail), DirtyFrag, Fragnesia mitigation: bake modprobe blacklist + # for algif_aead / esp4 / esp6 / rxrpc into the VHD. + # + # Skipped on AzureLinux 3.0 (regular, non-OSGuard) because: + # 1. The upstream kernel fix in 6.6.139.1-1.azl3+ supersedes the modprobe blacklist. + # 2. Customer workloads on AzL3 require those kernel modules; the bake-in actively + # blocks legitimate use cases. + # Ubuntu and Mariner (AzL2) still get the bake-in — their kernels are not patched + # upstream yet; AzureLinux OSGuard also retains the bake-in. See https://github.com/Azure/AKS/issues/5753. + if isAzureLinux "$OS" "$OS_VARIANT" && [ "${OS_VERSION}" = "3.0" ] && ! isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + echo "Skipping modprobe-CIS.conf bake-in on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix; OSGuard intentionally retains the bake-in)" + else + cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644 + fi cpAndMode $PWQUALITY_CONF_SRC $PWQUALITY_CONF_DEST 600 cpAndMode $PAM_D_SU_SRC $PAM_D_SU_DEST 644 cpAndMode $PROFILE_D_PATH_SH_SRC $PROFILE_D_PATH_SH_DEST 755 diff --git a/vhdbuilder/packer/test/linux-vhd-content-test.sh b/vhdbuilder/packer/test/linux-vhd-content-test.sh index 2c879f28a69..02c3dd5c9dc 100644 --- a/vhdbuilder/packer/test/linux-vhd-content-test.sh +++ b/vhdbuilder/packer/test/linux-vhd-content-test.sh @@ -1353,12 +1353,39 @@ testNfsServerService() { # Verify all kernel modules with known LPE vulnerabilities are disabled. # Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). -# To add a new CVE mitigation, append the module to the loop below. +# To add a new CVE mitigation, append the module to BOTH loops below — the +# AzureLinux 3.0 absence loop AND the default presence + load-refusal loop. 
+# +# AzureLinux 3.0 is descoped: kernel 6.6.139.1-1.azl3+ fixes the CVEs upstream and +# the modprobe blacklist is NOT baked into newly-built AzL3 VHDs (customer workloads +# require those modules). On AzL3 we therefore assert the blacklist entries are +# ABSENT. Ubuntu and Mariner (AzL2) still assert presence + load-refusal. testVulnerableKernelModulesDisabled() { + local os_sku="${1:-$OS_SKU}" + local os_version="${2:-$OS_VERSION}" local test="testVulnerableKernelModulesDisabled" echo "$test:Start" local failed=0 + + if [ "$os_sku" = "AzureLinux" ] && [ "$os_version" = "3.0" ]; then + for mod in algif_aead esp4 esp6 rxrpc; do + if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then + err "$test" "${mod} blacklist entry unexpectedly present in /etc/modprobe.d/*.conf on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes; no 'install' or 'blacklist' directive should remain)" + failed=1 + else + echo "$test: ${mod} blacklist correctly absent on AzureLinux 3.0" + fi + done + + if [ "$failed" -ne 0 ]; then + return 1 + fi + + echo "$test:Finish" + return 0 + fi + for mod in algif_aead esp4 esp6 rxrpc; do if ! grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then err "$test" "${mod} disable rule not found in /etc/modprobe.d/*.conf" @@ -2472,4 +2499,4 @@ testInspektorGadgetAssets testPackageDownloadURLFallbackLogic testFileOwnership $OS_SKU testDiskQueueServiceIsActive -testVulnerableKernelModulesDisabled +testVulnerableKernelModulesDisabled $OS_SKU $OS_VERSION