From d383e552403e18d44c413ded625e2737720ca003 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 16:03:07 -0400 Subject: [PATCH 01/10] fix: skip CSE-time CVE modprobe blacklist on AzureLinux 3.0 Kernel 6.6.139.1-1.azl3 and later fix Copy Fail (CVE-2026-31431), DirtyFrag (CVE-2026-43284, CVE-2026-43500), and Fragnesia (CVE-2026-46300) upstream, so the runtime modprobe blacklist for algif_aead/esp4/esp6/rxrpc is no longer required on AzureLinux 3.0. Defense-in-depth: the static modprobe-CIS.conf baked into every VHD is left untouched, so all VHDs in the 6-month support window still drop the install/blacklist directives at build time regardless of kernel version. Ubuntu 22.04/24.04 and AzureLinux 2.0 (Mariner) keep the runtime apply: their upstream kernel does not yet ship the fix. Windows was never affected. Updates: * parts/linux/cloud-init/artifacts/cse_main.sh - gate is now isUbuntu || isMariner (was isUbuntu || isMarinerOrAzureLinux). * spec/.../cse_main_disable_modules_spec.sh - new tests asserting APPLY on Ubuntu/Mariner and SKIP on AzureLinux 3.0 / Kata / ACL / Flatcar. * e2e/validators.go - ValidateVulnerableKernelModulesDisabled is OS-conditional: full presence + load-refusal check on Ubuntu/Mariner, defense-in-depth modprobe.d entry presence-only check on AzureLinux. 
Refs: https://github.com/Azure/AKS/issues/5753 AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/validators.go | 33 +++++++++- parts/linux/cloud-init/artifacts/cse_main.sh | 10 ++- .../cse_main_disable_modules_spec.sh | 63 ++++++++++++++++++- 3 files changed, 102 insertions(+), 4 deletions(-) diff --git a/e2e/validators.go b/e2e/validators.go index 36e674105bf..fa3dd991fc5 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2911,7 +2911,15 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) { // ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known // LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded. -// Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). +// Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc), Fragnesia (esp4, esp6). +// +// AzureLinux 3.0 is excluded from the runtime apply because kernel 6.6.139.1-1.azl3 +// and later fix all three CVEs upstream. The static modprobe-CIS.conf baked into the +// VHD still drops the install/blacklist directives, so we verify those entries are +// present on AzureLinux 3.0 (defense-in-depth) but do not require an active modprobe +// refusal — the kernel-level fix is the authoritative mitigation. See +// https://github.com/Azure/AKS/issues/5753. +// // To add a new CVE mitigation, append the module name to the list below. func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { s.T.Helper() @@ -2921,6 +2929,29 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { return } + // On AzureLinux 3.0 the kernel fix in 6.6.139.1-1.azl3+ is the authoritative + // mitigation; the CSE-time runtime apply is intentionally skipped. 
We still + // validate that the baked-in modprobe-CIS.conf entries are present as + // defense-in-depth, but we do NOT require the module to be refused by modprobe + // (the kernel-level fix supersedes the module disable). + if s.VHD.OS == config.OSAzureLinux { + script := strings.Join([]string{ + `failed=0`, + `for mod in algif_aead esp4 esp6 rxrpc; do`, + ` if ! grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then`, + ` echo "FAIL: ${mod} disable rule not found in /etc/modprobe.d/*.conf (expected from baked-in modprobe-CIS.conf)"`, + ` failed=1`, + ` else`, + ` echo "PASS: modprobe config blocks ${mod} (defense-in-depth)"`, + ` fi`, + `done`, + `exit $failed`, + }, "\n") + execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, + "AzureLinux 3.0 modprobe-CIS.conf defense-in-depth check failed (algif_aead/esp4/esp6/rxrpc)") + return + } + script := strings.Join([]string{ `failed=0`, `for mod in algif_aead esp4 esp6 rxrpc; do`, diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index c6c1873fce1..8d078acdae3 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -319,10 +319,16 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD - # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag). + # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). # Applies to existing VHDs that don't yet have the modprobe-CIS.conf fix baked in. # To add a new CVE mitigation, add a disableVulnerableKernelModule call below. - if isUbuntu "$OS" || isMarinerOrAzureLinux "$OS"; then + # + # AzureLinux 3.0 is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy Fail / DirtyFrag / + # Fragnesia upstream, so the runtime modprobe blacklist is no longer required there. + # See https://github.com/Azure/AKS/issues/5753. 
+ # The static /etc/modprobe.d/modprobe-CIS.conf baked into the VHD remains in place on + # all OS variants as defense-in-depth for the 6-month VHD support window. + if isUbuntu "$OS" || isMariner "$OS"; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 743980359f0..569ac5ab3ba 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -1,6 +1,7 @@ -#!/usr/bin/env shellspec +#!/bin/bash # Unit tests for disableVulnerableKernelModule() in cse_main.sh +# and the OS gate that selects which OS variants get the runtime apply. Describe 'disableVulnerableKernelModule()' MODPROBE_DIR="" @@ -70,3 +71,63 @@ Describe 'disableVulnerableKernelModule()' The output should not include "unloaded" End End + +# Tests the OS gate that decides whether to call disableVulnerableKernelModule +# at CSE provisioning time. AzureLinux 3.0 is excluded because the kernel fix +# in 6.6.139.1-1.azl3+ supersedes the modprobe blacklist. Ubuntu and Mariner +# still receive the runtime apply because their upstream kernel is not yet patched. +# See https://github.com/Azure/AKS/issues/5753. +Describe 'CVE kernel module mitigation OS gate' + Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" + + gate() { + # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
+ if isUbuntu "$OS" || isMariner "$OS"; then + echo "APPLY" + else + echo "SKIP" + fi + } + + It 'applies the mitigation on Ubuntu' + OS="${UBUNTU_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "APPLY" + End + + It 'applies the mitigation on AzureLinux 2.0 (Mariner)' + OS="${MARINER_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "APPLY" + End + + It 'skips the runtime mitigation on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix)' + OS="${AZURELINUX_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips the runtime mitigation on AzureLinux 3.0 Kata' + OS="${AZURELINUX_KATA_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips on ACL (Flatcar-based)' + OS="${ACL_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End + + It 'skips on Flatcar' + OS="${FLATCAR_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "SKIP" + End +End From 1724c45116dd5e2fefb05e066e7ca849cf299b65 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 16:20:50 -0400 Subject: [PATCH 02/10] fix: also remove modprobe-CIS.conf bake-in from AzureLinux 3.0 VHDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Customers reported that the algif_aead / esp4 / esp6 / rxrpc modprobe blacklist baked into AzureLinux 3.0 VHDs blocks legitimate workloads. Now that kernel 6.6.139.1-1.azl3+ fixes Copy Fail / DirtyFrag / Fragnesia upstream, the bake-in is no longer needed on AzL3. Changes: - packer_source.sh: skip cpAndMode of MODPROBE_CIS on AzureLinux 3.0 (Ubuntu and Mariner bake-in unchanged — those kernels still vulnerable). - linux-vhd-content-test.sh: testVulnerableKernelModulesDisabled now asserts the four entries are ABSENT on AzL3 and present + load-refused on Ubuntu/Mariner. 
- e2e/validators.go: ValidateVulnerableKernelModulesDisabled now asserts absence on AzureLinux (matching newly-built VHDs); Ubuntu/Mariner full presence+refusal check unchanged. - cse_main.sh: updated AzL3 skip comment to reflect that the static blacklist file is no longer baked in either; existing in-support AzL3 VHDs continue to carry the bake-in until they roll (no CSE-time active removal — by design). No CSE-time active removal of pre-existing blacklist files is implemented; customers on existing in-support AzL3 VHDs will get the unblocked configuration on their next AzL3 VHD upgrade. AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/validators.go | 26 ++++++++--------- parts/linux/cloud-init/artifacts/cse_main.sh | 11 +++++--- vhdbuilder/packer/packer_source.sh | 15 +++++++++- .../packer/test/linux-vhd-content-test.sh | 28 ++++++++++++++++++- 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/e2e/validators.go b/e2e/validators.go index fa3dd991fc5..c00d19ded32 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2913,11 +2913,11 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) { // LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded. // Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc), Fragnesia (esp4, esp6). // -// AzureLinux 3.0 is excluded from the runtime apply because kernel 6.6.139.1-1.azl3 -// and later fix all three CVEs upstream. The static modprobe-CIS.conf baked into the -// VHD still drops the install/blacklist directives, so we verify those entries are -// present on AzureLinux 3.0 (defense-in-depth) but do not require an active modprobe -// refusal — the kernel-level fix is the authoritative mitigation. 
See +// AzureLinux 3.0 is descoped from the mitigation because kernel 6.6.139.1-1.azl3 and +// later fix all three CVEs upstream, AND customer workloads on AzL3 require those +// modules (the blacklist actively blocks legitimate use cases). Newly-built AzL3 VHDs +// therefore no longer ship the modprobe-CIS.conf entries. E2E runs against freshly-built +// VHDs, so on AzureLinux we assert the four module entries are ABSENT. See // https://github.com/Azure/AKS/issues/5753. // // To add a new CVE mitigation, append the module name to the list below. @@ -2929,26 +2929,24 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { return } - // On AzureLinux 3.0 the kernel fix in 6.6.139.1-1.azl3+ is the authoritative - // mitigation; the CSE-time runtime apply is intentionally skipped. We still - // validate that the baked-in modprobe-CIS.conf entries are present as - // defense-in-depth, but we do NOT require the module to be refused by modprobe - // (the kernel-level fix supersedes the module disable). + // AzureLinux 3.0: kernel 6.6.139.1-1.azl3+ supersedes the modprobe blacklist and + // the bake-in has been removed because customers need those modules. Assert the + // blacklist entries are NOT present on freshly-built AzL3 VHDs. if s.VHD.OS == config.OSAzureLinux { script := strings.Join([]string{ `failed=0`, `for mod in algif_aead esp4 esp6 rxrpc; do`, - ` if ! 
grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then`, - ` echo "FAIL: ${mod} disable rule not found in /etc/modprobe.d/*.conf (expected from baked-in modprobe-CIS.conf)"`, + ` if grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then`, + ` echo "FAIL: ${mod} disable rule unexpectedly present on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)"`, ` failed=1`, ` else`, - ` echo "PASS: modprobe config blocks ${mod} (defense-in-depth)"`, + ` echo "PASS: ${mod} blacklist correctly absent on AzureLinux 3.0"`, ` fi`, `done`, `exit $failed`, }, "\n") execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, - "AzureLinux 3.0 modprobe-CIS.conf defense-in-depth check failed (algif_aead/esp4/esp6/rxrpc)") + "AzureLinux 3.0 modprobe blacklist should be absent (kernel fix 6.6.139.1-1.azl3+ supersedes; bake-in removed)") return } diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index 8d078acdae3..3a43d2a571e 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -324,10 +324,13 @@ EOF # To add a new CVE mitigation, add a disableVulnerableKernelModule call below. # # AzureLinux 3.0 is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy Fail / DirtyFrag / - # Fragnesia upstream, so the runtime modprobe blacklist is no longer required there. - # See https://github.com/Azure/AKS/issues/5753. - # The static /etc/modprobe.d/modprobe-CIS.conf baked into the VHD remains in place on - # all OS variants as defense-in-depth for the 6-month VHD support window. + # Fragnesia upstream, so neither the runtime modprobe blacklist nor the baked-in + # /etc/modprobe.d/CIS.conf entries are required. Newly-built AzL3 VHDs no longer ship + # the four module entries — customers reported the blacklist actively blocks legitimate + # workloads that use algif_aead / esp4 / esp6 / rxrpc on the patched kernel. 
Existing + # in-support AzL3 VHDs (built before this change) still have the bake-in until they are + # rolled; no CSE-time active removal is performed — customers will get the unblocked + # configuration on their next AzL3 VHD upgrade. See https://github.com/Azure/AKS/issues/5753. if isUbuntu "$OS" || isMariner "$OS"; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" diff --git a/vhdbuilder/packer/packer_source.sh b/vhdbuilder/packer/packer_source.sh index 40e922672f4..69e725578f5 100644 --- a/vhdbuilder/packer/packer_source.sh +++ b/vhdbuilder/packer/packer_source.sh @@ -423,7 +423,20 @@ copyPackerFiles() { cpAndMode $ETC_ISSUE_CONFIG_SRC $ETC_ISSUE_CONFIG_DEST 644 cpAndMode $ETC_ISSUE_NET_CONFIG_SRC $ETC_ISSUE_NET_CONFIG_DEST 644 cpAndMode $SSHD_CONFIG_SRC $SSHD_CONFIG_DEST 600 - cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644 + # CVE-2026-31431 (Copy Fail), DirtyFrag, Fragnesia mitigation: bake modprobe blacklist + # for algif_aead / esp4 / esp6 / rxrpc into the VHD. + # + # Skipped on AzureLinux 3.0 because: + # 1. The upstream kernel fix in 6.6.139.1-1.azl3+ supersedes the modprobe blacklist. + # 2. Customer workloads on AzL3 require those kernel modules; the bake-in actively + # blocks legitimate use cases. + # Ubuntu and Mariner (AzL2) still get the bake-in — their kernels are not patched + # upstream yet. See https://github.com/Azure/AKS/issues/5753. 
+ if isAzureLinux "$OS" "$OS_VARIANT" && [ "${OS_VERSION}" = "3.0" ]; then + echo "Skipping modprobe-CIS.conf bake-in on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix)" + else + cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644 + fi cpAndMode $PWQUALITY_CONF_SRC $PWQUALITY_CONF_DEST 600 cpAndMode $PAM_D_SU_SRC $PAM_D_SU_DEST 644 cpAndMode $PROFILE_D_PATH_SH_SRC $PROFILE_D_PATH_SH_DEST 755 diff --git a/vhdbuilder/packer/test/linux-vhd-content-test.sh b/vhdbuilder/packer/test/linux-vhd-content-test.sh index 2c879f28a69..796a53a5b55 100644 --- a/vhdbuilder/packer/test/linux-vhd-content-test.sh +++ b/vhdbuilder/packer/test/linux-vhd-content-test.sh @@ -1354,11 +1354,37 @@ testNfsServerService() { # Verify all kernel modules with known LPE vulnerabilities are disabled. # Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). # To add a new CVE mitigation, append the module to the loop below. +# +# AzureLinux 3.0 is descoped: kernel 6.6.139.1-1.azl3+ fixes the CVEs upstream and +# the modprobe blacklist is NOT baked into newly-built AzL3 VHDs (customer workloads +# require those modules). On AzL3 we therefore assert the blacklist entries are +# ABSENT. Ubuntu and Mariner (AzL2) still assert presence + load-refusal. 
testVulnerableKernelModulesDisabled() { + local os_sku="${1:-$OS_SKU}" + local os_version="${2:-$OS_VERSION}" local test="testVulnerableKernelModulesDisabled" echo "$test:Start" local failed=0 + + if [ "$os_sku" = "AzureLinux" ] && [ "$os_version" = "3.0" ]; then + for mod in algif_aead esp4 esp6 rxrpc; do + if grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then + err "$test" "${mod} disable rule unexpectedly present in /etc/modprobe.d/*.conf on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)" + failed=1 + else + echo "$test: ${mod} blacklist correctly absent on AzureLinux 3.0" + fi + done + + if [ "$failed" -ne 0 ]; then + return 1 + fi + + echo "$test:Finish" + return 0 + fi + for mod in algif_aead esp4 esp6 rxrpc; do if ! grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then err "$test" "${mod} disable rule not found in /etc/modprobe.d/*.conf" @@ -2472,4 +2498,4 @@ testInspektorGadgetAssets testPackageDownloadURLFallbackLogic testFileOwnership $OS_SKU testDiskQueueServiceIsActive -testVulnerableKernelModulesDisabled +testVulnerableKernelModulesDisabled $OS_SKU $OS_VERSION From aa4513e31690fa27a3645889c27fdb4054cc54de Mon Sep 17 00:00:00 2001 From: djsly <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 16:42:16 -0400 Subject: [PATCH 03/10] fix: simplify CSE gate to isUbuntu-only and refresh validator doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback: - cse_main.sh: drop unused isMariner branch from the modprobe blacklist gate (AKS does not build Mariner VHDs anymore). - cse_main_disable_modules_spec.sh: update spec cases to match the new gate — Ubuntu APPLY; AzL3/Mariner/Kata/ACL/Flatcar SKIP. 
- validators.go: refresh the top-level doc comment on ValidateVulnerableKernelModulesDisabled to describe the OS-conditional behavior accurately (Ubuntu: full presence + load-refusal; AzureLinux: ABSENCE of blacklist entries). AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/validators.go | 20 ++++++++++--------- parts/linux/cloud-init/artifacts/cse_main.sh | 9 ++++++--- .../cse_main_disable_modules_spec.sh | 14 ++++++------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/e2e/validators.go b/e2e/validators.go index c00d19ded32..927df5a0df7 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2909,16 +2909,18 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) { "collect-windows-logs.ps1 failed or did not produce a zip file") } -// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known -// LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded. -// Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc), Fragnesia (esp4, esp6). +// ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known LPE +// vulnerabilities (CVE-2026-31431 / DirtyFrag / Fragnesia: algif_aead, esp4, esp6, rxrpc) +// are handled correctly per OS: // -// AzureLinux 3.0 is descoped from the mitigation because kernel 6.6.139.1-1.azl3 and -// later fix all three CVEs upstream, AND customer workloads on AzL3 require those -// modules (the blacklist actively blocks legitimate use cases). Newly-built AzL3 VHDs -// therefore no longer ship the modprobe-CIS.conf entries. E2E runs against freshly-built -// VHDs, so on AzureLinux we assert the four module entries are ABSENT. See -// https://github.com/Azure/AKS/issues/5753. +// - Ubuntu / Mariner: full check — modprobe config entries are present, modules are +// NOT loaded, and modprobe refuses to load them. +// - AzureLinux 3.0: assert ABSENCE of the four modprobe blacklist entries. 
AzL3 is +// descoped from the mitigation because kernel 6.6.139.1-1.azl3 and later fix all +// three CVEs upstream, AND customer workloads on AzL3 require those modules (the +// blacklist actively blocks legitimate use cases). Newly-built AzL3 VHDs therefore +// no longer ship the modprobe-CIS.conf entries, and E2E runs against freshly-built +// VHDs. See https://github.com/Azure/AKS/issues/5753. // // To add a new CVE mitigation, append the module name to the list below. func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index 3a43d2a571e..2a799dc230c 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -320,8 +320,11 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). - # Applies to existing VHDs that don't yet have the modprobe-CIS.conf fix baked in. - # To add a new CVE mitigation, add a disableVulnerableKernelModule call below. + # Ubuntu-only: applies the runtime modprobe blacklist to existing Ubuntu VHDs that don't yet + # have the modprobe-CIS.conf fix baked in. To add a new CVE mitigation, add a + # disableVulnerableKernelModule call below. + # + # AKS no longer builds Mariner (AzureLinux 2.0) VHDs, so Mariner is not gated here. # # AzureLinux 3.0 is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy Fail / DirtyFrag / # Fragnesia upstream, so neither the runtime modprobe blacklist nor the baked-in @@ -331,7 +334,7 @@ EOF # in-support AzL3 VHDs (built before this change) still have the bake-in until they are # rolled; no CSE-time active removal is performed — customers will get the unblocked # configuration on their next AzL3 VHD upgrade. See https://github.com/Azure/AKS/issues/5753. 
- if isUbuntu "$OS" || isMariner "$OS"; then + if isUbuntu "$OS"; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 569ac5ab3ba..b664e48dc06 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -73,16 +73,16 @@ Describe 'disableVulnerableKernelModule()' End # Tests the OS gate that decides whether to call disableVulnerableKernelModule -# at CSE provisioning time. AzureLinux 3.0 is excluded because the kernel fix -# in 6.6.139.1-1.azl3+ supersedes the modprobe blacklist. Ubuntu and Mariner -# still receive the runtime apply because their upstream kernel is not yet patched. -# See https://github.com/Azure/AKS/issues/5753. +# at CSE provisioning time. Ubuntu-only: AKS no longer builds Mariner VHDs, and +# AzureLinux 3.0 is excluded because the kernel fix in 6.6.139.1-1.azl3+ supersedes +# the modprobe blacklist (and customers reported the blacklist actively blocks +# legitimate workloads on AzL3). See https://github.com/Azure/AKS/issues/5753. Describe 'CVE kernel module mitigation OS gate' Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" gate() { # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
- if isUbuntu "$OS" || isMariner "$OS"; then + if isUbuntu "$OS"; then echo "APPLY" else echo "SKIP" @@ -96,11 +96,11 @@ Describe 'CVE kernel module mitigation OS gate' The output should equal "APPLY" End - It 'applies the mitigation on AzureLinux 2.0 (Mariner)' + It 'skips the runtime mitigation on AzureLinux 2.0 (Mariner) — no longer built' OS="${MARINER_OS_NAME}" OS_VARIANT="" When call gate - The output should equal "APPLY" + The output should equal "SKIP" End It 'skips the runtime mitigation on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix)' From a8122b65338192a4d78b022f0507e56025c91933 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 16:50:10 -0400 Subject: [PATCH 04/10] fix: restore shellspec shebang on cse_main_disable_modules_spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address inline review comment from djsly — the shebang was incorrectly changed from #!/usr/bin/env shellspec to #!/bin/bash in this PR. All other spec files under spec/parts/linux/cloud-init/artifacts/ use the shellspec shebang as a convention; revert to match. ShellSpec ignores the file shebang when invoked via the shellspec CLI (the shell is controlled by --shell), so this change is purely a convention fix with no runtime impact. shellspec --shell bash on the spec still reports 11/11. 
AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index b664e48dc06..18eebb3ef44 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env shellspec # Unit tests for disableVulnerableKernelModule() in cse_main.sh # and the OS gate that selects which OS variants get the runtime apply. From e1ae35c18b6e8d080dbace706f2dae0420115d13 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 16:55:48 -0400 Subject: [PATCH 05/10] fix: keep Mariner + OSGuard in scope; strengthen absence checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses re-review comments from the Copilot reviewer pass on commit aa4513e3 and corrects a factual error in the previous comment. Mariner (AzureLinux 2.0) IS still actively built (packer JSONs and recent CBLMariner release notes confirm). The previous commit's 'AKS no longer builds Mariner' claim was wrong, and the simplification of the CSE gate to 'isUbuntu' only would have silently dropped the mitigation on Mariner nodes whose kernel is not yet patched upstream. Changes: * cse_main.sh: restore the gate to apply on Ubuntu, Mariner (AzL2), and AzureLinux OSGuard. Only AzureLinux 3.0 (regular + Kata) is descoped — kernel 6.6.139.1-1.azl3+ has the upstream fix and customers need those modules. OSGuard explicitly stays in-scope as defense-in-depth (it's the hardened secure-boot variant). Comment block rewritten to reflect the actual scope. 
* packer_source.sh: AzL3 bake-in skip now excludes OSGuard (! isAzureLinuxOSGuard). OSGuard is OS=azurelinux + OS_VARIANT=OSGUARD, so the previous OS+OS_VERSION-only check incorrectly stripped it. * validators.go: ValidateVulnerableKernelModulesDisabled AzL3 absence check strengthened to match both 'install <mod> /bin/false' and 'blacklist <mod>' so a partial removal cannot pass silently. * linux-vhd-content-test.sh: AzL3 absence check strengthened to also detect 'blacklist <mod>' entries (OSGuard is correctly distinguished at this layer because OSGuard's OS_SKU is 'AzureLinuxOSGuard', not 'AzureLinux', so no additional condition needed here). * cse_main_disable_modules_spec.sh: spec updated to match the new gate — Ubuntu APPLY, Mariner APPLY, Mariner Kata APPLY, OSGuard APPLY, AzL3 regular SKIP, AzL3 Kata SKIP, ACL SKIP, Flatcar SKIP. 13 cases, all pass. AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/validators.go | 15 ++++---- parts/linux/cloud-init/artifacts/cse_main.sh | 27 +++++++------- .../cse_main_disable_modules_spec.sh | 37 +++++++++++++------ vhdbuilder/packer/packer_source.sh | 4 +- .../packer/test/linux-vhd-content-test.sh | 4 +- 5 files changed, 51 insertions(+), 36 deletions(-) diff --git a/e2e/validators.go b/e2e/validators.go index 927df5a0df7..a173fcf8321 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2931,15 +2931,16 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { return } - // AzureLinux 3.0: kernel 6.6.139.1-1.azl3+ supersedes the modprobe blacklist and - // the bake-in has been removed because customers need those modules. Assert the - // blacklist entries are NOT present on freshly-built AzL3 VHDs. - if s.VHD.OS == config.OSAzureLinux { + // AzureLinux 3.0 (regular, NOT OSGuard): kernel 6.6.139.1-1.azl3+ supersedes the modprobe + // blacklist and the bake-in has been removed because customers need those modules. 
Assert + // the blacklist entries are NOT present on freshly-built AzL3 VHDs. AzureLinux OSGuard is + // intentionally kept in-scope (falls through to the full presence + load-refusal check below). + if s.VHD.OS == config.OSAzureLinux && !s.VHD.Distro.IsAzureLinuxOSGuardDistro() { script := strings.Join([]string{ `failed=0`, `for mod in algif_aead esp4 esp6 rxrpc; do`, - ` if grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then`, - ` echo "FAIL: ${mod} disable rule unexpectedly present on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)"`, + ` if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then`, + ` echo "FAIL: ${mod} blacklist entry unexpectedly present on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)"`, ` failed=1`, ` else`, ` echo "PASS: ${mod} blacklist correctly absent on AzureLinux 3.0"`, @@ -2948,7 +2949,7 @@ func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { `exit $failed`, }, "\n") execScriptOnVMForScenarioValidateExitCode(ctx, s, script, 0, - "AzureLinux 3.0 modprobe blacklist should be absent (kernel fix 6.6.139.1-1.azl3+ supersedes; bake-in removed)") + "AzureLinux 3.0 modprobe blacklist should be absent (kernel fix 6.6.139.1-1.azl3+ supersedes; bake-in removed; no `install` or `blacklist` directive should remain)") return } diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index 2a799dc230c..ebcce5a9db4 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -320,21 +320,20 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). - # Ubuntu-only: applies the runtime modprobe blacklist to existing Ubuntu VHDs that don't yet - # have the modprobe-CIS.conf fix baked in. 
To add a new CVE mitigation, add a - # disableVulnerableKernelModule call below. + # Applied at CSE provisioning time on Ubuntu, Mariner (AzL2), and AzureLinux OSGuard. To add a + # new CVE mitigation, add a disableVulnerableKernelModule call below. # - # AKS no longer builds Mariner (AzureLinux 2.0) VHDs, so Mariner is not gated here. - # - # AzureLinux 3.0 is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy Fail / DirtyFrag / - # Fragnesia upstream, so neither the runtime modprobe blacklist nor the baked-in - # /etc/modprobe.d/CIS.conf entries are required. Newly-built AzL3 VHDs no longer ship - # the four module entries — customers reported the blacklist actively blocks legitimate - # workloads that use algif_aead / esp4 / esp6 / rxrpc on the patched kernel. Existing - # in-support AzL3 VHDs (built before this change) still have the bake-in until they are - # rolled; no CSE-time active removal is performed — customers will get the unblocked - # configuration on their next AzL3 VHD upgrade. See https://github.com/Azure/AKS/issues/5753. - if isUbuntu "$OS"; then + # AzureLinux 3.0 (regular and Kata) is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy + # Fail / DirtyFrag / Fragnesia upstream, so the runtime modprobe blacklist is no longer + # required. Newly-built AzL3 VHDs also no longer ship the four entries in modprobe-CIS.conf — + # customers reported the blacklist actively blocks legitimate workloads that use + # algif_aead / esp4 / esp6 / rxrpc on the patched kernel. Existing in-support AzL3 VHDs + # (built before this change) still have the bake-in until they are rolled; no CSE-time active + # removal is performed — customers will get the unblocked configuration on their next AzL3 + # VHD upgrade. AzureLinux OSGuard is intentionally kept in scope (defense-in-depth — OSGuard + # is the hardened secure-boot variant and explicitly retains the mitigation). + # See https://github.com/Azure/AKS/issues/5753. 
+ if isUbuntu "$OS" || isMariner "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 18eebb3ef44..66bc4f630bc 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -73,16 +73,17 @@ Describe 'disableVulnerableKernelModule()' End # Tests the OS gate that decides whether to call disableVulnerableKernelModule -# at CSE provisioning time. Ubuntu-only: AKS no longer builds Mariner VHDs, and -# AzureLinux 3.0 is excluded because the kernel fix in 6.6.139.1-1.azl3+ supersedes -# the modprobe blacklist (and customers reported the blacklist actively blocks -# legitimate workloads on AzL3). See https://github.com/Azure/AKS/issues/5753. +# at CSE provisioning time. Apply on: Ubuntu, Mariner (AzL2 — kernel still vulnerable), +# AzureLinux OSGuard (defense-in-depth — hardened variant intentionally retains the +# mitigation). Skip on: AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the +# upstream fix and customers reported the blacklist actively blocks legitimate workloads), +# ACL, Flatcar. See https://github.com/Azure/AKS/issues/5753. Describe 'CVE kernel module mitigation OS gate' Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" gate() { # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
- if isUbuntu "$OS"; then + if isUbuntu "$OS" || isMariner "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then echo "APPLY" else echo "SKIP" @@ -96,35 +97,49 @@ Describe 'CVE kernel module mitigation OS gate' The output should equal "APPLY" End - It 'skips the runtime mitigation on AzureLinux 2.0 (Mariner) — no longer built' + It 'applies the mitigation on Mariner (AzureLinux 2.0) — kernel still vulnerable' OS="${MARINER_OS_NAME}" OS_VARIANT="" When call gate - The output should equal "SKIP" + The output should equal "APPLY" + End + + It 'applies the mitigation on Mariner Kata' + OS="${MARINER_KATA_OS_NAME}" + OS_VARIANT="" + When call gate + The output should equal "APPLY" + End + + It 'applies the mitigation on AzureLinux 3.0 OSGuard — defense-in-depth retained' + OS="${AZURELINUX_OS_NAME}" + OS_VARIANT="${AZURELINUX_OSGUARD_OS_VARIANT}" + When call gate + The output should equal "APPLY" End - It 'skips the runtime mitigation on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix)' + It 'skips the runtime mitigation on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' OS="${AZURELINUX_OS_NAME}" OS_VARIANT="" When call gate The output should equal "SKIP" End - It 'skips the runtime mitigation on AzureLinux 3.0 Kata' + It 'skips the runtime mitigation on AzureLinux 3.0 Kata (same kernel as AzL3 regular)' OS="${AZURELINUX_KATA_OS_NAME}" OS_VARIANT="" When call gate The output should equal "SKIP" End - It 'skips on ACL (Flatcar-based)' + It 'skips on ACL (Flatcar-based; never in scope)' OS="${ACL_OS_NAME}" OS_VARIANT="" When call gate The output should equal "SKIP" End - It 'skips on Flatcar' + It 'skips on Flatcar (never in scope)' OS="${FLATCAR_OS_NAME}" OS_VARIANT="" When call gate diff --git a/vhdbuilder/packer/packer_source.sh b/vhdbuilder/packer/packer_source.sh index 69e725578f5..3531239a427 100644 --- a/vhdbuilder/packer/packer_source.sh +++ b/vhdbuilder/packer/packer_source.sh @@ -432,8 +432,8 @@ copyPackerFiles() { # blocks 
legitimate use cases. # Ubuntu and Mariner (AzL2) still get the bake-in — their kernels are not patched # upstream yet. See https://github.com/Azure/AKS/issues/5753. - if isAzureLinux "$OS" "$OS_VARIANT" && [ "${OS_VERSION}" = "3.0" ]; then - echo "Skipping modprobe-CIS.conf bake-in on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix)" + if isAzureLinux "$OS" "$OS_VARIANT" && [ "${OS_VERSION}" = "3.0" ] && ! isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + echo "Skipping modprobe-CIS.conf bake-in on AzureLinux 3.0 (kernel 6.6.139.1-1.azl3+ has upstream fix; OSGuard intentionally retains the bake-in)" else cpAndMode $MODPROBE_CIS_SRC $MODPROBE_CIS_DEST 644 fi diff --git a/vhdbuilder/packer/test/linux-vhd-content-test.sh b/vhdbuilder/packer/test/linux-vhd-content-test.sh index 796a53a5b55..ea7d3d9d89e 100644 --- a/vhdbuilder/packer/test/linux-vhd-content-test.sh +++ b/vhdbuilder/packer/test/linux-vhd-content-test.sh @@ -1369,8 +1369,8 @@ testVulnerableKernelModulesDisabled() { if [ "$os_sku" = "AzureLinux" ] && [ "$os_version" = "3.0" ]; then for mod in algif_aead esp4 esp6 rxrpc; do - if grep -qsE "^install ${mod} /bin/false" /etc/modprobe.d/*.conf 2>/dev/null; then - err "$test" "${mod} disable rule unexpectedly present in /etc/modprobe.d/*.conf on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes)" + if grep -qsE "^(install ${mod} /bin/false|blacklist ${mod})" /etc/modprobe.d/*.conf 2>/dev/null; then + err "$test" "${mod} blacklist entry unexpectedly present in /etc/modprobe.d/*.conf on AzureLinux 3.0 (bake-in removed; kernel 6.6.139.1-1.azl3+ supersedes; no 'install' or 'blacklist' directive should remain)" failed=1 else echo "$test: ${mod} blacklist correctly absent on AzureLinux 3.0" From 6b470f9301aab577278e4c3dc0cc8f5f90493a6e Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 17:26:59 -0400 Subject: [PATCH 06/10] fix: drop dead Mariner branch from CVE modprobe gate 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Sylvain's follow-up review on commit e1ae35c1: the isMariner branch restored in that commit is dead code — verified that AKS stopped building Mariner (AzL2) VHDs on 2025-12-06 and the active build pipeline (.pipelines/.vsts-vhd-builder-release.yaml) only references buildAzureLinuxV3*, buildAzureLinuxOSGuardV3*, and buildflatcar* parameters (no buildMariner*). The mitigation is also already baked into modprobe-CIS.conf on every in-support Mariner VHD, so the runtime apply was purely defense-in-depth duplicating the bake-in. Gate is now: isUbuntu || isAzureLinuxOSGuard. This unconditionally drops the mitigation runtime-apply on Mariner nodes that might scale up via CRP-served CSE during the remaining ~16 days of Mariner VHD support (last build's 6-month window expires ~2026-06). That is acceptable because: 1. The static bake-in in /etc/modprobe.d/modprobe-CIS.conf on the VHD itself remains in place on all in-support Mariner VHDs. 2. Mariner support fully sunsets in ~2 weeks. Updates: * cse_main.sh: gate simplified; comment rewritten with full Mariner rationale. * cse_main_disable_modules_spec.sh: Mariner / Mariner-Kata cases flipped from APPLY to SKIP. 13/13 still pass. AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- parts/linux/cloud-init/artifacts/cse_main.sh | 17 ++++++--- .../cse_main_disable_modules_spec.sh | 35 ++++++++++--------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index ebcce5a9db4..10f73ba6cc8 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -320,8 +320,8 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). 
- # Applied at CSE provisioning time on Ubuntu, Mariner (AzL2), and AzureLinux OSGuard. To add a - # new CVE mitigation, add a disableVulnerableKernelModule call below. + # Applied at CSE provisioning time on Ubuntu and AzureLinux OSGuard. To add a new CVE + # mitigation, add a disableVulnerableKernelModule call below. # # AzureLinux 3.0 (regular and Kata) is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy # Fail / DirtyFrag / Fragnesia upstream, so the runtime modprobe blacklist is no longer @@ -330,10 +330,17 @@ EOF # algif_aead / esp4 / esp6 / rxrpc on the patched kernel. Existing in-support AzL3 VHDs # (built before this change) still have the bake-in until they are rolled; no CSE-time active # removal is performed — customers will get the unblocked configuration on their next AzL3 - # VHD upgrade. AzureLinux OSGuard is intentionally kept in scope (defense-in-depth — OSGuard - # is the hardened secure-boot variant and explicitly retains the mitigation). + # VHD upgrade. AzureLinux OSGuard (hardened secure-boot variant) is intentionally kept in + # scope as defense-in-depth: OSGuard workloads are security-sensitive and do not require + # the affected kernel modules. + # + # Mariner (AzL2) is not gated here: AKS stopped building Mariner VHDs on 2025-12-06 and the + # 6-month support window for the last Mariner VHD closes ~2026-06. The mitigation is already + # baked into modprobe-CIS.conf in every in-support Mariner VHD, so the runtime apply was + # purely defense-in-depth and is no longer needed. + # # See https://github.com/Azure/AKS/issues/5753. 
- if isUbuntu "$OS" || isMariner "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 66bc4f630bc..87f955a7720 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -73,17 +73,18 @@ Describe 'disableVulnerableKernelModule()' End # Tests the OS gate that decides whether to call disableVulnerableKernelModule -# at CSE provisioning time. Apply on: Ubuntu, Mariner (AzL2 — kernel still vulnerable), -# AzureLinux OSGuard (defense-in-depth — hardened variant intentionally retains the -# mitigation). Skip on: AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the -# upstream fix and customers reported the blacklist actively blocks legitimate workloads), +# at CSE provisioning time. Apply on: Ubuntu, AzureLinux OSGuard (defense-in-depth — +# hardened secure-boot variant intentionally retains the mitigation). Skip on: +# AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the upstream fix and +# customers reported the blacklist actively blocks legitimate workloads), Mariner +# (no longer built; mitigation already baked in all in-support Mariner VHDs), # ACL, Flatcar. See https://github.com/Azure/AKS/issues/5753. Describe 'CVE kernel module mitigation OS gate' Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" gate() { # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
- if isUbuntu "$OS" || isMariner "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then echo "APPLY" else echo "SKIP" @@ -97,35 +98,35 @@ Describe 'CVE kernel module mitigation OS gate' The output should equal "APPLY" End - It 'applies the mitigation on Mariner (AzureLinux 2.0) — kernel still vulnerable' - OS="${MARINER_OS_NAME}" - OS_VARIANT="" + It 'applies the mitigation on AzureLinux 3.0 OSGuard — defense-in-depth retained' + OS="${AZURELINUX_OS_NAME}" + OS_VARIANT="${AZURELINUX_OSGUARD_OS_VARIANT}" When call gate The output should equal "APPLY" End - It 'applies the mitigation on Mariner Kata' - OS="${MARINER_KATA_OS_NAME}" + It 'skips on Mariner (AzL2) — AKS stopped building Mariner on 2025-12-06; bake-in covers in-support VHDs' + OS="${MARINER_OS_NAME}" OS_VARIANT="" When call gate - The output should equal "APPLY" + The output should equal "SKIP" End - It 'applies the mitigation on AzureLinux 3.0 OSGuard — defense-in-depth retained' - OS="${AZURELINUX_OS_NAME}" - OS_VARIANT="${AZURELINUX_OSGUARD_OS_VARIANT}" + It 'skips on Mariner Kata — same rationale as Mariner' + OS="${MARINER_KATA_OS_NAME}" + OS_VARIANT="" When call gate - The output should equal "APPLY" + The output should equal "SKIP" End - It 'skips the runtime mitigation on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' + It 'skips on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' OS="${AZURELINUX_OS_NAME}" OS_VARIANT="" When call gate The output should equal "SKIP" End - It 'skips the runtime mitigation on AzureLinux 3.0 Kata (same kernel as AzL3 regular)' + It 'skips on AzureLinux 3.0 Kata (same kernel as AzL3 regular)' OS="${AZURELINUX_KATA_OS_NAME}" OS_VARIANT="" When call gate From b4c600aa9e7e745ea5fd93a794a5b608876619d5 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 17:37:38 -0400 Subject: [PATCH 07/10] docs: clarify both 
module lists must be kept in sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot reviewer feedback on commit 6b470f93: * e2e/validators.go: the 'append the module name to the list below' comment was added before this PR introduced two separate module lists (AzL3-absence branch + default presence/load-refusal branch). Clarify that BOTH lists must be updated when adding a new CVE. * linux-vhd-content-test.sh: same issue — testVulnerableKernelModulesDisabled now has two loops (AzL3-absence + default). Update the comment to say BOTH must be appended. No functional changes — comment-only. AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- e2e/validators.go | 3 ++- vhdbuilder/packer/test/linux-vhd-content-test.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/e2e/validators.go b/e2e/validators.go index a173fcf8321..272cd33962f 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -2922,7 +2922,8 @@ func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario) { // no longer ship the modprobe-CIS.conf entries, and E2E runs against freshly-built // VHDs. See https://github.com/Azure/AKS/issues/5753. // -// To add a new CVE mitigation, append the module name to the list below. +// To add a new CVE mitigation, append the module name to BOTH lists below — +// the AzureLinux 3.0 absence-check list AND the default presence + load-refusal list. func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario) { s.T.Helper() diff --git a/vhdbuilder/packer/test/linux-vhd-content-test.sh b/vhdbuilder/packer/test/linux-vhd-content-test.sh index ea7d3d9d89e..02c3dd5c9dc 100644 --- a/vhdbuilder/packer/test/linux-vhd-content-test.sh +++ b/vhdbuilder/packer/test/linux-vhd-content-test.sh @@ -1353,7 +1353,8 @@ testNfsServerService() { # Verify all kernel modules with known LPE vulnerabilities are disabled. 
# Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). -# To add a new CVE mitigation, append the module to the loop below. +# To add a new CVE mitigation, append the module to BOTH loops below — the +# AzureLinux 3.0 absence loop AND the default presence + load-refusal loop. # # AzureLinux 3.0 is descoped: kernel 6.6.139.1-1.azl3+ fixes the CVEs upstream and # the modprobe blacklist is NOT baked into newly-built AzL3 VHDs (customer workloads From 44516624aa46a568d0170b5fc81129db6dcdf758 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 17:48:51 -0400 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- parts/linux/cloud-init/artifacts/cse_main.sh | 10 +++++----- .../artifacts/cse_main_disable_modules_spec.sh | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index 10f73ba6cc8..b26b634485e 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -334,13 +334,13 @@ EOF # scope as defense-in-depth: OSGuard workloads are security-sensitive and do not require # the affected kernel modules. # - # Mariner (AzL2) is not gated here: AKS stopped building Mariner VHDs on 2025-12-06 and the - # 6-month support window for the last Mariner VHD closes ~2026-06. The mitigation is already - # baked into modprobe-CIS.conf in every in-support Mariner VHD, so the runtime apply was - # purely defense-in-depth and is no longer needed. + # Mariner/AzureLinux 2.0 (AzL2) images are frozen (see FrozenCBLMarinerV2AndAzureLinuxV2SIGImageVersion=202512.06.0), + # so they cannot pick up new modprobe-CIS.conf entries for these 2026 CVEs via VHD refresh. 
+ # Keep the CSE-time runtime apply enabled for AzL2/Mariner while those images remain supported. + # See https://github.com/Azure/AKS/issues/5753. # # See https://github.com/Azure/AKS/issues/5753. - if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" disableVulnerableKernelModule "esp6" "DirtyFrag (xfrm-ESP6 page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 87f955a7720..48b6de9bea4 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -84,7 +84,7 @@ Describe 'CVE kernel module mitigation OS gate' gate() { # Mirrors the condition in cse_main.sh basePrep — must be kept in sync. 
- if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT"; then + if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then echo "APPLY" else echo "SKIP" @@ -105,19 +105,19 @@ Describe 'CVE kernel module mitigation OS gate' The output should equal "APPLY" End - It 'skips on Mariner (AzL2) — AKS stopped building Mariner on 2025-12-06; bake-in covers in-support VHDs' + It 'applies the mitigation on Mariner/AzureLinux 2.0 (AzL2) — VHDs are frozen so CSE-time apply is required' OS="${MARINER_OS_NAME}" OS_VARIANT="" + OS_VERSION="2.0" When call gate - The output should equal "SKIP" - End + The output should equal "APPLY" - It 'skips on Mariner Kata — same rationale as Mariner' + It 'applies the mitigation on Mariner Kata (AzL2) — VHDs are frozen so CSE-time apply is required' OS="${MARINER_KATA_OS_NAME}" OS_VARIANT="" + OS_VERSION="2.0" When call gate - The output should equal "SKIP" - End + The output should equal "APPLY" It 'skips on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' OS="${AZURELINUX_OS_NAME}" From e12170c718321fc7755b6e5f1629370e13888644 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Thu, 21 May 2026 20:56:48 -0400 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- parts/linux/cloud-init/artifacts/cse_main.sh | 1 - .../artifacts/cse_main_disable_modules_spec.sh | 13 ++++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index b26b634485e..b1fde158bbc 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -339,7 +339,6 @@ EOF # Keep the CSE-time runtime apply enabled for AzL2/Mariner while those images remain supported. 
# See https://github.com/Azure/AKS/issues/5753. # - # See https://github.com/Azure/AKS/issues/5753. if isUbuntu "$OS" || isAzureLinuxOSGuard "$OS" "$OS_VARIANT" || { isMarinerOrAzureLinux "$OS" && [ "${OS_VERSION}" = "2.0" ]; }; then disableVulnerableKernelModule "algif_aead" "CVE-2026-31431 (Copy Fail)" disableVulnerableKernelModule "esp4" "DirtyFrag (xfrm-ESP page-cache write)" diff --git a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh index 48b6de9bea4..96d734d1899 100644 --- a/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh +++ b/spec/parts/linux/cloud-init/artifacts/cse_main_disable_modules_spec.sh @@ -73,12 +73,11 @@ Describe 'disableVulnerableKernelModule()' End # Tests the OS gate that decides whether to call disableVulnerableKernelModule -# at CSE provisioning time. Apply on: Ubuntu, AzureLinux OSGuard (defense-in-depth — -# hardened secure-boot variant intentionally retains the mitigation). Skip on: +# at CSE provisioning time. Apply on: Ubuntu, Mariner/AzureLinux 2.0 (AzL2), AzureLinux OSGuard +# (defense-in-depth — hardened secure-boot variant intentionally retains the mitigation). Skip on: # AzureLinux 3.0 regular/Kata (kernel 6.6.139.1-1.azl3+ has the upstream fix and -# customers reported the blacklist actively blocks legitimate workloads), Mariner -# (no longer built; mitigation already baked in all in-support Mariner VHDs), -# ACL, Flatcar. See https://github.com/Azure/AKS/issues/5753. +# customers reported the blacklist actively blocks legitimate workloads), ACL, Flatcar. +# See https://github.com/Azure/AKS/issues/5753. 
Describe 'CVE kernel module mitigation OS gate' Include "./parts/linux/cloud-init/artifacts/cse_helpers.sh" @@ -111,14 +110,14 @@ Describe 'CVE kernel module mitigation OS gate' OS_VERSION="2.0" When call gate The output should equal "APPLY" - + End It 'applies the mitigation on Mariner Kata (AzL2) — VHDs are frozen so CSE-time apply is required' OS="${MARINER_KATA_OS_NAME}" OS_VARIANT="" OS_VERSION="2.0" When call gate The output should equal "APPLY" - + End It 'skips on AzureLinux 3.0 regular (kernel 6.6.139.1-1.azl3+ has upstream fix)' OS="${AZURELINUX_OS_NAME}" OS_VARIANT="" From 5c342e4a3ce71438ce90f03d97d034348f1f14b6 Mon Sep 17 00:00:00 2001 From: Sylvain Boily <4981802+djsly@users.noreply.github.com> Date: Fri, 22 May 2026 10:53:24 -0400 Subject: [PATCH 10/10] docs(cse_main): include OSGuard + AzL2 in summary line of OS-gate comment Address Copilot reviewer feedback on commit e12170c7: the lead-in comment said the mitigation is applied on "Ubuntu and AzureLinux OSGuard", but the gate also applies on AzL2/Mariner (covered by the detailed paragraph below). Update the summary line so the first line of the comment block accurately reflects all three apply targets. No functional change. AB#38070527 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- parts/linux/cloud-init/artifacts/cse_main.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parts/linux/cloud-init/artifacts/cse_main.sh b/parts/linux/cloud-init/artifacts/cse_main.sh index b1fde158bbc..24d55bde038 100755 --- a/parts/linux/cloud-init/artifacts/cse_main.sh +++ b/parts/linux/cloud-init/artifacts/cse_main.sh @@ -320,8 +320,8 @@ EOF logs_to_events "AKS.CSE.ensureSysctl" ensureSysctl || exit $ERR_SYSCTL_RELOAD # Disable kernel modules with known LPE vulnerabilities (CVE-2026-31431, DirtyFrag, Fragnesia). - # Applied at CSE provisioning time on Ubuntu and AzureLinux OSGuard. To add a new CVE - # mitigation, add a disableVulnerableKernelModule call below. 
+ # Applied at CSE provisioning time on Ubuntu, AzureLinux OSGuard, and AzureLinux 2.0 / Mariner. + # To add a new CVE mitigation, add a disableVulnerableKernelModule call below. # # AzureLinux 3.0 (regular and Kata) is excluded: kernel 6.6.139.1-1.azl3 and later fix Copy # Fail / DirtyFrag / Fragnesia upstream, so the runtime modprobe blacklist is no longer @@ -334,7 +334,7 @@ EOF # scope as defense-in-depth: OSGuard workloads are security-sensitive and do not require # the affected kernel modules. # - # Mariner/AzureLinux 2.0 (AzL2) images are frozen (see FrozenCBLMarinerV2AndAzureLinuxV2SIGImageVersion=202512.06.0), + # Mariner / AzureLinux 2.0 (AzL2) images are frozen (see FrozenCBLMarinerV2AndAzureLinuxV2SIGImageVersion=202512.06.0), # so they cannot pick up new modprobe-CIS.conf entries for these 2026 CVEs via VHD refresh. # Keep the CSE-time runtime apply enabled for AzL2/Mariner while those images remain supported. # See https://github.com/Azure/AKS/issues/5753.