diff --git a/crates/kit/src/cpio.rs b/crates/kit/src/cpio.rs index c937373c4..62787996c 100644 --- a/crates/kit/src/cpio.rs +++ b/crates/kit/src/cpio.rs @@ -29,7 +29,7 @@ fn write_file(writer: &mut impl Write, path: &str, content: &[u8]) -> io::Result Ok(()) } -/// CPIO entry: either a directory or a file with content. +/// CPIO entry: either a directory or a regular file (0644). enum Entry { Dir(&'static str), File(&'static str, &'static [u8]), @@ -41,6 +41,7 @@ pub fn create_initramfs_units_cpio() -> io::Result> { const UNIT_DIR: &str = "usr/lib/systemd/system"; const DROPIN_DIR: &str = "usr/lib/systemd/system/initrd-fs.target.d"; + const ROOT_FS_DROPIN_DIR: &str = "usr/lib/systemd/system/initrd-root-fs.target.d"; let entries: &[Entry] = &[ // Directory hierarchy @@ -49,6 +50,17 @@ pub fn create_initramfs_units_cpio() -> io::Result> { Dir("usr/lib/systemd"), Dir(UNIT_DIR), Dir(DROPIN_DIR), + Dir(ROOT_FS_DROPIN_DIR), + // sysroot.mount — mounts the virtiofs "rootfs" tag read-only at + // /sysroot. bcvk does not set root= on the kernel cmdline, so + // systemd-fstab-generator never generates a competing sysroot.mount, + // and dracut sets rootok=1 via its UNSET branch (no root= arg → trust + // systemd generators). The bcvk-sysroot.conf drop-in below wires + // this unit into initrd-root-fs.target. + File( + "usr/lib/systemd/system/sysroot.mount", + include_bytes!("units/sysroot.mount"), + ), // Service units File( "usr/lib/systemd/system/bcvk-etc-overlay.service", @@ -66,6 +78,15 @@ pub fn create_initramfs_units_cpio() -> io::Result> { "usr/lib/systemd/system/bcvk-journal-stream.service", include_bytes!("units/bcvk-journal-stream.service"), ), + // Drop-in to pull sysroot.mount into initrd-root-fs.target. 
Without + // this, nothing in the dependency graph actually requests the mount; + // dracut-rootfs-generator normally creates an + // initrd-root-fs.target.requires/sysroot.mount symlink for block-device + // roots, but for virtiofs (not a block device) it skips that step. + File( + "usr/lib/systemd/system/initrd-root-fs.target.d/bcvk-sysroot.conf", + b"[Unit]\nRequires=sysroot.mount\nAfter=sysroot.mount\n", + ), // Drop-in configs to pull units into initrd-fs.target File( "usr/lib/systemd/system/initrd-fs.target.d/bcvk-etc-overlay.conf", @@ -104,6 +125,7 @@ mod tests { let mut entries = Vec::new(); let mut etc_overlay_content = None; + let mut sysroot_mount_content = None; loop { let mut reader = cpio::NewcReader::new(cursor).expect("failed to read CPIO entry"); @@ -115,13 +137,20 @@ mod tests { let size = reader.entry().file_size() as usize; let mode = reader.entry().mode(); - // Read file content for verification - if name == "usr/lib/systemd/system/bcvk-etc-overlay.service" { - let mut content = vec![0u8; size]; - reader - .read_exact(&mut content) - .expect("failed to read file content"); - etc_overlay_content = Some(String::from_utf8(content).expect("invalid UTF-8")); + let mut content_buf = vec![0u8; size]; + reader + .read_exact(&mut content_buf) + .expect("failed to read file content"); + let content_str = String::from_utf8(content_buf).ok(); + + match name.as_str() { + "usr/lib/systemd/system/bcvk-etc-overlay.service" => { + etc_overlay_content = content_str.clone() + } + "usr/lib/systemd/system/sysroot.mount" => { + sysroot_mount_content = content_str.clone() + } + _ => {} } entries.push((name, size, mode)); @@ -130,37 +159,64 @@ mod tests { let names: Vec<_> = entries.iter().map(|(n, _, _)| n.as_str()).collect(); - // Verify directories + // Verify directory hierarchy assert!(names.contains(&"usr")); assert!(names.contains(&"usr/lib")); assert!(names.contains(&"usr/lib/systemd")); assert!(names.contains(&"usr/lib/systemd/system")); 
assert!(names.contains(&"usr/lib/systemd/system/initrd-fs.target.d")); + assert!(names.contains(&"usr/lib/systemd/system/initrd-root-fs.target.d")); - // Verify service files + // sysroot.mount must be present and correct + assert!( + names.contains(&"usr/lib/systemd/system/sysroot.mount"), + "sysroot.mount must be injected" + ); + let sysroot = sysroot_mount_content.expect("sysroot.mount content missing"); + assert!( + sysroot.contains("Type=virtiofs"), + "sysroot.mount must use virtiofs" + ); + assert!( + sysroot.contains("What=rootfs"), + "sysroot.mount must mount the 'rootfs' tag" + ); + assert!( + sysroot.contains("Where=/sysroot"), + "sysroot.mount must target /sysroot" + ); + assert!( + sysroot.contains("Options=ro"), + "sysroot.mount must be read-only" + ); + + // Service units assert!(names.contains(&"usr/lib/systemd/system/bcvk-etc-overlay.service")); assert!(names.contains(&"usr/lib/systemd/system/bcvk-var-ephemeral.service")); assert!(names.contains(&"usr/lib/systemd/system/bcvk-copy-units.service")); assert!(names.contains(&"usr/lib/systemd/system/bcvk-journal-stream.service")); - // Verify drop-in configs + // initrd-root-fs.target drop-in + assert!(names.contains(&"usr/lib/systemd/system/initrd-root-fs.target.d/bcvk-sysroot.conf")); + + // Drop-in configs assert!(names.contains(&"usr/lib/systemd/system/initrd-fs.target.d/bcvk-etc-overlay.conf")); assert!( names.contains(&"usr/lib/systemd/system/initrd-fs.target.d/bcvk-var-ephemeral.conf") ); assert!(names.contains(&"usr/lib/systemd/system/initrd-fs.target.d/bcvk-copy-units.conf")); - // Verify file modes + // Verify file modes: all entries are either regular files (0644) or directories for (name, _size, mode) in &entries { let file_type = *mode & 0o170000; - if name.ends_with(".service") || name.ends_with(".conf") { - assert_eq!(file_type, 0o100000, "{} should be regular file", name); + if name.ends_with(".service") || name.ends_with(".conf") || name.ends_with(".mount") { + assert_eq!(file_type, 
0o100000, "{} should be a regular file", name); } else { - assert_eq!(file_type, 0o040000, "{} should be directory", name); + assert_eq!(file_type, 0o040000, "{} should be a directory", name); } } - // Verify file content is valid systemd unit + // bcvk-etc-overlay.service must be a valid systemd unit let content = etc_overlay_content.expect("bcvk-etc-overlay.service not found"); assert!(content.contains("[Unit]")); assert!(content.contains("[Service]")); diff --git a/crates/kit/src/run_ephemeral.rs b/crates/kit/src/run_ephemeral.rs index 0d0fdf3b8..c3010abe8 100644 --- a/crates/kit/src/run_ephemeral.rs +++ b/crates/kit/src/run_ephemeral.rs @@ -873,8 +873,6 @@ fn parse_service_exit_code(status_content: &str) -> Result { Ok(0) } -/// Check for required binaries in the privileged container -/// /// These binaries must be present in the privileged container that runs bcvk, /// not the guest bootc image that gets booted inside the VM. fn check_required_container_binaries() -> Result<()> { @@ -1355,15 +1353,16 @@ StandardOutput=file:/dev/virtio-ports/executestatus qemu_config.add_smbios_credential(credential); } - // Build kernel command line for direct boot + // Build kernel command line for direct boot. + // + // We deliberately omit root=, rootfstype=, and rootflags= from the + // cmdline. When root= is absent dracut sets rootok=1 via its UNSET + // branch and defers entirely to systemd generators. + // systemd-fstab-generator likewise produces nothing without a root= arg. The + // virtiofs mount is handled solely by the sysroot.mount unit bcvk + // injects into every initramfs via the CPIO append, together with the + // initrd-root-fs.target.d/bcvk-sysroot.conf drop-in that wires it in. let mut kernel_cmdline = [ - // At the core we boot from the mounted container's root, - "rootfstype=virtiofs", - "root=rootfs", - // But read-only. 
We set up /etc overlay and /var copyup via - // systemd credentials rather than systemd.volatile=overlay - // to have more control over individual directories. - "rootflags=ro", // This avoids having journald interact with the rootfs // at all, which lessens the I/O traffic for virtiofs "systemd.journald.storage=volatile", diff --git a/crates/kit/src/units/sysroot.mount b/crates/kit/src/units/sysroot.mount new file mode 100644 index 000000000..f94994dbe --- /dev/null +++ b/crates/kit/src/units/sysroot.mount @@ -0,0 +1,19 @@ +[Unit] +Description=bcvk ephemeral virtiofs root mount +Documentation=https://github.com/bootc-dev/bcvk +DefaultDependencies=no +# bcvk does not put root= on the kernel cmdline, so neither dracut nor +# systemd-fstab-generator will generate a competing sysroot.mount unit. +# This unit is the sole handler for the virtiofs root mount in all +# initramfs types (dracut, bootc composefs initramfs, mkosi-initrd, …). +# +# The initrd-root-fs.target.d/bcvk-sysroot.conf drop-in wires this unit +# into the dependency graph so initrd-root-fs.target waits for it. +ConditionPathExists=/etc/initrd-release +Before=initrd-root-fs.target + +[Mount] +What=rootfs +Where=/sysroot +Type=virtiofs +Options=ro