diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index 02891c03e..45795660c 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -40,6 +40,12 @@ template resource limits. Docker and Podman apply them as runtime limits. Kubernetes mirrors each limit into the matching request. VM accepts the fields but currently ignores them. +GPU requests enter the driver layer through +`SandboxSpec.resource_requirements.gpu`. The compact interim shape supports a +default GPU request, GPU count, and driver-specific device IDs. Docker and +Podman map default and count requests to concrete NVIDIA CDI device IDs before +creating the sandbox container. + VM runtime state paths are derived only from driver-validated sandbox IDs matching `[A-Za-z0-9._-]{1,128}`. The gateway-owned VM driver socket uses a private `run/` directory plus Unix peer UID/PID checks. Standalone @@ -77,7 +83,9 @@ users. Custom sandbox images must include the agent runtime and any system dependencies, but they should not need to include the gateway. GPU-capable images must include the user-space libraries required by the workload. The -runtime still owns GPU device injection. +runtime still owns GPU device injection. GPU requests can include explicit +driver-native device IDs or a requested count; the gateway validates the public +request shape and each runtime enforces the GPU allocation modes it supports. ## Deployment Shape diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 2254f0c89..686103971 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -1216,9 +1216,13 @@ enum SandboxCommands { /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index. /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection. - #[arg(long, requires = "gpu")] + #[arg(long, requires = "gpu", conflicts_with = "gpu_count")] gpu_device: Option, + /// Request a specific number of GPUs. Mutually exclusive with --gpu-device. + #[arg(long, value_parser = clap::value_parser!(u32).range(1..), conflicts_with = "gpu_device")] + gpu_count: Option, + /// CPU limit for the sandbox (for example: 500m, 1, 2.5). #[arg(long)] cpu: Option, @@ -2539,6 +2543,7 @@ async fn main() -> Result<()> { editor, gpu, gpu_device, + gpu_count, cpu, memory, providers, @@ -2608,6 +2613,7 @@ async fn main() -> Result<()> { keep, gpu, gpu_device.as_deref(), + gpu_count, cpu.as_deref(), memory.as_deref(), editor, @@ -4287,6 +4293,52 @@ mod tests { } } + #[test] + fn sandbox_create_gpu_count_parses_without_gpu_flag() { + let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "2"]) + .expect("sandbox create --gpu-count should parse"); + + match cli.command { + Some(Commands::Sandbox { + command: Some(SandboxCommands::Create { gpu, gpu_count, .. }), + .. + }) => { + assert!(!gpu); + assert_eq!(gpu_count, Some(2)); + } + other => panic!("expected SandboxCommands::Create, got: {other:?}"), + } + } + + #[test] + fn sandbox_create_gpu_count_rejects_zero() { + let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "0"]); + + assert!( + result.is_err(), + "sandbox create --gpu-count 0 should be rejected" + ); + } + + #[test] + fn sandbox_create_gpu_count_conflicts_with_gpu_device() { + let result = Cli::try_parse_from([ + "openshell", + "sandbox", + "create", + "--gpu", + "--gpu-device", + "nvidia.com/gpu=0", + "--gpu-count", + "2", + ]); + + assert!( + result.is_err(), + "sandbox create should reject --gpu-count with --gpu-device" + ); + } + #[test] fn service_expose_accepts_positional_target_port_and_service() { let cli = Cli::try_parse_from([ diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 9988d46db..038c821e3 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -39,17 +39,18 @@ use openshell_core::proto::{ GetClusterInferenceRequest, GetDraftHistoryRequest, GetDraftPolicyRequest, GetGatewayConfigRequest, GetProviderProfileRequest, GetProviderRefreshStatusRequest, GetProviderRequest, GetSandboxConfigRequest, GetSandboxLogsRequest, - GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, HealthRequest, - ImportProviderProfilesRequest, LintProviderProfilesRequest, ListProviderProfilesRequest, - ListProvidersRequest, ListSandboxPoliciesRequest, ListSandboxProvidersRequest, - ListSandboxesRequest, ListServicesRequest, PlatformEvent, PolicySource, PolicyStatus, Provider, - ProviderCredentialRefreshStatus, ProviderCredentialRefreshStrategy, ProviderProfile, - ProviderProfileDiagnostic, ProviderProfileImportItem, RejectDraftChunkRequest, - RevokeSshSessionRequest, RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy, - SandboxSpec, SandboxTemplate, ServiceEndpointResponse, SetClusterInferenceRequest, - SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, TcpRelayTarget, - UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest, exec_sandbox_event, - setting_value, tcp_forward_init, + GetSandboxPolicyStatusRequest, GetSandboxRequest, GetServiceRequest, GpuResourceRequirement, + HealthRequest, ImportProviderProfilesRequest, LintProviderProfilesRequest, + ListProviderProfilesRequest, ListProvidersRequest, ListSandboxPoliciesRequest, + ListSandboxProvidersRequest, ListSandboxesRequest, ListServicesRequest, PlatformEvent, + PolicySource, PolicyStatus, Provider, ProviderCredentialRefreshStatus, + ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileDiagnostic, + ProviderProfileImportItem, RejectDraftChunkRequest, RevokeSshSessionRequest, + RotateProviderCredentialRequest, Sandbox, SandboxPhase, SandboxPolicy, + SandboxResourceRequirements, SandboxSpec, SandboxTemplate, ServiceEndpointResponse, + SetClusterInferenceRequest, SettingScope, SettingValue, TcpForwardFrame, TcpForwardInit, + TcpRelayTarget, UpdateConfigRequest, UpdateProviderRequest, WatchSandboxRequest, + exec_sandbox_event, setting_value, tcp_forward_init, }; use openshell_core::settings::{self, SettingValueKind}; use openshell_core::{ObjectId, ObjectName}; @@ -1679,6 +1680,7 @@ pub async fn sandbox_create( keep: bool, gpu: bool, gpu_device: Option<&str>, + gpu_count: Option, cpu: Option<&str>, memory: Option<&str>, editor: Option, @@ -1732,7 +1734,8 @@ pub async fn sandbox_create( } None => None, }; - let requested_gpu = gpu || image.as_deref().is_some_and(image_requests_gpu); + let requested_gpu = + gpu || gpu_count.is_some() || image.as_deref().is_some_and(image_requests_gpu); let providers_v2_enabled = gateway_providers_v2_enabled(&mut client).await?; let inferred_types: Vec = if providers_v2_enabled { @@ -1763,8 +1766,11 @@ pub async fn sandbox_create( let request = CreateSandboxRequest { spec: Some(SandboxSpec { - gpu: requested_gpu, - gpu_device: gpu_device.unwrap_or_default().to_string(), + resource_requirements: resource_requirements_from_cli( + requested_gpu, + gpu_device, + gpu_count, + ), policy, providers: configured_providers, template, @@ -2189,6 +2195,22 @@ pub async fn sandbox_create( } } +fn resource_requirements_from_cli( + requested_gpu: bool, + gpu_device: Option<&str>, + gpu_count: Option, +) -> Option { + requested_gpu.then(|| SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: gpu_device + .filter(|device_id| !device_id.is_empty()) + .map(|device_id| vec![device_id.to_string()]) + .unwrap_or_default(), + count: gpu_count, + }), + }) +} + /// Resolved source for the `--from` flag on `sandbox create`. #[derive(Debug)] enum ResolvedSource { @@ -7444,8 +7466,8 @@ mod tests { plaintext_gateway_is_remote, progress_step_from_metadata, provider_profile_allows_refresh_bootstrap, provisioning_timeout_message, ready_false_condition_message, refresh_status_header, refresh_status_row, resolve_from, - sandbox_should_persist, sandbox_upload_plan, service_expose_status_error, - service_url_for_gateway, + resource_requirements_from_cli, sandbox_should_persist, sandbox_upload_plan, + service_expose_status_error, service_url_for_gateway, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -7924,6 +7946,51 @@ mod tests { } } + #[test] + fn resource_requirements_from_cli_uses_presence_for_default_gpu() { + let requirements = resource_requirements_from_cli(true, None, None) + .expect("resource requirements should be present"); + let gpu = requirements.gpu.expect("GPU requirement should be present"); + + assert!(gpu.device_ids.is_empty()); + assert_eq!(gpu.count, None); + } + + #[test] + fn resource_requirements_from_cli_maps_gpu_device_to_one_device_id() { + let requirements = resource_requirements_from_cli(true, Some("0000:2d:00.0"), None) + .expect("resource requirements should be present"); + let gpu = requirements.gpu.expect("GPU requirement should be present"); + + assert_eq!(gpu.device_ids, vec!["0000:2d:00.0"]); + assert_eq!(gpu.count, None); + } + + #[test] + fn resource_requirements_from_cli_maps_gpu_count() { + let requirements = + resource_requirements_from_cli(true, None, Some(2)).expect("requirements should exist"); + let gpu = requirements.gpu.expect("GPU requirement should be present"); + + assert!(gpu.device_ids.is_empty()); + assert_eq!(gpu.count, Some(2)); + } + + #[test] + fn resource_requirements_from_cli_preserves_device_and_gpu_count_for_gateway_validation() { + let requirements = resource_requirements_from_cli(true, Some("nvidia.com/gpu=0"), Some(2)) + .expect("requirements should exist"); + let gpu = requirements.gpu.expect("GPU requirement should be present"); + + assert_eq!(gpu.device_ids, vec!["nvidia.com/gpu=0"]); + assert_eq!(gpu.count, Some(2)); + } + + #[test] + fn resource_requirements_from_cli_omits_gpu_request_when_not_requested() { + assert!(resource_requirements_from_cli(false, Some("0"), None).is_none()); + } + #[test] fn resolve_from_classifies_existing_dockerfile_path() { let temp = tempfile::tempdir().expect("failed to create tempdir"); diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs index aee91de56..0829a2512 100644 --- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs +++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs @@ -787,6 +787,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() { None, None, None, + None, &[], None, None, @@ -826,6 +827,7 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() { true, false, None, + None, Some("500m"), Some("2Gi"), None, @@ -884,6 +886,53 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() { assert!(!resources.fields.contains_key("requests")); } +#[tokio::test] +async fn sandbox_create_sends_gpu_count_request() { + let server = run_server().await; + let fake_ssh_dir = tempfile::tempdir().unwrap(); + let xdg_dir = tempfile::tempdir().unwrap(); + let _env = test_env(&fake_ssh_dir, &xdg_dir); + let tls = test_tls(&server); + install_fake_ssh(&fake_ssh_dir); + + run::sandbox_create( + &server.endpoint, + Some("gpu-count"), + None, + "openshell", + None, + true, + false, + None, + Some(2), + None, + None, + None, + &[], + None, + None, + &["echo".to_string(), "OK".to_string()], + Some(false), + Some(false), + &HashMap::new(), + "manual", + &tls, + ) + .await + .expect("sandbox create should succeed"); + + let requests = create_requests(&server).await; + let gpu = requests[0] + .spec + .as_ref() + .and_then(|spec| spec.resource_requirements.as_ref()) + .and_then(|requirements| requirements.gpu.as_ref()) + .expect("GPU request should be sent"); + + assert!(gpu.device_ids.is_empty()); + assert_eq!(gpu.count, Some(2)); +} + #[tokio::test] async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() { let server = run_server().await; @@ -906,6 +955,7 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() { None, None, None, + None, &[], None, None, @@ -963,6 +1013,7 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() { None, None, None, + None, &[], None, None, @@ -1016,6 +1067,7 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() { None, None, None, + None, &[], None, None, @@ -1061,6 +1113,7 @@ async fn sandbox_create_times_out_when_only_logs_arrive() { None, None, None, + None, &[], None, None, @@ -1102,6 +1155,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() { None, None, None, + None, &[], None, None, @@ -1147,6 +1201,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() { None, None, None, + None, &[], None, None, @@ -1192,6 +1247,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() { None, None, None, + None, &[], None, None, @@ -1237,6 +1293,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() { None, None, None, + None, &[], None, Some(openshell_core::forward::ForwardSpec::new(forward_port)), diff --git a/crates/openshell-core/src/gpu.rs b/crates/openshell-core/src/gpu.rs index 5df8702ed..f50d542fe 100644 --- a/crates/openshell-core/src/gpu.rs +++ b/crates/openshell-core/src/gpu.rs @@ -3,46 +3,467 @@ //! Shared GPU request helpers. +use std::fmt; +use std::sync::atomic::{AtomicUsize, Ordering}; + use crate::config::CDI_GPU_DEVICE_ALL; +use crate::proto::compute::v1::DriverGpuResourceRequirement; -/// Resolve the existing GPU request fields into CDI device identifiers. -/// -/// `None` means no GPU was requested. A GPU request with no explicit device -/// ID uses the CDI all-GPU request; otherwise the driver-native ID passes -/// through unchanged. -#[must_use] -pub fn cdi_gpu_device_ids(gpu: bool, gpu_device: &str) -> Option> { - gpu.then(|| { - if gpu_device.is_empty() { - vec![CDI_GPU_DEVICE_ALL.to_string()] +const CDI_NVIDIA_GPU_PREFIX: &str = "nvidia.com/gpu="; +const CDI_NVIDIA_GPU_ALL_SUFFIX: &str = "all"; + +/// Normalized CDI GPU inventory used by local container drivers. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct CdiGpuInventory { + device_ids: Vec, +} + +impl CdiGpuInventory { + /// Build a normalized inventory from runtime-reported CDI device IDs. + /// + /// Non-NVIDIA GPU IDs are ignored. Duplicate IDs are removed. For default + /// selection, indexed IDs and UUID-style IDs are treated as separate naming + /// families because they may refer to the same devices. + #[must_use] + pub fn new(device_ids: impl IntoIterator>) -> Self { + let mut device_ids = device_ids + .into_iter() + .filter_map(|id| { + let id = id.as_ref().trim(); + id.starts_with(CDI_NVIDIA_GPU_PREFIX) + .then(|| id.to_string()) + }) + .collect::>(); + device_ids.sort(); + device_ids.dedup(); + Self { device_ids } + } + + #[must_use] + pub fn as_slice(&self) -> &[String] { + &self.device_ids + } + + #[must_use] + pub fn is_empty(&self) -> bool { + self.device_ids.is_empty() + } + + fn default_device_family(&self) -> Result, CdiGpuSelectionError> { + let mut indexed = self + .device_ids + .iter() + .filter_map(|id| { + let suffix = cdi_nvidia_gpu_suffix(id)?; + let index = suffix.parse::().ok()?; + Some((index, id.clone())) + }) + .collect::>(); + if !indexed.is_empty() { + indexed.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1))); + return Ok(indexed.into_iter().map(|(_, id)| id).collect()); + } + + let mut uuid_style = self + .device_ids + .iter() + .filter_map(|id| { + let suffix = cdi_nvidia_gpu_suffix(id)?; + (suffix != CDI_NVIDIA_GPU_ALL_SUFFIX).then(|| id.clone()) + }) + .collect::>(); + if !uuid_style.is_empty() { + uuid_style.sort(); + return Ok(uuid_style); + } + + if self.device_ids.iter().any(|id| id == CDI_GPU_DEVICE_ALL) { + return Ok(vec![CDI_GPU_DEVICE_ALL.to_string()]); + } + + Err(CdiGpuSelectionError::NoAvailableDevices) + } +} + +/// Concurrency-safe round-robin cursor for default CDI GPU selection. +#[derive(Debug, Default)] +pub struct CdiGpuRoundRobin { + next: AtomicUsize, +} + +impl CdiGpuRoundRobin { + #[must_use] + pub const fn new() -> Self { + Self { + next: AtomicUsize::new(0), + } + } + + /// Return the next default device ID and advance the cursor. + pub fn next_default_device_id( + &self, + inventory: &CdiGpuInventory, + ) -> Result { + self.next_default_device_ids(inventory, 1) + .map(|devices| devices[0].clone()) + } + + /// Return the current default device ID without advancing the cursor. + pub fn peek_default_device_id( + &self, + inventory: &CdiGpuInventory, + ) -> Result { + self.peek_default_device_ids(inventory, 1) + .map(|devices| devices[0].clone()) + } + + /// Return the next default device IDs and advance the cursor by `count`. + pub fn next_default_device_ids( + &self, + inventory: &CdiGpuInventory, + count: usize, + ) -> Result, CdiGpuSelectionError> { + self.selected_default_device_ids(inventory, count, true) + } + + /// Return the current default device IDs without advancing the cursor. + pub fn peek_default_device_ids( + &self, + inventory: &CdiGpuInventory, + count: usize, + ) -> Result, CdiGpuSelectionError> { + self.selected_default_device_ids(inventory, count, false) + } + + fn selected_default_device_ids( + &self, + inventory: &CdiGpuInventory, + count: usize, + consume: bool, + ) -> Result, CdiGpuSelectionError> { + if count == 0 { + return Err(CdiGpuSelectionError::InvalidCount); + } + let devices = inventory.default_device_family()?; + if count > devices.len() { + return Err(CdiGpuSelectionError::InsufficientAvailableDevices { + requested: count, + available: devices.len(), + }); + } + let base = if consume { + self.next.fetch_add(count, Ordering::Relaxed) } else { - vec![gpu_device.to_string()] + self.next.load(Ordering::Relaxed) + }; + let start = base % devices.len(); + Ok((0..count) + .map(|offset| devices[(start + offset) % devices.len()].clone()) + .collect()) + } +} + +/// CDI GPU selection failed. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CdiGpuSelectionError { + NoAvailableDevices, + InvalidCount, + InsufficientAvailableDevices { requested: usize, available: usize }, + MissingDefaultDevice, +} + +impl fmt::Display for CdiGpuSelectionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NoAvailableDevices => f.write_str("no NVIDIA CDI GPU devices were discovered"), + Self::InvalidCount => f.write_str("GPU count must be greater than 0"), + Self::InsufficientAvailableDevices { + requested, + available, + } => write!( + f, + "GPU count request exceeds discovered NVIDIA CDI GPU devices ({requested} > {available})" + ), + Self::MissingDefaultDevice => { + f.write_str("GPU request requires selected default CDI GPU device IDs") + } } - }) + } +} + +impl std::error::Error for CdiGpuSelectionError {} + +/// Return the number of driver-selected CDI GPU IDs needed for this request. +/// +/// `None` means no GPU was requested or explicit device IDs were supplied. +pub fn cdi_gpu_default_device_count( + gpu: Option<&DriverGpuResourceRequirement>, +) -> Result, CdiGpuSelectionError> { + let Some(gpu) = gpu else { + return Ok(None); + }; + if !gpu.device_ids.is_empty() { + return Ok(None); + } + let count = + usize::try_from(gpu.count.unwrap_or(1)).map_err(|_| CdiGpuSelectionError::InvalidCount)?; + if count == 0 { + return Err(CdiGpuSelectionError::InvalidCount); + } + Ok(Some(count)) +} + +/// Resolve a driver GPU request into CDI device identifiers. +/// +/// `None` means no GPU was requested. A GPU request with explicit device IDs +/// passes through unchanged. A GPU request with no explicit device IDs uses the +/// driver-selected default CDI IDs. +pub fn cdi_gpu_device_ids( + gpu: Option<&DriverGpuResourceRequirement>, + selected_default_devices: Option<&[String]>, +) -> Result>, CdiGpuSelectionError> { + let Some(gpu) = gpu else { + return Ok(None); + }; + if !gpu.device_ids.is_empty() { + return Ok(Some(gpu.device_ids.clone())); + } + let requested = + cdi_gpu_default_device_count(Some(gpu))?.expect("GPU request has default count"); + let devices = selected_default_devices.ok_or(CdiGpuSelectionError::MissingDefaultDevice)?; + if devices.len() != requested { + return Err(CdiGpuSelectionError::MissingDefaultDevice); + } + Ok(Some(devices.to_vec())) +} + +fn cdi_nvidia_gpu_suffix(id: &str) -> Option<&str> { + id.strip_prefix(CDI_NVIDIA_GPU_PREFIX) } #[cfg(test)] mod tests { use super::*; + fn gpu_request(device_ids: Vec<&str>, count: Option) -> DriverGpuResourceRequirement { + DriverGpuResourceRequirement { + device_ids: device_ids.into_iter().map(str::to_string).collect(), + count, + } + } + #[test] fn cdi_gpu_device_ids_returns_none_when_absent() { - assert_eq!(cdi_gpu_device_ids(false, ""), None); + assert_eq!(cdi_gpu_device_ids(None, None), Ok(None)); + } + + #[test] + fn cdi_gpu_device_ids_uses_selected_default_device() { + let request = gpu_request(vec![], None); + let selected = vec!["nvidia.com/gpu=0".to_string()]; + + assert_eq!( + cdi_gpu_device_ids(Some(&request), Some(&selected)), + Ok(Some(vec!["nvidia.com/gpu=0".to_string()])) + ); + } + + #[test] + fn cdi_gpu_device_ids_uses_selected_count_devices() { + let request = gpu_request(vec![], Some(2)); + let selected = vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string(), + ]; + + assert_eq!( + cdi_gpu_device_ids(Some(&request), Some(&selected)), + Ok(Some(selected)) + ); } #[test] - fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() { + fn cdi_gpu_device_ids_rejects_missing_default_device() { + let request = gpu_request(vec![], None); + assert_eq!( - cdi_gpu_device_ids(true, ""), - Some(vec![CDI_GPU_DEVICE_ALL.to_string()]) + cdi_gpu_device_ids(Some(&request), None), + Err(CdiGpuSelectionError::MissingDefaultDevice) ); } #[test] - fn cdi_gpu_device_ids_passes_explicit_device_id_through() { + fn cdi_gpu_device_ids_rejects_wrong_selected_device_count() { + let request = gpu_request(vec![], Some(2)); + let selected = vec!["nvidia.com/gpu=0".to_string()]; + + assert_eq!( + cdi_gpu_device_ids(Some(&request), Some(&selected)), + Err(CdiGpuSelectionError::MissingDefaultDevice) + ); + } + + #[test] + fn cdi_gpu_device_ids_rejects_zero_count() { + let request = gpu_request(vec![], Some(0)); + + assert_eq!( + cdi_gpu_device_ids(Some(&request), Some(&[])), + Err(CdiGpuSelectionError::InvalidCount) + ); + } + + #[test] + fn cdi_gpu_device_ids_passes_device_ids_through() { + let request = gpu_request(vec!["nvidia.com/gpu=0", "nvidia.com/gpu=1"], None); + + assert_eq!( + cdi_gpu_device_ids(Some(&request), None), + Ok(Some(vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string() + ])) + ); + } + + #[test] + fn default_device_count_ignores_explicit_device_ids() { + let request = gpu_request(vec!["nvidia.com/gpu=0"], Some(2)); + + assert_eq!(cdi_gpu_default_device_count(Some(&request)), Ok(None)); + } + + #[test] + fn inventory_filters_and_deduplicates_nvidia_gpu_ids() { + let inventory = CdiGpuInventory::new([ + "nvidia.com/gpu=1", + "vendor.example/device=0", + "nvidia.com/gpu=1", + " nvidia.com/gpu=0 ", + ]); + + assert_eq!( + inventory.as_slice(), + &vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string() + ] + ); + } + + #[test] + fn round_robin_prefers_indexed_family_and_sorts_numerically() { + let inventory = CdiGpuInventory::new([ + "nvidia.com/gpu=10", + "nvidia.com/gpu=UUID-b", + "nvidia.com/gpu=2", + "nvidia.com/gpu=all", + ]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=2".to_string()) + ); + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=10".to_string()) + ); + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=2".to_string()) + ); + } + + #[test] + fn round_robin_uses_uuid_family_when_no_indexed_ids_exist() { + let inventory = CdiGpuInventory::new(["nvidia.com/gpu=UUID-b", "nvidia.com/gpu=UUID-a"]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=UUID-a".to_string()) + ); + } + + #[test] + fn round_robin_uses_all_only_inventory() { + let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.next_default_device_id(&inventory), + Ok(CDI_GPU_DEVICE_ALL.to_string()) + ); + } + + #[test] + fn round_robin_rejects_empty_inventory() { + let inventory = CdiGpuInventory::new(["vendor.example/device=0"]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.next_default_device_id(&inventory), + Err(CdiGpuSelectionError::NoAvailableDevices) + ); + } + + #[test] + fn peek_does_not_advance_round_robin_cursor() { + let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.peek_default_device_id(&inventory), + Ok("nvidia.com/gpu=0".to_string()) + ); + assert_eq!( + selector.peek_default_device_id(&inventory), + Ok("nvidia.com/gpu=0".to_string()) + ); + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=0".to_string()) + ); + assert_eq!( + selector.next_default_device_id(&inventory), + Ok("nvidia.com/gpu=1".to_string()) + ); + } + + #[test] + fn round_robin_selects_requested_count_and_advances_by_count() { + let inventory = + CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"]); + let selector = CdiGpuRoundRobin::new(); + + assert_eq!( + selector.next_default_device_ids(&inventory, 2), + Ok(vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string() + ]) + ); + assert_eq!( + selector.next_default_device_ids(&inventory, 2), + Ok(vec![ + "nvidia.com/gpu=2".to_string(), + "nvidia.com/gpu=0".to_string() + ]) + ); + } + + #[test] + fn round_robin_rejects_count_above_inventory_size() { + let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0"]); + let selector = CdiGpuRoundRobin::new(); + assert_eq!( - cdi_gpu_device_ids(true, "nvidia.com/gpu=0"), - Some(vec!["nvidia.com/gpu=0".to_string()]) + selector.peek_default_device_ids(&inventory, 2), + Err(CdiGpuSelectionError::InsufficientAvailableDevices { + requested: 2, + available: 1 + }) ); } } diff --git a/crates/openshell-driver-docker/README.md b/crates/openshell-driver-docker/README.md index ea57f44e4..f3bc18354 100644 --- a/crates/openshell-driver-docker/README.md +++ b/crates/openshell-driver-docker/README.md @@ -32,7 +32,7 @@ contract: | `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. | | `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. | | `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. | -| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. | +| CDI GPU request | Uses explicit `resource_requirements.gpu.device_ids` when set; otherwise selects one or `resource_requirements.gpu.count` NVIDIA CDI devices from Docker's discovered CDI inventory. | The agent child process does not retain these supervisor privileges. diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index e30ee7754..5646461ba 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -25,20 +25,23 @@ use openshell_core::driver_utils::{ LABEL_MANAGED_BY, LABEL_MANAGED_BY_VALUE, LABEL_SANDBOX_ID, LABEL_SANDBOX_NAME, LABEL_SANDBOX_NAMESPACE, SUPERVISOR_IMAGE_BINARY_PATH, }; -use openshell_core::gpu::cdi_gpu_device_ids; +use openshell_core::gpu::{ + CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, cdi_gpu_default_device_count, + cdi_gpu_device_ids, +}; use openshell_core::progress::{ PROGRESS_STEP_PULLING_IMAGE, PROGRESS_STEP_REQUESTING_SANDBOX, PROGRESS_STEP_STARTING_SANDBOX, format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail, }; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, - DriverCondition, DriverPlatformEvent, DriverSandbox, DriverSandboxStatus, - DriverSandboxTemplate, GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, - GetSandboxResponse, ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, - StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, - WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, - WatchSandboxesRequest, WatchSandboxesSandboxEvent, compute_driver_server::ComputeDriver, - watch_sandboxes_event, + DriverCondition, DriverGpuResourceRequirement, DriverPlatformEvent, DriverSandbox, + DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetCapabilitiesResponse, + GetSandboxRequest, GetSandboxResponse, ListSandboxesRequest, ListSandboxesResponse, + StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, + ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, + WatchSandboxesPlatformEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent, + compute_driver_server::ComputeDriver, watch_sandboxes_event, }; use openshell_core::{Config, Error, Result as CoreResult}; use std::collections::HashMap; @@ -220,6 +223,7 @@ struct DockerDriverRuntimeConfig { guest_tls: Option, daemon_version: String, supports_gpu: bool, + cdi_gpu_inventory: CdiGpuInventory, sandbox_pids_limit: i64, } @@ -239,6 +243,7 @@ pub struct DockerComputeDriver { events: broadcast::Sender, pending: Arc>>, supervisor_readiness: Arc, + gpu_selector: Arc, } struct PendingSandboxRecord { @@ -279,6 +284,7 @@ impl DockerComputeDriver { .cdi_spec_dirs .as_ref() .is_some_and(|dirs| !dirs.is_empty()); + let cdi_gpu_inventory = docker_cdi_gpu_inventory(&info); validate_sandbox_pids_limit(docker_config.sandbox_pids_limit)?; let gateway_port = config.bind_address.port(); if gateway_port == 0 { @@ -326,11 +332,13 @@ impl DockerComputeDriver { guest_tls, daemon_version: version.version.unwrap_or_else(|| "unknown".to_string()), supports_gpu, + cdi_gpu_inventory, sandbox_pids_limit: docker_config.sandbox_pids_limit, }, events: broadcast::channel(WATCH_BUFFER).0, pending: Arc::new(Mutex::new(HashMap::new())), supervisor_readiness, + gpu_selector: Arc::new(CdiGpuRoundRobin::new()), }; let poll_driver = driver.clone(); @@ -375,7 +383,7 @@ impl DockerComputeDriver { "docker sandboxes require a template image", )); } - Self::validate_gpu_request(spec.gpu, config.supports_gpu)?; + Self::validate_gpu_request(driver_gpu_requirement(spec), config.supports_gpu)?; if !template.agent_socket_path.trim().is_empty() { return Err(Status::failed_precondition( "docker compute driver does not support template.agent_socket_path", @@ -409,8 +417,11 @@ impl DockerComputeDriver { )) } - fn validate_gpu_request(gpu: bool, supports_gpu: bool) -> Result<(), Status> { - if gpu && !supports_gpu { + fn validate_gpu_request( + gpu: Option<&DriverGpuResourceRequirement>, + supports_gpu: bool, + ) -> Result<(), Status> { + if gpu.is_some() && !supports_gpu { return Err(Status::failed_precondition( "docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.", )); @@ -418,6 +429,45 @@ impl DockerComputeDriver { Ok(()) } + fn peek_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + ) -> Result>, Status> { + self.selected_default_gpu_devices(sandbox, false) + } + + fn next_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + ) -> Result>, Status> { + self.selected_default_gpu_devices(sandbox, true) + } + + fn selected_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + consume: bool, + ) -> Result>, Status> { + let Some(spec) = sandbox.spec.as_ref() else { + return Ok(None); + }; + let Some(count) = cdi_gpu_default_device_count(driver_gpu_requirement(spec)) + .map_err(docker_gpu_selection_status)? + else { + return Ok(None); + }; + + let selected = if consume { + self.gpu_selector + .next_default_device_ids(&self.config.cdi_gpu_inventory, count) + } else { + self.gpu_selector + .peek_default_device_ids(&self.config.cdi_gpu_inventory, count) + } + .map_err(docker_gpu_selection_status)?; + Ok(Some(selected)) + } + async fn get_sandbox_snapshot( &self, sandbox_id: &str, @@ -455,7 +505,12 @@ impl DockerComputeDriver { async fn create_sandbox_inner(&self, sandbox: &DriverSandbox) -> Result<(), Status> { Self::validate_sandbox(sandbox, &self.config)?; Self::validate_sandbox_auth(sandbox)?; - let _ = build_container_create_body(sandbox, &self.config)?; + let selected_default_gpu = self.peek_default_gpu_devices(sandbox)?; + let _ = build_container_create_body_with_default( + sandbox, + &self.config, + selected_default_gpu.as_deref(), + )?; if self .find_managed_container_summary(&sandbox.id, &sandbox.name) @@ -529,7 +584,18 @@ impl DockerComputeDriver { })?; let container_name = container_name_for_sandbox(sandbox); - let create_body = build_container_create_body(sandbox, &self.config).map_err(|status| { + let selected_default_gpu = self.next_default_gpu_devices(sandbox).map_err(|status| { + if token_file_created { + cleanup_sandbox_token_file(sandbox, &self.config); + } + DockerProvisioningFailure::new("ContainerCreateFailed", status.message()) + })?; + let create_body = build_container_create_body_with_default( + sandbox, + &self.config, + selected_default_gpu.as_deref(), + ) + .map_err(|status| { if token_file_created { cleanup_sandbox_token_file(sandbox, &self.config); } @@ -1165,6 +1231,7 @@ impl ComputeDriver for DockerComputeDriver { .sandbox .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; Self::validate_sandbox(&sandbox, &self.config)?; + let _ = self.peek_default_gpu_devices(&sandbox)?; Ok(Response::new(ValidateSandboxCreateResponse {})) } @@ -1713,19 +1780,58 @@ fn build_environment(sandbox: &DriverSandbox, config: &DockerDriverRuntimeConfig .collect() } -fn docker_gpu_device_requests(gpu: bool, gpu_device: &str) -> Option> { - cdi_gpu_device_ids(gpu, gpu_device).map(|device_ids| { - vec![DeviceRequest { - driver: Some("cdi".to_string()), - device_ids: Some(device_ids), - ..Default::default() - }] - }) +fn driver_gpu_requirement( + spec: &openshell_core::proto::compute::v1::DriverSandboxSpec, +) -> Option<&DriverGpuResourceRequirement> { + spec.resource_requirements + .as_ref() + .and_then(|requirements| requirements.gpu.as_ref()) +} + +fn docker_cdi_gpu_inventory(info: &SystemInfo) -> CdiGpuInventory { + CdiGpuInventory::new( + info.discovered_devices + .as_deref() + .unwrap_or_default() + .iter() + .filter(|device| device.source.as_deref() == Some("cdi")) + .filter_map(|device| device.id.as_deref()), + ) } +fn docker_gpu_selection_status(err: CdiGpuSelectionError) -> Status { + Status::failed_precondition(err.to_string()) +} + +fn docker_gpu_device_requests( + gpu: Option<&DriverGpuResourceRequirement>, + selected_default_devices: Option<&[String]>, +) -> Result>, Status> { + cdi_gpu_device_ids(gpu, selected_default_devices) + .map(|device_ids| { + device_ids.map(|device_ids| { + vec![DeviceRequest { + driver: Some("cdi".to_string()), + device_ids: Some(device_ids), + ..Default::default() + }] + }) + }) + .map_err(docker_gpu_selection_status) +} + +#[cfg(test)] fn build_container_create_body( sandbox: &DriverSandbox, config: &DockerDriverRuntimeConfig, +) -> Result { + build_container_create_body_with_default(sandbox, config, None) +} + +fn build_container_create_body_with_default( + sandbox: &DriverSandbox, + config: &DockerDriverRuntimeConfig, + selected_default_devices: Option<&[String]>, ) -> Result { let spec = sandbox .spec @@ -1765,7 +1871,10 @@ fn build_container_create_body( nano_cpus: resource_limits.nano_cpus, memory: resource_limits.memory_bytes, pids_limit: docker_pids_limit(config.sandbox_pids_limit)?, - device_requests: docker_gpu_device_requests(spec.gpu, &spec.gpu_device), + device_requests: docker_gpu_device_requests( + driver_gpu_requirement(spec), + selected_default_devices, + )?, binds: Some(build_binds(sandbox, config)?), restart_policy: Some(RestartPolicy { name: Some(RestartPolicyNameEnum::UNLESS_STOPPED), diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index c9b34ff8f..1bd229fc7 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use super::*; -use openshell_core::config::{CDI_GPU_DEVICE_ALL, DEFAULT_SERVER_PORT}; +use bollard::models::DeviceInfo; +use openshell_core::config::DEFAULT_SERVER_PORT; use openshell_core::driver_utils::{ LABEL_MANAGED_BY, LABEL_MANAGED_BY_VALUE, LABEL_SANDBOX_ID, LABEL_SANDBOX_NAME, LABEL_SANDBOX_NAMESPACE, @@ -13,7 +14,8 @@ use openshell_core::progress::{ PROGRESS_STEP_STARTING_SANDBOX, }; use openshell_core::proto::compute::v1::{ - DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate, + DriverGpuResourceRequirement, DriverResourceRequirements, DriverSandboxResourceRequirements, + DriverSandboxSpec, DriverSandboxTemplate, }; use std::fs; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; @@ -23,6 +25,15 @@ use tempfile::TempDir; const TLS_MOUNT_DIR: &str = "/etc/openshell/tls/client"; static ENV_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); +fn gpu_resource_requirements( + device_ids: Vec, + count: Option, +) -> DriverSandboxResourceRequirements { + DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { device_ids, count }), + } +} + fn test_sandbox() -> DriverSandbox { // Mirrors the gateway-supplied request: the public `Sandbox` API no // longer carries `namespace`, so the gateway elides the field and the @@ -42,9 +53,8 @@ fn test_sandbox() -> DriverSandbox { resources: None, platform_config: None, }), - gpu: false, - gpu_device: String::new(), sandbox_token: String::new(), + resource_requirements: None, }), status: None, } @@ -75,10 +85,58 @@ fn runtime_config() -> DockerDriverRuntimeConfig { }), daemon_version: "28.0.0".to_string(), supports_gpu: false, + cdi_gpu_inventory: CdiGpuInventory::default(), sandbox_pids_limit: DEFAULT_SANDBOX_PIDS_LIMIT, } } +#[test] +fn docker_cdi_gpu_inventory_filters_discovered_cdi_gpu_devices() { + let info = SystemInfo { + discovered_devices: Some(vec![ + DeviceInfo { + source: Some("cdi".to_string()), + id: Some("example.com/device=0".to_string()), + }, + DeviceInfo { + source: Some("cdi".to_string()), + id: Some("nvidia.com/gpu=1".to_string()), + }, + DeviceInfo { + source: Some("udev".to_string()), + id: Some("nvidia.com/gpu=2".to_string()), + }, + DeviceInfo { + source: Some("cdi".to_string()), + id: Some("nvidia.com/gpu=0".to_string()), + }, + ]), + ..Default::default() + }; + + let inventory = docker_cdi_gpu_inventory(&info); + + assert_eq!( + inventory.as_slice(), + &vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string() + ] + ); +} + +#[test] +fn docker_cdi_gpu_inventory_treats_missing_discovered_devices_as_empty() { + let info = SystemInfo { + cdi_spec_dirs: Some(vec!["/etc/cdi".to_string()]), + ..Default::default() + }; + + let inventory = docker_cdi_gpu_inventory(&info); + + assert!(inventory.is_empty()); +} + #[test] fn container_visible_endpoint_rewrites_loopback_hosts() { assert_eq!( @@ -605,7 +663,8 @@ fn build_container_create_body_clears_inherited_cmd() { fn validate_sandbox_rejects_gpu_when_cdi_unavailable() { let config = runtime_config(); let mut sandbox = test_sandbox(); - sandbox.spec.as_mut().unwrap().gpu = true; + sandbox.spec.as_mut().unwrap().resource_requirements = + Some(gpu_resource_requirements(vec![], None)); let err = DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap_err(); @@ -613,6 +672,17 @@ fn validate_sandbox_rejects_gpu_when_cdi_unavailable() { assert!(err.message().contains("Docker CDI")); } +#[test] +fn validate_sandbox_accepts_gpu_count_when_cdi_available() { + let mut config = runtime_config(); + config.supports_gpu = true; + let mut sandbox = test_sandbox(); + sandbox.spec.as_mut().unwrap().resource_requirements = + Some(gpu_resource_requirements(vec![], Some(2))); + + DockerComputeDriver::validate_sandbox(&sandbox, &config).unwrap(); +} + #[test] fn validate_sandbox_auth_requires_gateway_token() { let mut sandbox = test_sandbox(); @@ -636,13 +706,16 @@ fn validate_sandbox_auth_accepts_gateway_token() { } #[test] -fn build_container_create_body_maps_gpu_to_all_cdi_device() { +fn build_container_create_body_maps_empty_gpu_request_to_selected_default_cdi_device() { let mut config = runtime_config(); config.supports_gpu = true; let mut sandbox = test_sandbox(); - sandbox.spec.as_mut().unwrap().gpu = true; + sandbox.spec.as_mut().unwrap().resource_requirements = + Some(gpu_resource_requirements(vec![], None)); + let selected = vec!["nvidia.com/gpu=1".to_string()]; - let create_body = build_container_create_body(&sandbox, &config).unwrap(); + let create_body = + build_container_create_body_with_default(&sandbox, &config, Some(&selected)).unwrap(); let request = create_body .host_config .as_ref() @@ -653,18 +726,64 @@ fn build_container_create_body_maps_gpu_to_all_cdi_device() { assert_eq!(request.driver.as_deref(), Some("cdi")); assert_eq!( request.device_ids.as_ref().unwrap(), - &vec![CDI_GPU_DEVICE_ALL.to_string()] + &vec!["nvidia.com/gpu=1".to_string()] ); } #[test] -fn build_container_create_body_passes_explicit_cdi_device_id_through() { +fn build_container_create_body_maps_gpu_count_to_selected_cdi_devices() { let mut config = runtime_config(); config.supports_gpu = true; let mut sandbox = test_sandbox(); - let spec = sandbox.spec.as_mut().unwrap(); - spec.gpu = true; - spec.gpu_device = "nvidia.com/gpu=0".to_string(); + sandbox.spec.as_mut().unwrap().resource_requirements = + Some(gpu_resource_requirements(vec![], Some(2))); + let selected = vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string(), + ]; + + let create_body = + build_container_create_body_with_default(&sandbox, &config, Some(&selected)).unwrap(); + let request = create_body + .host_config + .as_ref() + .and_then(|host_config| host_config.device_requests.as_ref()) + .and_then(|requests| requests.first()) + .expect("GPU request should add a Docker device request"); + + assert_eq!(request.driver.as_deref(), Some("cdi")); + assert_eq!(request.device_ids.as_ref().unwrap(), &selected); +} + +#[test] +fn build_container_create_body_rejects_missing_default_cdi_devices() { + let mut config = runtime_config(); + config.supports_gpu = true; + let mut sandbox = test_sandbox(); + sandbox.spec.as_mut().unwrap().resource_requirements = + Some(gpu_resource_requirements(vec![], Some(2))); + + let err = build_container_create_body(&sandbox, &config).unwrap_err(); + + assert_eq!(err.code(), tonic::Code::FailedPrecondition); + assert_eq!( + err.message(), + "GPU request requires selected default CDI GPU device IDs" + ); +} + +#[test] +fn build_container_create_body_passes_explicit_cdi_device_ids_through() { + let mut config = runtime_config(); + config.supports_gpu = true; + let mut sandbox = test_sandbox(); + sandbox.spec.as_mut().unwrap().resource_requirements = Some(gpu_resource_requirements( + vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string(), + ], + None, + )); let create_body = build_container_create_body(&sandbox, &config).unwrap(); let request = create_body @@ -677,7 +796,10 @@ fn build_container_create_body_passes_explicit_cdi_device_id_through() { assert_eq!(request.driver.as_deref(), Some("cdi")); assert_eq!( request.device_ids.as_ref().unwrap(), - &vec!["nvidia.com/gpu=0".to_string()] + &vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string() + ] ); } diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md index 1d45a1d83..0ddbc9e2d 100644 --- a/crates/openshell-driver-kubernetes/README.md +++ b/crates/openshell-driver-kubernetes/README.md @@ -49,7 +49,7 @@ pods do not need direct external ingress for SSH. ## GPU Support -When a sandbox requests GPU support, the driver checks node allocatable capacity -for `nvidia.com/gpu` and requests one GPU resource in the workload spec. The -sandbox image must provide the user-space libraries needed by the agent -workload. +When `resource_requirements.gpu` is present, the driver checks node allocatable +capacity for `nvidia.com/gpu` and sets the workload's `nvidia.com/gpu` resource +limit. Requests without an explicit count use one GPU. The sandbox image must +provide the user-space libraries needed by the agent workload. diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 5a43eb980..c1e2c2a74 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -22,11 +22,12 @@ use openshell_core::progress::{ format_bytes, mark_progress_active, mark_progress_complete, mark_progress_detail, }; use openshell_core::proto::compute::v1::{ - DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, - DriverSandbox as Sandbox, DriverSandboxSpec as SandboxSpec, - DriverSandboxStatus as SandboxStatus, DriverSandboxTemplate as SandboxTemplate, - GetCapabilitiesResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, - WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, watch_sandboxes_event, + DriverCondition as SandboxCondition, DriverGpuResourceRequirement, + DriverPlatformEvent as PlatformEvent, DriverSandbox as Sandbox, + DriverSandboxSpec as SandboxSpec, DriverSandboxStatus as SandboxStatus, + DriverSandboxTemplate as SandboxTemplate, GetCapabilitiesResponse, WatchSandboxesDeletedEvent, + WatchSandboxesEvent, WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, + watch_sandboxes_event, }; use std::collections::BTreeMap; use std::pin::Pin; @@ -77,7 +78,17 @@ const SANDBOX_VERSION: &str = "v1alpha1"; pub const SANDBOX_KIND: &str = "Sandbox"; const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; -const GPU_RESOURCE_QUANTITY: &str = "1"; +const DEFAULT_GPU_COUNT: u32 = 1; + +fn driver_gpu_requirement(spec: &SandboxSpec) -> Option<&DriverGpuResourceRequirement> { + spec.resource_requirements + .as_ref() + .and_then(|requirements| requirements.gpu.as_ref()) +} + +fn gpu_has_explicit_device_ids(gpu: Option<&DriverGpuResourceRequirement>) -> bool { + gpu.is_some_and(|gpu| !gpu.device_ids.is_empty()) +} // --------------------------------------------------------------------------- // Default workspace persistence (temporary — will be replaced by snapshotting) @@ -203,8 +214,20 @@ impl KubernetesComputeDriver { } pub async fn validate_sandbox_create(&self, sandbox: &Sandbox) -> Result<(), tonic::Status> { - let gpu_requested = sandbox.spec.as_ref().is_some_and(|spec| spec.gpu); - if gpu_requested + let gpu = sandbox.spec.as_ref().and_then(driver_gpu_requirement); + self.validate_gpu_request(gpu).await + } + + async fn validate_gpu_request( + &self, + gpu: Option<&DriverGpuResourceRequirement>, + ) -> Result<(), tonic::Status> { + if gpu_has_explicit_device_ids(gpu) { + return Err(tonic::Status::invalid_argument( + "kubernetes compute driver does not support explicit GPU device IDs", + )); + } + if gpu.is_some() && !self.has_gpu_capacity().await.map_err(|err| { tonic::Status::internal(format!("check GPU node capacity failed: {err}")) })? @@ -296,6 +319,14 @@ impl KubernetesComputeDriver { } pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result<(), KubernetesDriverError> { + if let Some(gpu) = sandbox.spec.as_ref().and_then(driver_gpu_requirement) + && gpu_has_explicit_device_ids(Some(gpu)) + { + return Err(KubernetesDriverError::Precondition( + "kubernetes compute driver does not support explicit GPU device IDs".to_string(), + )); + } + let name = sandbox.name.as_str(); info!( sandbox_id = %sandbox.id, @@ -1102,7 +1133,13 @@ fn sandbox_to_k8s_spec( if let Some(template) = spec.template.as_ref() { root.insert( "podTemplate".to_string(), - sandbox_template_to_k8s(template, spec.gpu, &pod_env, inject_workspace, params), + sandbox_template_to_k8s( + template, + driver_gpu_requirement(spec), + &pod_env, + inject_workspace, + params, + ), ); if !template.agent_socket_path.is_empty() { root.insert( @@ -1134,7 +1171,7 @@ fn sandbox_to_k8s_spec( "podTemplate".to_string(), sandbox_template_to_k8s( &SandboxTemplate::default(), - spec.is_some_and(|s| s.gpu), + spec.and_then(driver_gpu_requirement), &pod_env, inject_workspace, params, @@ -1149,7 +1186,7 @@ fn sandbox_to_k8s_spec( fn sandbox_template_to_k8s( template: &SandboxTemplate, - gpu: bool, + gpu: Option<&DriverGpuResourceRequirement>, spec_environment: &std::collections::HashMap, inject_workspace: bool, params: &SandboxPodParams<'_>, @@ -1203,7 +1240,7 @@ fn sandbox_template_to_k8s( if use_user_namespaces { spec.insert("hostUsers".to_string(), serde_json::json!(false)); - if gpu { + if gpu.is_some() { warn!( "GPU sandbox with user namespaces enabled — \ NVIDIA device plugin compatibility is unverified" @@ -1384,7 +1421,10 @@ fn image_pull_secret_refs(secrets: &[String]) -> Vec { .collect() } -fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option { +fn container_resources( + template: &SandboxTemplate, + gpu: Option<&DriverGpuResourceRequirement>, +) -> Option { // Start from the raw resources passthrough in platform_config (preserves // custom resource types like GPU limits that users set via the public API // Struct), then overlay the typed DriverResourceRequirements on top. @@ -1417,8 +1457,8 @@ fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option Option> = std::sync::LazyLock::new(|| std::sync::Mutex::new(())); + fn gpu_request(count: Option) -> DriverGpuResourceRequirement { + DriverGpuResourceRequirement { + device_ids: vec![], + count, + } + } + #[test] fn kube_pulling_event_adds_image_progress_metadata() { let mut metadata = std::collections::HashMap::new(); @@ -1994,7 +2041,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &SandboxTemplate::default(), - true, + Some(&gpu_request(None)), &std::collections::HashMap::new(), true, ¶ms, @@ -2007,10 +2054,46 @@ mod tests { ); assert_eq!( pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME], - serde_json::json!(GPU_RESOURCE_QUANTITY) + serde_json::json!(DEFAULT_GPU_COUNT.to_string()) + ); + } + + #[test] + fn gpu_sandbox_uses_requested_gpu_count() { + let pod_template = { + let params = SandboxPodParams::default(); + sandbox_template_to_k8s( + &SandboxTemplate::default(), + Some(&gpu_request(Some(2))), + &std::collections::HashMap::new(), + true, + ¶ms, + ) + }; + + assert_eq!( + pod_template["spec"]["containers"][0]["resources"]["limits"][GPU_RESOURCE_NAME], + serde_json::json!("2") ); } + #[test] + fn gpu_has_explicit_device_ids_only_when_ids_are_present() { + assert!(!gpu_has_explicit_device_ids(None)); + assert!(!gpu_has_explicit_device_ids(Some( + &DriverGpuResourceRequirement { + device_ids: vec![], + count: None, + } + ))); + assert!(gpu_has_explicit_device_ids(Some( + &DriverGpuResourceRequirement { + device_ids: vec!["nvidia.com/gpu=0".to_string()], + count: None, + } + ))); + } + #[test] fn gpu_sandbox_uses_template_runtime_class_name_when_set() { let template = SandboxTemplate { @@ -2030,7 +2113,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - true, + Some(&gpu_request(None)), &std::collections::HashMap::new(), true, ¶ms, @@ -2062,7 +2145,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2090,7 +2173,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - true, + Some(&gpu_request(None)), &std::collections::HashMap::new(), true, ¶ms, @@ -2101,7 +2184,7 @@ mod tests { assert_eq!(limits["cpu"], serde_json::json!("2")); assert_eq!( limits[GPU_RESOURCE_NAME], - serde_json::json!(GPU_RESOURCE_QUANTITY) + serde_json::json!(DEFAULT_GPU_COUNT.to_string()) ); } @@ -2121,7 +2204,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2144,7 +2227,7 @@ mod tests { }; sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2169,7 +2252,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2192,7 +2275,7 @@ mod tests { }; sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2331,7 +2414,7 @@ mod tests { }; let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), false, // user provided custom VCTs ¶ms, @@ -2369,7 +2452,7 @@ mod tests { }; sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2434,7 +2517,7 @@ mod tests { let params = SandboxPodParams::default(); // cluster default is off let pod_template = sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2472,7 +2555,7 @@ mod tests { }; let pod_template = sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2498,7 +2581,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2520,7 +2603,7 @@ mod tests { }; let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2542,7 +2625,7 @@ mod tests { fn sandbox_template_omits_empty_image_pull_secrets() { let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, &SandboxPodParams::default(), @@ -2567,7 +2650,7 @@ mod tests { }; let pod_template = sandbox_template_to_k8s( &SandboxTemplate::default(), - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2596,7 +2679,7 @@ mod tests { }; let pod_template = sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), true, ¶ms, @@ -2724,7 +2807,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), false, ¶ms, @@ -2785,7 +2868,7 @@ mod tests { let params = SandboxPodParams::default(); sandbox_template_to_k8s( &template, - false, + None, &std::collections::HashMap::new(), false, ¶ms, diff --git a/crates/openshell-driver-podman/README.md b/crates/openshell-driver-podman/README.md index 77b42ba37..5cc645e76 100644 --- a/crates/openshell-driver-podman/README.md +++ b/crates/openshell-driver-podman/README.md @@ -46,7 +46,7 @@ The container spec in `container.rs` sets these security-critical fields: | `no_new_privileges` | `true` | Prevents privilege escalation after exec. | | `seccomp_profile_path` | `unconfined` | The supervisor installs its own policy-aware BPF filter. A container-level profile can block Landlock/seccomp syscalls during setup. | | `mounts` | Private tmpfs at `/run/netns` | Lets the supervisor create named network namespaces in rootless Podman. | -| CDI GPU devices | Sandbox `gpu_device` value when set, otherwise all NVIDIA GPUs | Exposes requested GPUs to GPU-enabled sandbox containers. | +| CDI GPU devices | Explicit `resource_requirements.gpu.device_ids` when set; otherwise one or `resource_requirements.gpu.count` NVIDIA CDI devices selected from local `/dev/nvidiaN` inventory. | Exposes requested GPUs to GPU-enabled sandbox containers. | The restricted agent child does not retain these supervisor privileges. diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index 13f053e93..af8517d1f 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -4,7 +4,7 @@ //! Container spec construction for the Podman driver. use crate::config::PodmanComputeConfig; -use openshell_core::gpu::cdi_gpu_device_ids; +use openshell_core::gpu::{CdiGpuSelectionError, cdi_gpu_device_ids}; use openshell_core::proto::compute::v1::DriverSandbox; use serde::Serialize; use serde_json::Value; @@ -378,13 +378,22 @@ fn podman_pids_limit(value: i64) -> Option { } /// Build CDI GPU device list if GPU is requested. -fn build_devices(sandbox: &DriverSandbox) -> Option> { - let spec = sandbox.spec.as_ref()?; - cdi_gpu_device_ids(spec.gpu, &spec.gpu_device).map(|device_ids| { - device_ids - .into_iter() - .map(|path| LinuxDevice { path }) - .collect() +fn build_devices( + sandbox: &DriverSandbox, + selected_default_devices: Option<&[String]>, +) -> Result>, CdiGpuSelectionError> { + let gpu = sandbox.spec.as_ref().and_then(|spec| { + spec.resource_requirements + .as_ref() + .and_then(|requirements| requirements.gpu.as_ref()) + }); + cdi_gpu_device_ids(gpu, selected_default_devices).map(|device_ids| { + device_ids.map(|device_ids| { + device_ids + .into_iter() + .map(|path| LinuxDevice { path }) + .collect() + }) }) } @@ -395,12 +404,23 @@ pub fn build_container_spec(sandbox: &DriverSandbox, config: &PodmanComputeConfi build_container_spec_with_token(sandbox, config, None) } +#[cfg(test)] #[must_use] pub fn build_container_spec_with_token( sandbox: &DriverSandbox, config: &PodmanComputeConfig, token_host_path: Option<&std::path::Path>, ) -> Value { + build_container_spec_with_token_and_gpu_default(sandbox, config, token_host_path, None) + .expect("container spec requires caller-selected default GPU devices") +} + +pub fn build_container_spec_with_token_and_gpu_default( + sandbox: &DriverSandbox, + config: &PodmanComputeConfig, + token_host_path: Option<&std::path::Path>, + selected_default_devices: Option<&[String]>, +) -> Result { let image = resolve_image(sandbox, config); let name = container_name(&sandbox.name); let vol = volume_name(&sandbox.id); @@ -408,7 +428,7 @@ pub fn build_container_spec_with_token( let env = build_env(sandbox, config, image); let labels = build_labels(sandbox); let resource_limits = build_resource_limits(sandbox, config); - let devices = build_devices(sandbox); + let devices = build_devices(sandbox, selected_default_devices)?; // Network configuration -- always bridge mode. // Matches libpod's network spec format `{name: {opts}}`; the unit-struct @@ -620,7 +640,7 @@ pub fn build_container_spec_with_token( }], }; - serde_json::to_value(container_spec).expect("ContainerSpec serialization cannot fail") + Ok(serde_json::to_value(container_spec).expect("ContainerSpec serialization cannot fail")) } fn hostadd_entries(config: &PodmanComputeConfig) -> Vec { @@ -806,32 +826,113 @@ mod tests { } #[test] - fn container_spec_maps_empty_gpu_request_to_all_cdi_device() { - use openshell_core::config::CDI_GPU_DEVICE_ALL; - use openshell_core::proto::compute::v1::DriverSandboxSpec; + fn container_spec_maps_empty_gpu_request_to_selected_default_cdi_device() { + use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandboxResourceRequirements, DriverSandboxSpec, + }; let mut sandbox = test_sandbox("test-id", "test-name"); sandbox.spec = Some(DriverSandboxSpec { - gpu: true, + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec![], + count: None, + }), + }), ..Default::default() }); let config = test_config(); - let spec = build_container_spec(&sandbox, &config); + let selected = vec!["nvidia.com/gpu=1".to_string()]; + let spec = build_container_spec_with_token_and_gpu_default( + &sandbox, + &config, + None, + Some(&selected), + ) + .unwrap(); assert_eq!( spec["devices"][0]["path"].as_str(), - Some(CDI_GPU_DEVICE_ALL) + Some("nvidia.com/gpu=1") ); } + #[test] + fn container_spec_maps_gpu_count_to_selected_cdi_devices() { + use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandboxResourceRequirements, DriverSandboxSpec, + }; + + let mut sandbox = test_sandbox("test-id", "test-name"); + sandbox.spec = Some(DriverSandboxSpec { + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }); + let config = test_config(); + let selected = vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=1".to_string(), + ]; + let spec = build_container_spec_with_token_and_gpu_default( + &sandbox, + &config, + None, + Some(&selected), + ) + .unwrap(); + + assert_eq!( + spec["devices"][0]["path"].as_str(), + Some("nvidia.com/gpu=0") + ); + assert_eq!( + spec["devices"][1]["path"].as_str(), + Some("nvidia.com/gpu=1") + ); + } + + #[test] + fn container_spec_rejects_missing_default_cdi_devices() { + use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandboxResourceRequirements, DriverSandboxSpec, + }; + + let mut sandbox = test_sandbox("test-id", "test-name"); + sandbox.spec = Some(DriverSandboxSpec { + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }); + let config = test_config(); + let err = build_container_spec_with_token_and_gpu_default(&sandbox, &config, None, None) + .unwrap_err(); + + assert_eq!(err, CdiGpuSelectionError::MissingDefaultDevice); + } + #[test] fn container_spec_passes_explicit_cdi_device_id_through() { - use openshell_core::proto::compute::v1::DriverSandboxSpec; + use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandboxResourceRequirements, DriverSandboxSpec, + }; let mut sandbox = test_sandbox("test-id", "test-name"); sandbox.spec = Some(DriverSandboxSpec { - gpu: true, - gpu_device: "nvidia.com/gpu=0".to_string(), + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec!["nvidia.com/gpu=0".to_string()], + count: None, + }), + }), ..Default::default() }); let config = test_config(); diff --git a/crates/openshell-driver-podman/src/driver.rs b/crates/openshell-driver-podman/src/driver.rs index e2deb1c63..32269189a 100644 --- a/crates/openshell-driver-podman/src/driver.rs +++ b/crates/openshell-driver-podman/src/driver.rs @@ -10,8 +10,14 @@ use crate::watcher::{ self, WatchStream, driver_sandbox_from_inspect, driver_sandbox_from_list_entry, }; use openshell_core::ComputeDriverError; -use openshell_core::proto::compute::v1::{DriverSandbox, GetCapabilitiesResponse}; -use std::path::PathBuf; +use openshell_core::gpu::{ + CdiGpuInventory, CdiGpuRoundRobin, CdiGpuSelectionError, cdi_gpu_default_device_count, +}; +use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandbox, GetCapabilitiesResponse, +}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; use std::time::Duration; use tracing::{info, warn}; @@ -33,6 +39,8 @@ pub struct PodmanComputeDriver { /// The host's IP on the bridge network. Sandbox containers use this to /// reach the gateway server when no explicit gRPC endpoint is configured. network_gateway_ip: Option, + gpu_inventory: CdiGpuInventory, + gpu_selector: Arc, } impl std::fmt::Debug for PodmanComputeDriver { @@ -41,6 +49,7 @@ impl std::fmt::Debug for PodmanComputeDriver { .field("socket_path", &self.config.socket_path) .field("default_image", &self.config.default_image) .field("network_name", &self.config.network_name) + .field("gpu_inventory", &self.gpu_inventory) .finish() } } @@ -115,6 +124,30 @@ fn cleanup_sandbox_token_file(sandbox_id: &str) { } } +fn local_podman_cdi_gpu_inventory_from(dev_root: &Path) -> CdiGpuInventory { + let Ok(entries) = std::fs::read_dir(dev_root) else { + return CdiGpuInventory::default(); + }; + + let device_ids = entries.filter_map(Result::ok).filter_map(|entry| { + let name = entry.file_name(); + let name = name.to_str()?; + let index = name.strip_prefix("nvidia")?; + (!index.is_empty() && index.chars().all(|ch| ch.is_ascii_digit())) + .then(|| format!("nvidia.com/gpu={index}")) + }); + + CdiGpuInventory::new(device_ids) +} + +fn local_podman_cdi_gpu_inventory() -> CdiGpuInventory { + local_podman_cdi_gpu_inventory_from(Path::new("/dev")) +} + +fn podman_gpu_selection_error(err: CdiGpuSelectionError) -> ComputeDriverError { + ComputeDriverError::Precondition(err.to_string()) +} + impl PodmanComputeDriver { /// Create a new driver, verifying the Podman socket is reachable. pub async fn new(mut config: PodmanComputeConfig) -> Result { @@ -212,6 +245,14 @@ impl PodmanComputeDriver { "Bridge network ready" ); + let gpu_inventory = local_podman_cdi_gpu_inventory(); + if !gpu_inventory.is_empty() { + info!( + device_count = gpu_inventory.as_slice().len(), + "Discovered local Podman NVIDIA CDI GPU devices" + ); + } + // Auto-detect the gRPC callback endpoint when not explicitly // configured. Sandbox containers use host.containers.internal // (injected via hostadd with host-gateway in the container spec) @@ -240,6 +281,8 @@ impl PodmanComputeDriver { client, config, network_gateway_ip, + gpu_inventory, + gpu_selector: Arc::new(CdiGpuRoundRobin::new()), }) } @@ -266,31 +309,52 @@ impl PodmanComputeDriver { &self.config.default_image } - /// Check whether GPU devices are available via CDI. - /// - /// The Podman system info response doesn't directly list CDI devices in all - /// versions. As a heuristic, check if the NVIDIA device node exists (this - /// works for both rootful and rootless). - fn has_gpu_capacity() -> bool { - std::path::Path::new("/dev/nvidia0").exists() - } - /// Validate a sandbox before creation. pub fn validate_sandbox_create( &self, sandbox: &DriverSandbox, ) -> Result<(), ComputeDriverError> { - let gpu_requested = sandbox.spec.as_ref().is_some_and(|s| s.gpu); - Self::validate_gpu_request(gpu_requested) + let _ = self.peek_default_gpu_devices(sandbox)?; + Ok(()) } - fn validate_gpu_request(gpu_requested: bool) -> Result<(), ComputeDriverError> { - if gpu_requested && !Self::has_gpu_capacity() { - return Err(ComputeDriverError::Precondition( - "GPU sandbox requested, but no NVIDIA GPU devices are available.".to_string(), - )); + fn peek_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + ) -> Result>, ComputeDriverError> { + self.selected_default_gpu_devices(sandbox, false) + } + + fn next_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + ) -> Result>, ComputeDriverError> { + self.selected_default_gpu_devices(sandbox, true) + } + + fn selected_default_gpu_devices( + &self, + sandbox: &DriverSandbox, + consume: bool, + ) -> Result>, ComputeDriverError> { + let Some(spec) = sandbox.spec.as_ref() else { + return Ok(None); + }; + let Some(count) = cdi_gpu_default_device_count(driver_gpu_requirement(spec)) + .map_err(podman_gpu_selection_error)? + else { + return Ok(None); + }; + + let selected = if consume { + self.gpu_selector + .next_default_device_ids(&self.gpu_inventory, count) + } else { + self.gpu_selector + .peek_default_device_ids(&self.gpu_inventory, count) } - Ok(()) + .map_err(podman_gpu_selection_error)?; + Ok(Some(selected)) } /// Create a sandbox container. @@ -305,6 +369,7 @@ impl PodmanComputeDriver { "sandbox id is required".into(), )); } + self.validate_sandbox_create(sandbox)?; // Validate the composed container name early, before creating any // resources (volume), so we don't leave orphans when the name is @@ -364,11 +429,27 @@ impl PodmanComputeDriver { }; // 3. Create container. - let spec = container::build_container_spec_with_token( + let selected_default_gpu = match self.next_default_gpu_devices(sandbox) { + Ok(device) => device, + Err(e) => { + let _ = self.client.remove_volume(&vol_name).await; + cleanup_sandbox_token_file(&sandbox.id); + return Err(e); + } + }; + let spec = match container::build_container_spec_with_token_and_gpu_default( sandbox, &self.config, token_host_path.as_deref(), - ); + selected_default_gpu.as_deref(), + ) { + Ok(spec) => spec, + Err(e) => { + let _ = self.client.remove_volume(&vol_name).await; + cleanup_sandbox_token_file(&sandbox.id); + return Err(podman_gpu_selection_error(e)); + } + }; match self.client.create_container(&spec).await { Ok(_) => {} Err(PodmanApiError::Conflict(_)) => { @@ -572,14 +653,31 @@ impl PodmanComputeDriver { } } +fn driver_gpu_requirement( + spec: &openshell_core::proto::compute::v1::DriverSandboxSpec, +) -> Option<&DriverGpuResourceRequirement> { + spec.resource_requirements + .as_ref() + .and_then(|requirements| requirements.gpu.as_ref()) +} + #[cfg(test)] impl PodmanComputeDriver { pub(crate) fn for_tests(config: PodmanComputeConfig) -> Self { + Self::for_tests_with_gpu_inventory(config, CdiGpuInventory::default()) + } + + pub(crate) fn for_tests_with_gpu_inventory( + config: PodmanComputeConfig, + gpu_inventory: CdiGpuInventory, + ) -> Self { let client = PodmanClient::new(config.socket_path.clone()); Self { client, config, network_gateway_ip: None, + gpu_inventory, + gpu_selector: Arc::new(CdiGpuRoundRobin::new()), } } } @@ -653,6 +751,7 @@ mod tests { use super::*; use crate::test_utils::{StubResponse, spawn_podman_stub}; use hyper::StatusCode; + use std::fs; use std::path::PathBuf; #[test] @@ -667,6 +766,114 @@ mod tests { assert!(matches!(err, ComputeDriverError::Message(_))); } + #[test] + fn local_podman_cdi_gpu_inventory_maps_nvidia_device_nodes() { + let root = std::env::temp_dir().join(format!( + "openshell-podman-gpu-test-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("system time should be after unix epoch") + .as_nanos() + )); + fs::create_dir(&root).expect("create temp dev root"); + fs::write(root.join("nvidia2"), "").expect("create nvidia2"); + fs::write(root.join("nvidiactl"), "").expect("create nvidiactl"); + fs::write(root.join("nvidia0"), "").expect("create nvidia0"); + + let inventory = local_podman_cdi_gpu_inventory_from(&root); + + fs::remove_dir_all(&root).expect("remove temp dev root"); + assert_eq!( + inventory.as_slice(), + &vec![ + "nvidia.com/gpu=0".to_string(), + "nvidia.com/gpu=2".to_string() + ] + ); + } + + #[test] + fn validate_sandbox_create_accepts_gpu_count_with_inventory() { + use openshell_core::proto::compute::v1::{ + DriverSandboxResourceRequirements, DriverSandboxSpec, + }; + + let driver = PodmanComputeDriver::for_tests_with_gpu_inventory( + PodmanComputeConfig::default(), + CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]), + ); + let sandbox = DriverSandbox { + spec: Some(DriverSandboxSpec { + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }), + ..Default::default() + }; + + driver.validate_sandbox_create(&sandbox).unwrap(); + } + + #[test] + fn validate_sandbox_create_rejects_gpu_count_above_inventory() { + use openshell_core::proto::compute::v1::{ + DriverSandboxResourceRequirements, DriverSandboxSpec, + }; + + let driver = PodmanComputeDriver::for_tests_with_gpu_inventory( + PodmanComputeConfig::default(), + CdiGpuInventory::new(["nvidia.com/gpu=0"]), + ); + let sandbox = DriverSandbox { + spec: Some(DriverSandboxSpec { + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }), + ..Default::default() + }; + + let err = driver + .validate_sandbox_create(&sandbox) + .expect_err("GPU count above inventory should fail"); + + assert!( + matches!(err, ComputeDriverError::Precondition(message) if message.contains("2 > 1")) + ); + } + + #[test] + fn validate_sandbox_create_passes_explicit_cdi_device_id_without_inventory() { + use openshell_core::proto::compute::v1::{ + DriverSandboxResourceRequirements, DriverSandboxSpec, + }; + + let driver = PodmanComputeDriver::for_tests(PodmanComputeConfig::default()); + let sandbox = DriverSandbox { + spec: Some(DriverSandboxSpec { + resource_requirements: Some(DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { + device_ids: vec!["nvidia.com/gpu=0".to_string()], + count: None, + }), + }), + ..Default::default() + }), + ..Default::default() + }; + + driver.validate_sandbox_create(&sandbox).unwrap(); + } + // ── grpc_endpoint auto-detection ─────────────────────────────────── // // PodmanComputeDriver::new() fills grpc_endpoint when it is empty. @@ -831,7 +1038,7 @@ mod tests { ), ] ); - let _ = std::fs::remove_file(socket_path); + let _ = fs::remove_file(socket_path); } #[tokio::test] @@ -883,6 +1090,6 @@ mod tests { api_path(&format!("/libpod/volumes/{volume_name}")) )] ); - let _ = std::fs::remove_file(socket_path); + let _ = fs::remove_file(socket_path); } } diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index 724bde06c..c5860f9cd 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -52,8 +52,9 @@ sudo -E env "PATH=$PATH" mise run gateway:vm -- --gpu ``` GPU passthrough uses VFIO and requires host support for IOMMU, root privileges -for bind/unbind operations, and a compatible sandbox image. The public GPU -overview lives in the repository `README.md`. +for bind/unbind operations, and a compatible sandbox image. Sandbox GPU requests +arrive as `resource_requirements.gpu`; the VM driver accepts the default request, +one explicit device ID, or a count of one. Point the CLI at the gateway with one of: diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 445905a1e..c60e525ed 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -30,10 +30,11 @@ use openshell_core::progress::{ }; use openshell_core::proto::compute::v1::{ CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse, - DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, - DriverSandbox as Sandbox, DriverSandboxStatus as SandboxStatus, GetCapabilitiesRequest, - GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, ListSandboxesRequest, - ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, + DriverCondition as SandboxCondition, DriverGpuResourceRequirement, + DriverPlatformEvent as PlatformEvent, DriverSandbox as Sandbox, + DriverSandboxStatus as SandboxStatus, GetCapabilitiesRequest, GetCapabilitiesResponse, + GetSandboxRequest, GetSandboxResponse, ListSandboxesRequest, ListSandboxesResponse, + StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, WatchSandboxesRequest, WatchSandboxesSandboxEvent, compute_driver_server::ComputeDriver, watch_sandboxes_event, @@ -615,10 +616,12 @@ impl VmDriver { ))); } - let spec = sandbox.spec.as_ref(); - let is_gpu = spec.is_some_and(|s| s.gpu); - let gpu_device = spec.map_or("", |s| s.gpu_device.as_str()); - let gpu_bdf = if is_gpu { + let gpu_device = sandbox + .spec + .as_ref() + .and_then(driver_gpu_requirement) + .and_then(|gpu| requested_gpu_device(Some(gpu))); + let gpu_bdf = if let Some(gpu_device) = gpu_device { Some(self.assign_gpu_to_record(&sandbox.id, gpu_device).await?) } else { None @@ -2577,15 +2580,7 @@ fn validate_vm_sandbox(sandbox: &Sandbox, gpu_enabled: bool) -> Result<(), Statu .as_ref() .ok_or_else(|| Status::invalid_argument("sandbox spec is required"))?; - if spec.gpu && !gpu_enabled { - return Err(Status::failed_precondition( - "GPU support is not enabled on this driver; start with --gpu", - )); - } - - if !spec.gpu && !spec.gpu_device.is_empty() { - return Err(Status::invalid_argument("gpu_device requires gpu=true")); - } + validate_gpu_request(driver_gpu_requirement(spec), gpu_enabled)?; if let Some(template) = spec.template.as_ref() { if !template.agent_socket_path.is_empty() { @@ -2628,6 +2623,44 @@ fn validate_sandbox_id(sandbox_id: &str) -> Result<(), Status> { Ok(()) } +fn driver_gpu_requirement( + spec: &openshell_core::proto::compute::v1::DriverSandboxSpec, +) -> Option<&DriverGpuResourceRequirement> { + spec.resource_requirements + .as_ref() + .and_then(|requirements| requirements.gpu.as_ref()) +} + +fn requested_gpu_device(gpu: Option<&DriverGpuResourceRequirement>) -> Option<&str> { + let gpu = gpu?; + Some(gpu.device_ids.first().map_or("", String::as_str)) +} + +#[allow(clippy::result_large_err)] +fn validate_gpu_request( + gpu: Option<&DriverGpuResourceRequirement>, + gpu_enabled: bool, +) -> Result<(), Status> { + if gpu.is_some() && !gpu_enabled { + return Err(Status::failed_precondition( + "GPU support is not enabled on this driver; start with --gpu", + )); + } + + if gpu.is_some_and(|gpu| gpu.count.is_some_and(|count| count > 1)) { + return Err(Status::invalid_argument( + "vm compute driver supports at most one GPU", + )); + } + + if gpu.is_some_and(|gpu| gpu.device_ids.len() > 1) { + return Err(Status::invalid_argument( + "vm compute driver supports at most one GPU device ID", + )); + } + Ok(()) +} + #[allow(clippy::result_large_err)] fn parse_registry_reference(image_ref: &str) -> Result { Reference::try_from(image_ref).map_err(|err| { @@ -4412,6 +4445,7 @@ mod tests { PROGRESS_COMPLETE_STEP_KEY, }; use openshell_core::proto::compute::v1::{ + DriverGpuResourceRequirement, DriverSandboxResourceRequirements, DriverSandboxSpec as SandboxSpec, DriverSandboxTemplate as SandboxTemplate, }; use prost_types::{Struct, Value, value::Kind}; @@ -4424,6 +4458,15 @@ mod tests { static ENV_LOCK: std::sync::LazyLock> = std::sync::LazyLock::new(|| std::sync::Mutex::new(())); + fn gpu_resource_requirements( + device_ids: Vec, + count: Option, + ) -> DriverSandboxResourceRequirements { + DriverSandboxResourceRequirements { + gpu: Some(DriverGpuResourceRequirement { device_ids, count }), + } + } + #[test] fn vm_pulling_layer_event_adds_progress_detail_metadata() { let mut event = platform_event( @@ -4491,7 +4534,7 @@ mod tests { let sandbox = Sandbox { id: "sandbox-123".to_string(), spec: Some(SandboxSpec { - gpu: true, + resource_requirements: Some(gpu_resource_requirements(vec![], None)), ..Default::default() }), ..Default::default() @@ -4507,7 +4550,7 @@ mod tests { let sandbox = Sandbox { id: "sandbox-123".to_string(), spec: Some(SandboxSpec { - gpu: true, + resource_requirements: Some(gpu_resource_requirements(vec![], None)), ..Default::default() }), ..Default::default() @@ -4516,20 +4559,76 @@ mod tests { } #[test] - fn validate_vm_sandbox_rejects_gpu_device_without_gpu() { + fn validate_vm_sandbox_accepts_gpu_count_one_when_enabled() { let sandbox = Sandbox { id: "sandbox-123".to_string(), spec: Some(SandboxSpec { - gpu: false, - gpu_device: "0000:2d:00.0".to_string(), + resource_requirements: Some(gpu_resource_requirements(vec![], Some(1))), + ..Default::default() + }), + ..Default::default() + }; + validate_vm_sandbox(&sandbox, true).expect("gpu count one should be accepted"); + } + + #[test] + fn validate_vm_sandbox_rejects_gpu_count_greater_than_one() { + let sandbox = Sandbox { + id: "sandbox-123".to_string(), + spec: Some(SandboxSpec { + resource_requirements: Some(gpu_resource_requirements(vec![], Some(2))), + ..Default::default() + }), + ..Default::default() + }; + let err = + validate_vm_sandbox(&sandbox, true).expect_err("gpu count > 1 should be rejected"); + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("at most one GPU")); + } + + #[test] + fn validate_vm_sandbox_rejects_multiple_gpu_device_ids() { + let sandbox = Sandbox { + id: "sandbox-123".to_string(), + spec: Some(SandboxSpec { + resource_requirements: Some(gpu_resource_requirements( + vec!["0000:2d:00.0".to_string(), "0000:3d:00.0".to_string()], + None, + )), ..Default::default() }), ..Default::default() }; let err = validate_vm_sandbox(&sandbox, true) - .expect_err("gpu_device without gpu should be rejected"); + .expect_err("multiple GPU device IDs should be rejected"); assert_eq!(err.code(), Code::InvalidArgument); - assert!(err.message().contains("gpu_device requires gpu=true")); + assert!(err.message().contains("at most one GPU device ID")); + } + + #[test] + fn requested_gpu_device_returns_none_without_gpu_request() { + assert_eq!(requested_gpu_device(None), None); + } + + #[test] + fn requested_gpu_device_defaults_empty_request_to_inventory_choice() { + let gpu = DriverGpuResourceRequirement { + device_ids: vec![], + count: None, + }; + + assert_eq!(requested_gpu_device(Some(&gpu)), Some("")); + } + + #[test] + fn requested_gpu_device_returns_first_explicit_device_id() { + let gpu = DriverGpuResourceRequirement { + device_ids: vec!["0000:2d:00.0".to_string()], + count: None, + }; + + assert_eq!(requested_gpu_device(Some(&gpu)), Some("0000:2d:00.0")); } #[test] diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 0122f9178..666a7174a 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -17,8 +17,9 @@ use crate::tracing_bus::TracingLogBus; use futures::{Stream, StreamExt}; use openshell_core::ComputeDriverKind; use openshell_core::proto::compute::v1::{ - CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent, - DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus, + CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverGpuResourceRequirement, + DriverPlatformEvent, DriverResourceRequirements, DriverSandbox, + DriverSandboxResourceRequirements, DriverSandboxSpec, DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, ListSandboxesRequest, ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_client::ComputeDriverClient, compute_driver_server::ComputeDriver, @@ -1267,12 +1268,28 @@ fn driver_sandbox_spec_from_public(spec: &SandboxSpec) -> DriverSandboxSpec { .template .as_ref() .map(driver_sandbox_template_from_public), - gpu: spec.gpu, - gpu_device: spec.gpu_device.clone(), + resource_requirements: spec + .resource_requirements + .as_ref() + .map(driver_resource_requirements_from_public), sandbox_token: String::new(), } } +fn driver_resource_requirements_from_public( + requirements: &openshell_core::proto::SandboxResourceRequirements, +) -> DriverSandboxResourceRequirements { + DriverSandboxResourceRequirements { + gpu: requirements + .gpu + .as_ref() + .map(|gpu| DriverGpuResourceRequirement { + device_ids: gpu.device_ids.clone(), + count: gpu.count, + }), + } +} + fn driver_sandbox_template_from_public(template: &SandboxTemplate) -> DriverSandboxTemplate { DriverSandboxTemplate { image: template.image.clone(), @@ -1623,7 +1640,12 @@ fn derive_phase(status: Option<&DriverSandboxStatus>) -> SandboxPhase { } fn rewrite_user_facing_conditions(status: &mut Option, spec: Option<&SandboxSpec>) { - let gpu_requested = spec.is_some_and(|sandbox_spec| sandbox_spec.gpu); + let gpu_requested = spec.is_some_and(|sandbox_spec| { + sandbox_spec + .resource_requirements + .as_ref() + .is_some_and(|requirements| requirements.gpu.is_some()) + }); if !gpu_requested { return; } @@ -1785,6 +1807,7 @@ mod tests { CreateSandboxResponse, DeleteSandboxResponse, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse, StopSandboxRequest, StopSandboxResponse, ValidateSandboxCreateResponse, }; + use openshell_core::proto::{GpuResourceRequirement, SandboxResourceRequirements}; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; @@ -1801,6 +1824,56 @@ mod tests { } } + #[test] + fn driver_sandbox_spec_from_public_preserves_gpu_request_device_ids() { + let public = SandboxSpec { + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec!["nvidia.com/gpu=0".to_string()], + count: None, + }), + }), + ..Default::default() + }; + + let driver = driver_sandbox_spec_from_public(&public); + + assert_eq!( + driver + .resource_requirements + .expect("driver resource requirements should be present") + .gpu + .expect("driver GPU requirement should be present") + .device_ids, + vec!["nvidia.com/gpu=0".to_string()] + ); + } + + #[test] + fn driver_sandbox_spec_from_public_preserves_gpu_count() { + let public = SandboxSpec { + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }; + + let driver = driver_sandbox_spec_from_public(&public); + + assert_eq!( + driver + .resource_requirements + .expect("driver resource requirements should be present") + .gpu + .expect("driver GPU requirement should be present") + .count, + Some(2) + ); + } + fn struct_value( fields: impl IntoIterator, prost_types::Value)>, ) -> prost_types::Value { @@ -2258,7 +2331,12 @@ mod tests { rewrite_user_facing_conditions( &mut status, Some(&SandboxSpec { - gpu: true, + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: None, + }), + }), ..Default::default() }), ); @@ -2286,13 +2364,7 @@ mod tests { ..Default::default() }); - rewrite_user_facing_conditions( - &mut status, - Some(&SandboxSpec { - gpu: false, - ..Default::default() - }), - ); + rewrite_user_facing_conditions(&mut status, Some(&SandboxSpec::default())); assert_eq!(status.unwrap().conditions[0].message, original); } @@ -2571,7 +2643,12 @@ mod tests { let sandbox = Sandbox { spec: Some(SandboxSpec { - gpu: true, + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: None, + }), + }), ..Default::default() }), ..sandbox_record("sb-1", "sandbox-a", SandboxPhase::Provisioning) @@ -2594,7 +2671,11 @@ mod tests { SandboxPhase::try_from(stored.phase()).unwrap(), SandboxPhase::Ready ); - assert!(stored.spec.as_ref().is_some_and(|spec| spec.gpu)); + assert!(stored.spec.as_ref().is_some_and(|spec| { + spec.resource_requirements + .as_ref() + .is_some_and(|requirements| requirements.gpu.is_some()) + })); } #[tokio::test] diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index 198d5f04c..dec84c4e9 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -99,7 +99,9 @@ fn emit_sandbox_create_telemetry( }; openshell_core::telemetry::emit_sandbox_create( outcome, - spec.gpu, + spec.resource_requirements + .as_ref() + .is_some_and(|requirements| requirements.gpu.is_some()), spec.providers.len() as u64, spec.policy.is_some(), template_source, diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs index 53f292053..0f3b3fd7c 100644 --- a/crates/openshell-server/src/grpc/validation.rs +++ b/crates/openshell-server/src/grpc/validation.rs @@ -131,6 +131,11 @@ pub(super) fn validate_sandbox_spec( validate_sandbox_template(tmpl)?; } + // --- spec.resource_requirements --- + if let Some(ref requirements) = spec.resource_requirements { + validate_resource_requirements(requirements)?; + } + // --- spec.policy serialized size --- if let Some(ref policy) = spec.policy { let size = policy.encoded_len(); @@ -144,6 +149,31 @@ pub(super) fn validate_sandbox_spec( Ok(()) } +fn validate_resource_requirements( + requirements: &openshell_core::proto::SandboxResourceRequirements, +) -> Result<(), Status> { + if let Some(ref gpu) = requirements.gpu { + validate_gpu_requirement(gpu)?; + } + Ok(()) +} + +fn validate_gpu_requirement( + gpu: &openshell_core::proto::GpuResourceRequirement, +) -> Result<(), Status> { + if gpu.count.is_some() && !gpu.device_ids.is_empty() { + return Err(Status::invalid_argument( + "resource_requirements.gpu.count is mutually exclusive with resource_requirements.gpu.device_ids", + )); + } + if gpu.count == Some(0) { + return Err(Status::invalid_argument( + "resource_requirements.gpu.count must be greater than 0", + )); + } + Ok(()) +} + /// Validate template-level field sizes. fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { // String fields. @@ -661,7 +691,7 @@ pub(super) fn level_matches(log_level: &str, min_level: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use openshell_core::proto::SandboxSpec; + use openshell_core::proto::{GpuResourceRequirement, SandboxResourceRequirements, SandboxSpec}; use std::collections::HashMap; use tonic::Code; @@ -687,12 +717,67 @@ mod tests { #[test] fn validate_sandbox_spec_accepts_gpu_flag() { let spec = SandboxSpec { - gpu: true, + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: None, + }), + }), ..Default::default() }; assert!(validate_sandbox_spec("gpu-sandbox", &spec).is_ok()); } + #[test] + fn validate_sandbox_spec_accepts_gpu_count() { + let spec = SandboxSpec { + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: Some(2), + }), + }), + ..Default::default() + }; + assert!(validate_sandbox_spec("gpu-count-sandbox", &spec).is_ok()); + } + + #[test] + fn validate_sandbox_spec_rejects_zero_gpu_count() { + let spec = SandboxSpec { + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec![], + count: Some(0), + }), + }), + ..Default::default() + }; + + let err = validate_sandbox_spec("gpu-count-sandbox", &spec).unwrap_err(); + + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("count must be greater than 0")); + } + + #[test] + fn validate_sandbox_spec_rejects_gpu_count_with_device_id() { + let spec = SandboxSpec { + resource_requirements: Some(SandboxResourceRequirements { + gpu: Some(GpuResourceRequirement { + device_ids: vec!["nvidia.com/gpu=0".to_string()], + count: Some(1), + }), + }), + ..Default::default() + }; + + let err = validate_sandbox_spec("gpu-count-sandbox", &spec).unwrap_err(); + + assert_eq!(err.code(), Code::InvalidArgument); + assert!(err.message().contains("mutually exclusive")); + } + #[test] fn validate_sandbox_spec_accepts_empty_defaults() { assert!(validate_sandbox_spec("", &default_spec()).is_ok()); diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 512abfd3d..856391e39 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -51,10 +51,22 @@ To request GPU resources, add `--gpu`: openshell sandbox create --gpu -- claude ``` +Request a specific number of GPUs with `--gpu-count`: + +```shell +openshell sandbox create --gpu-count 2 -- claude +``` + For Docker-backed sandboxes, GPU injection uses Docker CDI. If you enable Docker CDI after the gateway starts, restart the gateway so OpenShell can detect the updated Docker daemon capability. +Kubernetes gateways honor `--gpu-count` by setting the `nvidia.com/gpu` resource +limit. Docker and Podman use CDI device IDs and select concrete NVIDIA CDI +devices for default and count-based GPU requests. VM gateways accept only one +GPU. In the API, these flags populate +`SandboxSpec.resource_requirements.gpu`. + ### Custom Containers Use `--from` to create a sandbox from the base image, another pre-built sandbox name, a local directory, or a container image: diff --git a/e2e/rust/tests/gpu_device_selection.rs b/e2e/rust/tests/gpu_device_selection.rs index 5f5314b9c..8c8309c92 100644 --- a/e2e/rust/tests/gpu_device_selection.rs +++ b/e2e/rust/tests/gpu_device_selection.rs @@ -74,6 +74,28 @@ fn collect_cdi_gpu_device_ids_from_devices(devices: &[Value], device_ids: &mut V } } +fn local_podman_cdi_gpu_device_ids() -> Vec { + let mut device_ids = std::fs::read_dir("/dev") + .ok() + .into_iter() + .flat_map(|entries| entries.filter_map(Result::ok)) + .filter_map(|entry| { + let name = entry.file_name(); + let name = name.to_str()?; + let index = name.strip_prefix("nvidia")?; + (!index.is_empty() && index.chars().all(|ch| ch.is_ascii_digit())) + .then(|| format!("{CDI_GPU_DEVICE_PREFIX}{index}")) + }) + .collect::>(); + device_ids.sort(); + device_ids.dedup(); + device_ids +} + +fn uses_local_podman_inventory(engine: &ContainerEngine) -> bool { + engine.name().rsplit('/').next() == Some("podman") +} + fn parse_cdi_gpu_device_ids(info: &Value) -> Vec { let mut device_ids = Vec::new(); @@ -91,6 +113,16 @@ fn parse_cdi_gpu_device_ids(info: &Value) -> Vec { fn discovered_cdi_gpu_device_ids() -> Vec { let engine = ContainerEngine::from_env(); + if uses_local_podman_inventory(&engine) { + let device_ids = local_podman_cdi_gpu_device_ids(); + assert!( + !device_ids.is_empty(), + "local Podman GPU e2e tests require /dev/nvidiaN device nodes so \ +bare --gpu can be mapped to nvidia.com/gpu=N" + ); + return device_ids; + } + let output = engine .command() .args(["info", "--format", "json"]) @@ -118,12 +150,45 @@ fn discovered_cdi_gpu_device_ids() -> Vec { assert!( !device_ids.is_empty(), "{} info --format json did not report any discovered NVIDIA CDI GPU devices. \ -Expected DiscoveredDevices entries with Source=cdi and ID like nvidia.com/gpu=all.", +Expected DiscoveredDevices entries with Source=cdi and ID like nvidia.com/gpu=0.", engine.name() ); device_ids } +fn default_cdi_gpu_device_id(device_ids: &[String]) -> Option { + let mut indexed = device_ids + .iter() + .filter_map(|device_id| { + let suffix = device_id.strip_prefix(CDI_GPU_DEVICE_PREFIX)?; + let index = suffix.parse::().ok()?; + Some((index, device_id.clone())) + }) + .collect::>(); + if !indexed.is_empty() { + indexed.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1))); + return indexed.into_iter().map(|(_, device_id)| device_id).next(); + } + + let mut uuid_style = device_ids + .iter() + .filter(|device_id| { + device_id.starts_with(CDI_GPU_DEVICE_PREFIX) + && device_id.as_str() != CDI_GPU_DEVICE_ALL + }) + .cloned() + .collect::>(); + if !uuid_style.is_empty() { + uuid_style.sort(); + return uuid_style.into_iter().next(); + } + + device_ids + .iter() + .any(|device_id| device_id == CDI_GPU_DEVICE_ALL) + .then(|| CDI_GPU_DEVICE_ALL.to_string()) +} + fn has_cdi_gpu_device(device_id: &str) -> bool { discovered_cdi_gpu_device_ids() .iter() @@ -210,20 +275,19 @@ async fn sandbox_create_output(args: &[&str]) -> String { } #[tokio::test] -async fn gpu_request_without_device_matches_plain_all_gpu_container() { - if !has_cdi_gpu_device(CDI_GPU_DEVICE_ALL) { - eprintln!( - "skipping default GPU request test because {CDI_GPU_DEVICE_ALL} was not discovered" - ); +async fn gpu_request_without_device_matches_plain_default_gpu_container() { + let device_ids = discovered_cdi_gpu_device_ids(); + let Some(default_gpu_device) = default_cdi_gpu_device_id(&device_ids) else { + eprintln!("skipping default GPU request test because no selectable GPU ID was discovered"); return; - } + }; - let expected = runtime_gpu_lines(CDI_GPU_DEVICE_ALL); + let expected = runtime_gpu_lines(&default_gpu_device); let actual = sandbox_gpu_lines(None).await; assert_eq!( actual, expected, - "default GPU request should expose the same GPU lines as a plain all-GPU container" + "default GPU request should expose the same GPU lines as a plain container using {default_gpu_device}" ); } diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index 610d491c7..79bff06e2 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -83,12 +83,8 @@ message DriverSandboxSpec { map environment = 5; // Runtime template consumed by the driver during provisioning. DriverSandboxTemplate template = 6; - // Request NVIDIA GPU resources for this sandbox. - bool gpu = 9; - // Optional PCI BDF address (e.g. "0000:2d:00.0") or device index - // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the - // first available GPU. - string gpu_device = 10; + // Portable resource requirements for this sandbox. + DriverSandboxResourceRequirements resource_requirements = 9; // Gateway-minted JWT identifying this sandbox to the gateway. Set by // the gateway on create; the driver materialises it via its native // secret mechanism (Docker/Podman/VM bind-mount a per-sandbox file; @@ -98,6 +94,22 @@ message DriverSandboxSpec { string sandbox_token = 11; } +// Driver-owned resource requirements for the sandbox workload. +message DriverSandboxResourceRequirements { + // GPU requirement for the sandbox. Presence indicates a GPU request. + DriverGpuResourceRequirement gpu = 1; +} + +// Driver-owned GPU resource requirement. Device identifiers are interpreted by +// the selected compute driver and are an interim compatibility surface. +message DriverGpuResourceRequirement { + // Optional number of GPUs requested. Mutually exclusive with device_ids. + optional uint32 count = 1; + // Optional driver-native device identifiers. Mutually exclusive with count. + // Empty means the driver chooses its default GPU assignment behavior. + repeated string device_ids = 2; +} + // Driver-owned runtime template consumed by the compute platform. // // This message describes the sandbox workload in backend-neutral terms. diff --git a/proto/openshell.proto b/proto/openshell.proto index f9b64618b..4731baa9e 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -315,18 +315,24 @@ message SandboxSpec { openshell.sandbox.v1.SandboxPolicy policy = 7; // Provider names to attach to this sandbox. repeated string providers = 8; - // Request NVIDIA GPU resources for this sandbox. - bool gpu = 9; - // Optional PCI BDF address (e.g. "0000:2d:00.0") or device index - // (e.g. "0", "1"). When empty with gpu=true, the driver assigns the - // first available GPU. - string gpu_device = 10; - // Field 11 was `proposal_approval_mode`. The approval mode is now a - // runtime setting (gateway or sandbox scope) read via UpdateConfig / - // GetSandboxConfig, so it can be flipped on a running sandbox and - // managed fleet-wide. - reserved 11; - reserved "proposal_approval_mode"; + // Portable resource requirements for this sandbox. + SandboxResourceRequirements resource_requirements = 9; +} + +// Public resource requirements for the sandbox workload. +message SandboxResourceRequirements { + // GPU requirement for the sandbox. Presence indicates a GPU request. + GpuResourceRequirement gpu = 1; +} + +// Public GPU resource requirement. Device identifiers are interpreted by the +// selected compute driver and are an interim compatibility surface. +message GpuResourceRequirement { + // Optional number of GPUs requested. Mutually exclusive with device_ids. + optional uint32 count = 1; + // Optional driver-native device identifiers. Mutually exclusive with count. + // Empty means the driver chooses its default GPU assignment behavior. + repeated string device_ids = 2; } // Public sandbox template mapped onto compute-driver template inputs.