diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index d71133465..63748bb33 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -59,6 +59,8 @@ pub struct KubernetesComputeConfig { pub service_account_name: String, pub default_image: String, pub image_pull_policy: String, + /// Kubernetes `imagePullSecrets` names attached to sandbox pods. + pub image_pull_secrets: Vec<String>, /// Image that provides the `openshell-sandbox` supervisor binary. /// Mounted directly as an image volume, or copied via an init container, /// depending on `supervisor_sideload_method`. @@ -104,6 +106,7 @@ impl Default for KubernetesComputeConfig { // IfNotPresent otherwise). `DEFAULT_IMAGE_PULL_POLICY` ("missing") // is Podman vocabulary and is not a valid Kubernetes value. image_pull_policy: String::new(), + image_pull_secrets: Vec::new(), supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(), supervisor_image_pull_policy: String::new(), supervisor_sideload_method: SupervisorSideloadMethod::default(), @@ -172,4 +175,13 @@ mod tests { let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); assert_eq!(cfg.service_account_name, "openshell-sandbox"); } + + #[test] + fn serde_override_image_pull_secrets() { + let json = serde_json::json!({ + "image_pull_secrets": ["regcred", "backup-regcred"] + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert_eq!(cfg.image_pull_secrets, ["regcred", "backup-regcred"]); + } } diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 7b38b91c6..19b8d91c1 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -317,6 +317,7 @@ impl KubernetesComputeDriver { let params = SandboxPodParams { default_image: &self.config.default_image, image_pull_policy: 
&self.config.image_pull_policy, + image_pull_secrets: &self.config.image_pull_secrets, supervisor_image: &self.config.supervisor_image, supervisor_image_pull_policy: &self.config.supervisor_image_pull_policy, supervisor_sideload_method: self.config.supervisor_sideload_method, @@ -1046,6 +1047,7 @@ fn default_workspace_volume_claim_templates(storage_size: &str) -> serde_json::V struct SandboxPodParams<'a> { default_image: &'a str, image_pull_policy: &'a str, + image_pull_secrets: &'a [String], supervisor_image: &'a str, supervisor_image_pull_policy: &'a str, supervisor_sideload_method: SupervisorSideloadMethod, @@ -1068,6 +1070,7 @@ impl Default for SandboxPodParams<'_> { Self { default_image: "", image_pull_policy: "", + image_pull_secrets: &[], supervisor_image: "", supervisor_image_pull_policy: "", supervisor_sideload_method: SupervisorSideloadMethod::default(), @@ -1234,6 +1237,14 @@ fn sandbox_template_to_k8s( ); } + let image_pull_secrets = image_pull_secret_refs(params.image_pull_secrets); + if !image_pull_secrets.is_empty() { + spec.insert( + "imagePullSecrets".to_string(), + serde_json::Value::Array(image_pull_secrets), + ); + } + // Disable service account token auto-mounting for security hardening. // Sandbox pods should not have access to the Kubernetes API by default. 
spec.insert( @@ -1383,6 +1394,15 @@ fn sandbox_template_to_k8s( result } +fn image_pull_secret_refs(secrets: &[String]) -> Vec<serde_json::Value> { + secrets + .iter() + .map(|secret| secret.trim()) + .filter(|secret| !secret.is_empty()) + .map(|secret| serde_json::json!({ "name": secret })) + .collect() +} + fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option { // Start from the raw resources passthrough in platform_config (preserves // custom resource types like GPU limits that users set via the public API @@ -2537,6 +2557,80 @@ mod tests { ); } + #[test] + fn sandbox_template_omits_empty_image_pull_secrets() { + let pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + &std::collections::HashMap::new(), + true, + &SandboxPodParams::default(), + ); + + assert!( + pod_template["spec"]["imagePullSecrets"].is_null(), + "imagePullSecrets must be omitted when no secrets are configured" + ); + } + + #[test] + fn sandbox_template_renders_configured_image_pull_secrets() { + let secrets = vec![ + "regcred".to_string(), + " backup-regcred ".to_string(), + String::new(), + ]; + let params = SandboxPodParams { + image_pull_secrets: &secrets, + ..Default::default() + }; + let pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + &std::collections::HashMap::new(), + true, + &params, + ); + + assert_eq!( + pod_template["spec"]["imagePullSecrets"], + serde_json::json!([ + { "name": "regcred" }, + { "name": "backup-regcred" } + ]) + ); + } + + #[test] + fn sandbox_template_renders_image_pull_secrets_for_template_image() { + let secrets = vec!["regcred".to_string()]; + let params = SandboxPodParams { + default_image: "default-image:latest", + image_pull_secrets: &secrets, + ..Default::default() + }; + let template = SandboxTemplate { + image: "private.example.com/team/sandbox:v1".to_string(), + ..Default::default() + }; + let pod_template = sandbox_template_to_k8s( + &template, + false, + &std::collections::HashMap::new(), + 
true, + &params, + ); + + assert_eq!( + pod_template["spec"]["containers"][0]["image"], + serde_json::json!("private.example.com/team/sandbox:v1") + ); + assert_eq!( + pod_template["spec"]["imagePullSecrets"], + serde_json::json!([{ "name": "regcred" }]) + ); + } + #[test] fn platform_config_bool_extracts_value() { let template = SandboxTemplate { diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index 703659af3..fd5d902e5 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -44,6 +44,13 @@ struct Args { #[arg(long, env = "OPENSHELL_SANDBOX_IMAGE_PULL_POLICY")] sandbox_image_pull_policy: Option<String>, + #[arg( + long, + env = "OPENSHELL_SANDBOX_IMAGE_PULL_SECRETS", + value_delimiter = ',' + )] + sandbox_image_pull_secrets: Vec<String>, + #[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")] grpc_endpoint: Option, @@ -98,6 +105,7 @@ async fn main() -> Result<()> { service_account_name: args.sandbox_service_account, default_image: args.sandbox_image.unwrap_or_default(), image_pull_policy: args.sandbox_image_pull_policy.unwrap_or_default(), + image_pull_secrets: args.sandbox_image_pull_secrets, supervisor_image: args .supervisor_image .unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()), diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 036222f8a..15c9f7f85 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -207,6 +207,7 @@ cert-manager alternative. | server.oidc.userRole | string | `""` | Role name for standard user access. | | server.sandboxImage | string | `"ghcr.io/nvidia/openshell-community/sandboxes/base:latest"` | Default sandbox image used when requests do not specify one. | | server.sandboxImagePullPolicy | string | `""` | Kubernetes imagePullPolicy for sandbox pods. Empty = Kubernetes default (Always for :latest, IfNotPresent otherwise). 
Set to "Always" for dev clusters so new images are picked up without manual eviction. | +| server.sandboxImagePullSecrets | list | `[]` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | | server.sandboxJwt.gatewayId | string | `""` | Stable gateway identity embedded in iss/aud of every minted token. Defaults to the release name so HA replicas share identity. | | server.sandboxJwt.k8sSaTokenTtlSecs | int | `3600` | Lifetime (seconds) of the projected ServiceAccount token kubelet writes into each sandbox pod for the IssueSandboxToken bootstrap exchange. Kubelet enforces a minimum of 600s; the driver clamps values outside [600, 86400]. Default 3600 — generous, since the supervisor consumes the token within seconds of pod start. | | server.sandboxJwt.secretDefaultMode | string | `""` | File mode for the mounted JWT signing key Secret. Default 0400 (owner-read only). Override to 0440 or 0444 if the container UID does not match the volume file owner. | diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index 0e91db505..dcf578d22 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -26,6 +26,7 @@ build: context: ../../.. custom: buildCommand: | + CONTAINER_ENGINE=docker \ IMAGE_NAME="${IMAGE%:*}" \ IMAGE_TAG="${IMAGE##*:}" \ tasks/scripts/docker-build-image.sh gateway @@ -42,6 +43,7 @@ build: context: ../../.. 
custom: buildCommand: | + CONTAINER_ENGINE=docker \ IMAGE_NAME="${IMAGE%:*}" \ IMAGE_TAG="${IMAGE##*:}" \ tasks/scripts/docker-build-image.sh supervisor diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index 52b5a03ac..ac7478de2 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -106,6 +106,15 @@ data: {{- if .Values.server.sandboxImagePullPolicy }} image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }} {{- end }} + {{- $sandboxImagePullSecretNames := list -}} + {{- range .Values.server.sandboxImagePullSecrets }} + {{- if .name }} + {{- $sandboxImagePullSecretNames = append $sandboxImagePullSecretNames .name }} + {{- end }} + {{- end }} + {{- if $sandboxImagePullSecretNames }} + image_pull_secrets = [{{- range $i, $name := $sandboxImagePullSecretNames }}{{ if $i }}, {{ end }}{{ $name | quote }}{{- end }}] + {{- end }} {{- if .Values.server.workspaceDefaultStorageSize }} workspace_default_storage_size = {{ .Values.server.workspaceDefaultStorageSize | quote }} {{- end }} diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml index 2788bd6b9..be67bea43 100644 --- a/deploy/helm/openshell/tests/gateway_config_test.yaml +++ b/deploy/helm/openshell/tests/gateway_config_test.yaml @@ -59,6 +59,27 @@ tests: path: data["gateway.toml"] pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?service_account_name\s*=\s*"openshell-sandbox"' + - it: renders sandbox image pull secrets under [openshell.drivers.kubernetes] + template: templates/gateway-config.yaml + set: + server.sandboxImagePullSecrets: + - name: regcred + - name: backup-regcred + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?image_pull_secrets\s*=\s*\["regcred", "backup-regcred"\]' + + - it: does not reuse gateway image pull secrets 
for sandbox pods + template: templates/gateway-config.yaml + set: + imagePullSecrets: + - name: gateway-regcred + asserts: + - notMatchRegex: + path: data["gateway.toml"] + pattern: 'image_pull_secrets\s*=' + - it: does not render local mTLS user auth for Kubernetes deployments template: templates/gateway-config.yaml asserts: diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index bfcb68a48..4d03b86d7 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -151,6 +151,9 @@ server: # (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev # clusters so new images are picked up without manual eviction. sandboxImagePullPolicy: "" + # -- Image pull secrets attached to sandbox pods. Referenced Secrets must exist + # in the sandbox namespace. + sandboxImagePullSecrets: [] # -- Default storage size for the workspace PVC in sandbox pods. # Uses Kubernetes quantity syntax (e.g. "2Gi", "10Gi", "500Mi"). # Empty = built-in default (2Gi). diff --git a/docs/kubernetes/setup.mdx b/docs/kubernetes/setup.mdx index 9a13f4b23..8c95978c4 100644 --- a/docs/kubernetes/setup.mdx +++ b/docs/kubernetes/setup.mdx @@ -135,6 +135,7 @@ The most commonly changed values are: | `image.repository` / `image.tag` | Gateway container image. Defaults to `ghcr.io/nvidia/openshell/gateway:latest`. | | `server.sandboxNamespace` | Namespace where sandbox pods are created. Defaults to the Helm release namespace when left empty. | | `server.sandboxImage` | Default sandbox image used when a sandbox does not specify one. | +| `server.sandboxImagePullSecrets` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | | `server.grpcEndpoint` | Endpoint that sandbox supervisors use to call back to the gateway. Must be reachable from inside the cluster. | | `server.disableTls` | Run the gateway over plaintext HTTP. Use only behind a trusted transport. 
| | `server.auth.allowUnauthenticatedUsers` | Accept user-facing calls without OIDC or mTLS credentials. Use only for trusted local development or a fully trusted access proxy. | @@ -152,6 +153,23 @@ helm upgrade --install openshell \ --values my-values.yaml ``` +To use private sandbox images, create a `kubernetes.io/dockerconfigjson` Secret +in the sandbox namespace and reference its name: + +```shell +kubectl -n openshell create secret docker-registry regcred \ + --docker-server=registry.example.com \ + --docker-username="$REGISTRY_USER" \ + --docker-password="$REGISTRY_TOKEN" +``` + +```yaml +server: + sandboxImage: registry.example.com/team/openshell-sandbox:latest + sandboxImagePullSecrets: + - name: regcred +``` + ## RBAC The chart creates the following RBAC resources in the release namespace: diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index 4317c98f0..a4606cdb2 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -163,6 +163,7 @@ namespace = "agents" service_account_name = "openshell-sandbox" default_image = "ghcr.io/nvidia/openshell/sandbox:latest" image_pull_policy = "IfNotPresent" +image_pull_secrets = ["regcred"] supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest" supervisor_image_pull_policy = "IfNotPresent" # Use the image volume on Kubernetes >= 1.35 (GA in 1.36); switch to "init-container" diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 9d9ecc8cf..ca003fe17 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -122,6 +122,7 @@ For maintainer-level implementation details, refer to the [Kubernetes driver REA | `service_account_name` | `sandboxServiceAccount.name` | Set the Kubernetes service account assigned to sandbox pods and accepted by the gateway TokenReview bootstrap path. The Helm chart creates a dedicated sandbox service account by default. 
| | `default_image` | `server.sandboxImage` | Set the default sandbox image. | | `image_pull_policy` | `server.sandboxImagePullPolicy` | Set the Kubernetes image pull policy for sandbox pods. | +| `image_pull_secrets` | `server.sandboxImagePullSecrets` | Attach Kubernetes image pull secrets to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | | `grpc_endpoint` | `server.grpcEndpoint` | Set the gateway callback endpoint reachable from sandbox pods. | | `client_tls_secret_name` | `server.tls.clientTlsSecretName` | Mount sandbox client TLS materials from a Kubernetes secret. | | `supervisor_image` | `supervisor.image.repository` / `supervisor.image.tag` | Set the supervisor image that provides the `openshell-sandbox` binary. |