Skip to content

Commit d990822

Browse files
authored
feat(kubernetes): support sandbox image pull secrets (#1671)
1 parent 3d441e7 commit d990822

11 files changed

Lines changed: 170 additions & 0 deletions

File tree

crates/openshell-driver-kubernetes/src/config.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ pub struct KubernetesComputeConfig {
5959
pub service_account_name: String,
6060
pub default_image: String,
6161
pub image_pull_policy: String,
62+
/// Kubernetes `imagePullSecrets` names attached to sandbox pods.
63+
pub image_pull_secrets: Vec<String>,
6264
/// Image that provides the `openshell-sandbox` supervisor binary.
6365
/// Mounted directly as an image volume, or copied via an init container,
6466
/// depending on `supervisor_sideload_method`.
@@ -104,6 +106,7 @@ impl Default for KubernetesComputeConfig {
104106
// IfNotPresent otherwise). `DEFAULT_IMAGE_PULL_POLICY` ("missing")
105107
// is Podman vocabulary and is not a valid Kubernetes value.
106108
image_pull_policy: String::new(),
109+
image_pull_secrets: Vec::new(),
107110
supervisor_image: DEFAULT_SUPERVISOR_IMAGE.to_string(),
108111
supervisor_image_pull_policy: String::new(),
109112
supervisor_sideload_method: SupervisorSideloadMethod::default(),
@@ -172,4 +175,13 @@ mod tests {
172175
let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap();
173176
assert_eq!(cfg.service_account_name, "openshell-sandbox");
174177
}
178+
179+
#[test]
fn serde_override_image_pull_secrets() {
    // The JSON document carries nothing but the pull-secret list.
    let expected = ["regcred", "backup-regcred"];
    let raw = serde_json::json!({ "image_pull_secrets": expected });

    let parsed: KubernetesComputeConfig = serde_json::from_value(raw).unwrap();

    // The configured names must come back unchanged and in the same order.
    assert_eq!(parsed.image_pull_secrets, expected);
}
175187
}

crates/openshell-driver-kubernetes/src/driver.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ impl KubernetesComputeDriver {
317317
let params = SandboxPodParams {
318318
default_image: &self.config.default_image,
319319
image_pull_policy: &self.config.image_pull_policy,
320+
image_pull_secrets: &self.config.image_pull_secrets,
320321
supervisor_image: &self.config.supervisor_image,
321322
supervisor_image_pull_policy: &self.config.supervisor_image_pull_policy,
322323
supervisor_sideload_method: self.config.supervisor_sideload_method,
@@ -1028,6 +1029,7 @@ fn default_workspace_volume_claim_templates(storage_size: &str) -> serde_json::V
10281029
struct SandboxPodParams<'a> {
10291030
default_image: &'a str,
10301031
image_pull_policy: &'a str,
1032+
image_pull_secrets: &'a [String],
10311033
supervisor_image: &'a str,
10321034
supervisor_image_pull_policy: &'a str,
10331035
supervisor_sideload_method: SupervisorSideloadMethod,
@@ -1050,6 +1052,7 @@ impl Default for SandboxPodParams<'_> {
10501052
Self {
10511053
default_image: "",
10521054
image_pull_policy: "",
1055+
image_pull_secrets: &[],
10531056
supervisor_image: "",
10541057
supervisor_image_pull_policy: "",
10551058
supervisor_sideload_method: SupervisorSideloadMethod::default(),
@@ -1216,6 +1219,14 @@ fn sandbox_template_to_k8s(
12161219
);
12171220
}
12181221

1222+
let image_pull_secrets = image_pull_secret_refs(params.image_pull_secrets);
1223+
if !image_pull_secrets.is_empty() {
1224+
spec.insert(
1225+
"imagePullSecrets".to_string(),
1226+
serde_json::Value::Array(image_pull_secrets),
1227+
);
1228+
}
1229+
12191230
// Disable service account token auto-mounting for security hardening.
12201231
// Sandbox pods should not have access to the Kubernetes API by default.
12211232
spec.insert(
@@ -1365,6 +1376,15 @@ fn sandbox_template_to_k8s(
13651376
result
13661377
}
13671378

1379+
/// Builds the pod-spec `imagePullSecrets` entries for the configured secret names.
///
/// Each name is trimmed of surrounding whitespace. Names that are blank after
/// trimming are dropped, and a name that was already emitted is skipped, so
/// every secret is referenced at most once. The first occurrence decides the
/// position, which keeps the configured order.
///
/// Returns one `{ "name": <secret> }` object per surviving name; the result is
/// empty when no usable name was configured.
fn image_pull_secret_refs(secrets: &[String]) -> Vec<serde_json::Value> {
    // NOTE(review): Kubernetes appears to key `imagePullSecrets` entries by
    // `name`, so a repeated reference adds nothing — confirm against the
    // PodSpec API reference.
    let mut seen = std::collections::HashSet::new();
    secrets
        .iter()
        .map(|secret| secret.trim())
        // `insert` returns false for a name that was already recorded.
        .filter(|secret| !secret.is_empty() && seen.insert(*secret))
        .map(|secret| serde_json::json!({ "name": secret }))
        .collect()
}
1387+
13681388
fn container_resources(template: &SandboxTemplate, gpu: bool) -> Option<serde_json::Value> {
13691389
// Start from the raw resources passthrough in platform_config (preserves
13701390
// custom resource types like GPU limits that users set via the public API
@@ -2519,6 +2539,80 @@ mod tests {
25192539
);
25202540
}
25212541

2542+
#[test]
fn sandbox_template_omits_empty_image_pull_secrets() {
    // Default params carry an empty pull-secret slice.
    let params = SandboxPodParams::default();
    let template = SandboxTemplate::default();
    let empty_map = std::collections::HashMap::new();

    let pod_template = sandbox_template_to_k8s(&template, false, &empty_map, true, &params);

    // Indexing a missing key yields JSON null, so null means "not rendered".
    let rendered = &pod_template["spec"]["imagePullSecrets"];
    assert!(
        rendered.is_null(),
        "imagePullSecrets must be omitted when no secrets are configured"
    );
}
2557+
2558+
#[test]
fn sandbox_template_renders_configured_image_pull_secrets() {
    // One clean name, one padded with whitespace, and one blank entry.
    let secrets: Vec<String> = ["regcred", " backup-regcred ", ""]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let params = SandboxPodParams {
        image_pull_secrets: &secrets,
        ..Default::default()
    };
    let template = SandboxTemplate::default();
    let empty_map = std::collections::HashMap::new();

    let pod_template = sandbox_template_to_k8s(&template, false, &empty_map, true, &params);

    // Padding is stripped and the blank entry is not rendered at all.
    let expected = serde_json::json!([
        { "name": "regcred" },
        { "name": "backup-regcred" }
    ]);
    assert_eq!(pod_template["spec"]["imagePullSecrets"], expected);
}
2585+
2586+
#[test]
fn sandbox_template_renders_image_pull_secrets_for_template_image() {
    // The template names its own image, distinct from the driver default.
    let template = SandboxTemplate {
        image: "private.example.com/team/sandbox:v1".to_string(),
        ..Default::default()
    };
    let secrets = vec!["regcred".to_string()];
    let params = SandboxPodParams {
        default_image: "default-image:latest",
        image_pull_secrets: &secrets,
        ..Default::default()
    };
    let empty_map = std::collections::HashMap::new();

    let pod_template = sandbox_template_to_k8s(&template, false, &empty_map, true, &params);
    let spec = &pod_template["spec"];

    // The first container runs the template's image rather than the default...
    assert_eq!(
        spec["containers"][0]["image"],
        serde_json::json!("private.example.com/team/sandbox:v1")
    );
    // ...and the configured pull secret is still attached to the pod.
    assert_eq!(
        spec["imagePullSecrets"],
        serde_json::json!([{ "name": "regcred" }])
    );
}
2615+
25222616
#[test]
25232617
fn platform_config_bool_extracts_value() {
25242618
let template = SandboxTemplate {

crates/openshell-driver-kubernetes/src/main.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,13 @@ struct Args {
4444
#[arg(long, env = "OPENSHELL_SANDBOX_IMAGE_PULL_POLICY")]
4545
sandbox_image_pull_policy: Option<String>,
4646

47+
#[arg(
48+
long,
49+
env = "OPENSHELL_SANDBOX_IMAGE_PULL_SECRETS",
50+
value_delimiter = ','
51+
)]
52+
sandbox_image_pull_secrets: Vec<String>,
53+
4754
#[arg(long, env = "OPENSHELL_GRPC_ENDPOINT")]
4855
grpc_endpoint: Option<String>,
4956

@@ -98,6 +105,7 @@ async fn main() -> Result<()> {
98105
service_account_name: args.sandbox_service_account,
99106
default_image: args.sandbox_image.unwrap_or_default(),
100107
image_pull_policy: args.sandbox_image_pull_policy.unwrap_or_default(),
108+
image_pull_secrets: args.sandbox_image_pull_secrets,
101109
supervisor_image: args
102110
.supervisor_image
103111
.unwrap_or_else(|| openshell_core::config::DEFAULT_SUPERVISOR_IMAGE.to_string()),

deploy/helm/openshell/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ cert-manager alternative.
207207
| server.oidc.userRole | string | `""` | Role name for standard user access. |
208208
| server.sandboxImage | string | `"ghcr.io/nvidia/openshell-community/sandboxes/base:latest"` | Default sandbox image used when requests do not specify one. |
209209
| server.sandboxImagePullPolicy | string | `""` | Kubernetes imagePullPolicy for sandbox pods. Empty = Kubernetes default (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev clusters so new images are picked up without manual eviction. |
210+
| server.sandboxImagePullSecrets | list | `[]` | Image pull secrets attached to sandbox pods, given as a list of objects with a `name` key (for example `- name: regcred`). Referenced Secrets must exist in the sandbox namespace. |
210211
| server.sandboxJwt.gatewayId | string | `""` | Stable gateway identity embedded in iss/aud of every minted token. Defaults to the release name so HA replicas share identity. |
211212
| server.sandboxJwt.k8sSaTokenTtlSecs | int | `3600` | Lifetime (seconds) of the projected ServiceAccount token kubelet writes into each sandbox pod for the IssueSandboxToken bootstrap exchange. Kubelet enforces a minimum of 600s; the driver clamps values outside [600, 86400]. Default 3600 — generous, since the supervisor consumes the token within seconds of pod start. |
212213
| server.sandboxJwt.secretDefaultMode | string | `""` | File mode for the mounted JWT signing key Secret. Default 0400 (owner-read only). Override to 0440 or 0444 if the container UID does not match the volume file owner. |

deploy/helm/openshell/skaffold.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ build:
2626
context: ../../..
2727
custom:
2828
buildCommand: |
29+
CONTAINER_ENGINE=docker \
2930
IMAGE_NAME="${IMAGE%:*}" \
3031
IMAGE_TAG="${IMAGE##*:}" \
3132
tasks/scripts/docker-build-image.sh gateway
@@ -42,6 +43,7 @@ build:
4243
context: ../../..
4344
custom:
4445
buildCommand: |
46+
CONTAINER_ENGINE=docker \
4547
IMAGE_NAME="${IMAGE%:*}" \
4648
IMAGE_TAG="${IMAGE##*:}" \
4749
tasks/scripts/docker-build-image.sh supervisor

deploy/helm/openshell/templates/gateway-config.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ data:
106106
{{- if .Values.server.sandboxImagePullPolicy }}
107107
image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }}
108108
{{- end }}
109+
{{- $sandboxImagePullSecretNames := list -}}
110+
{{- range .Values.server.sandboxImagePullSecrets }}
111+
{{- if .name }}
112+
{{- $sandboxImagePullSecretNames = append $sandboxImagePullSecretNames .name }}
113+
{{- end }}
114+
{{- end }}
115+
{{- if $sandboxImagePullSecretNames }}
116+
image_pull_secrets = [{{- range $i, $name := $sandboxImagePullSecretNames }}{{ if $i }}, {{ end }}{{ $name | quote }}{{- end }}]
117+
{{- end }}
109118
{{- if .Values.server.workspaceDefaultStorageSize }}
110119
workspace_default_storage_size = {{ .Values.server.workspaceDefaultStorageSize | quote }}
111120
{{- end }}

deploy/helm/openshell/tests/gateway_config_test.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,27 @@ tests:
5959
path: data["gateway.toml"]
6060
pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?service_account_name\s*=\s*"openshell-sandbox"'
6161

62+
- it: renders sandbox image pull secrets under [openshell.drivers.kubernetes]
63+
template: templates/gateway-config.yaml
64+
set:
65+
server.sandboxImagePullSecrets:
66+
- name: regcred
67+
- name: backup-regcred
68+
asserts:
69+
- matchRegex:
70+
path: data["gateway.toml"]
71+
pattern: '(?ms)\[openshell\.drivers\.kubernetes\].*?image_pull_secrets\s*=\s*\["regcred", "backup-regcred"\]'
72+
73+
- it: does not reuse gateway image pull secrets for sandbox pods
74+
template: templates/gateway-config.yaml
75+
set:
76+
imagePullSecrets:
77+
- name: gateway-regcred
78+
asserts:
79+
- notMatchRegex:
80+
path: data["gateway.toml"]
81+
pattern: 'image_pull_secrets\s*='
82+
6283
- it: does not render local mTLS user auth for Kubernetes deployments
6384
template: templates/gateway-config.yaml
6485
asserts:

deploy/helm/openshell/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ server:
151151
# (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev
152152
# clusters so new images are picked up without manual eviction.
153153
sandboxImagePullPolicy: ""
154+
# -- Image pull secrets attached to sandbox pods. Referenced Secrets must exist
155+
# in the sandbox namespace. Each entry is an object with a `name` key (e.g. `- name: regcred`).
156+
sandboxImagePullSecrets: []
154157
# -- Default storage size for the workspace PVC in sandbox pods.
155158
# Uses Kubernetes quantity syntax (e.g. "2Gi", "10Gi", "500Mi").
156159
# Empty = built-in default (2Gi).

docs/kubernetes/setup.mdx

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ The most commonly changed values are:
135135
| `image.repository` / `image.tag` | Gateway container image. Defaults to `ghcr.io/nvidia/openshell/gateway:latest`. |
136136
| `server.sandboxNamespace` | Namespace where sandbox pods are created. Defaults to the Helm release namespace when left empty. |
137137
| `server.sandboxImage` | Default sandbox image used when a sandbox does not specify one. |
138+
| `server.sandboxImagePullSecrets` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. |
138139
| `server.grpcEndpoint` | Endpoint that sandbox supervisors use to call back to the gateway. Must be reachable from inside the cluster. |
139140
| `server.disableTls` | Run the gateway over plaintext HTTP. Use only behind a trusted transport. |
140141
| `server.auth.allowUnauthenticatedUsers` | Accept user-facing calls without OIDC or mTLS credentials. Use only for trusted local development or a fully trusted access proxy. |
@@ -152,6 +153,23 @@ helm upgrade --install openshell \
152153
--values my-values.yaml
153154
```
154155

156+
To use private sandbox images, create a `kubernetes.io/dockerconfigjson` Secret
157+
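in the sandbox namespace (the `-n` value below must match `server.sandboxNamespace`, or the release namespace when that value is empty) and reference its name in `server.sandboxImagePullSecrets`: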
158+
159+
```shell
160+
kubectl -n openshell create secret docker-registry regcred \
161+
--docker-server=registry.example.com \
162+
--docker-username="$REGISTRY_USER" \
163+
--docker-password="$REGISTRY_TOKEN"
164+
```
165+
166+
```yaml
167+
server:
168+
sandboxImage: registry.example.com/team/openshell-sandbox:latest
169+
sandboxImagePullSecrets:
170+
- name: regcred
171+
```
172+
155173
## RBAC
156174
157175
The chart creates the following RBAC resources in the release namespace:

docs/reference/gateway-config.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ namespace = "agents"
163163
service_account_name = "openshell-sandbox"
164164
default_image = "ghcr.io/nvidia/openshell/sandbox:latest"
165165
image_pull_policy = "IfNotPresent"
166+
image_pull_secrets = ["regcred"]
166167
supervisor_image = "ghcr.io/nvidia/openshell/supervisor:latest"
167168
supervisor_image_pull_policy = "IfNotPresent"
168169
# Use the image volume on Kubernetes >= 1.35 (GA in 1.36); switch to "init-container"

0 commit comments

Comments
 (0)