diff --git a/Dockerfile b/Dockerfile index ffd457aab..fd39ea468 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,24 +2,27 @@ # Stage 1: Build TypeScript plugin from source FROM node:22-slim@sha256:4f77a690f2f8946ab16fe1e791a3ac0667ae1c3575c3e4d0d4589e9ed5bfaf3d AS builder -COPY nemoclaw/package.json nemoclaw/tsconfig.json /opt/nemoclaw/ +ENV NPM_CONFIG_AUDIT=false \ + NPM_CONFIG_FUND=false \ + NPM_CONFIG_UPDATE_NOTIFIER=false +COPY nemoclaw/package.json nemoclaw/package-lock.json nemoclaw/tsconfig.json /opt/nemoclaw/ COPY nemoclaw/src/ /opt/nemoclaw/src/ WORKDIR /opt/nemoclaw -RUN npm install && npm run build +RUN npm ci && npm run build # Stage 2: Runtime image FROM node:22-slim@sha256:4f77a690f2f8946ab16fe1e791a3ac0667ae1c3575c3e4d0d4589e9ed5bfaf3d ENV DEBIAN_FRONTEND=noninteractive +ENV NPM_CONFIG_AUDIT=false \ + NPM_CONFIG_FUND=false \ + NPM_CONFIG_UPDATE_NOTIFIER=false RUN apt-get update && apt-get install -y --no-install-recommends \ python3=3.11.2-1+b1 \ - python3-pip=23.0.1+dfsg-1 \ - python3-venv=3.11.2-1+b1 \ curl=7.88.1-10+deb12u14 \ git=1:2.39.5-0+deb12u3 \ ca-certificates=20230311+deb12u1 \ - iproute2=6.1.0-3 \ && rm -rf /var/lib/apt/lists/* # gosu for privilege separation (gateway vs sandbox user). @@ -67,19 +70,21 @@ RUN mkdir -p /sandbox/.openclaw-data/agents/main/agent \ && ln -s /sandbox/.openclaw-data/update-check.json /sandbox/.openclaw/update-check.json \ && chown -R sandbox:sandbox /sandbox/.openclaw /sandbox/.openclaw-data -# Install OpenClaw CLI + PyYAML for inline Python scripts in e2e tests +# Install OpenClaw CLI RUN npm install -g openclaw@2026.3.11 \ - && pip3 install --no-cache-dir --break-system-packages "pyyaml==6.0.3" + && rm -rf /usr/local/lib/node_modules/openclaw/docs \ + && find /usr/local/lib/node_modules/openclaw -type f \ + \( -name "*.map" -o -name "README*" -o -name "CHANGELOG*" \) -delete # Copy built plugin and blueprint into the sandbox COPY --from=builder /opt/nemoclaw/dist/ /opt/nemoclaw/dist/ COPY nemoclaw/openclaw.plugin.json /opt/nemoclaw/ -COPY nemoclaw/package.json /opt/nemoclaw/ +COPY nemoclaw/package.json nemoclaw/package-lock.json /opt/nemoclaw/ COPY nemoclaw-blueprint/ /opt/nemoclaw-blueprint/ # Install runtime dependencies only (no devDependencies, no build step) WORKDIR /opt/nemoclaw -RUN npm install --omit=dev +RUN npm ci --omit=dev # Set up blueprint for local resolution RUN mkdir -p /sandbox/.nemoclaw/blueprints/0.1.0 \ @@ -179,6 +184,7 @@ RUN openclaw doctor --fix > /dev/null 2>&1 || true \ # hadolint ignore=DL3002 USER root RUN chown root:root /sandbox/.openclaw \ + && rm -rf /root/.npm /sandbox/.npm \ && find /sandbox/.openclaw -mindepth 1 -maxdepth 1 -exec chown -h root:root {} + \ && chmod 755 /sandbox/.openclaw \ && chmod 444 /sandbox/.openclaw/openclaw.json diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 48a4cb241..be8aeed1d 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -10,6 +10,7 @@ const os = require("os"); const path = require("path"); const { spawn, spawnSync } = require("child_process"); const { ROOT, SCRIPTS, run, runCapture, shellQuote } = require("./runner"); +const { stageOptimizedSandboxBuildContext } = require("./sandbox-build-context"); const { getDefaultOllamaModel, getBootstrapOllamaModelOptions, @@ -1390,14 +1391,8 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null) registry.removeSandbox(sandboxName); } - // Stage build context - const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); - const stagedDockerfile = path.join(buildCtx, "Dockerfile"); - fs.copyFileSync(path.join(ROOT, "Dockerfile"), stagedDockerfile); - run(`cp -r "${path.join(ROOT, "nemoclaw")}" "${buildCtx}/nemoclaw"`); - run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`); - run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`); - run(`rm -rf "${buildCtx}/nemoclaw/node_modules"`, { ignoreError: true }); + // Stage only the files the Docker build actually consumes so uploads stay small. + const { buildCtx, stagedDockerfile } = stageOptimizedSandboxBuildContext(ROOT); // Create sandbox (use -- echo to avoid dropping into interactive shell) // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) diff --git a/bin/lib/sandbox-build-context.js b/bin/lib/sandbox-build-context.js new file mode 100644 index 000000000..7b24a8541 --- /dev/null +++ b/bin/lib/sandbox-build-context.js @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const fs = require("fs"); +const os = require("os"); +const path = require("path"); + +function createBuildContextDir(tmpDir = os.tmpdir()) { + return fs.mkdtempSync(path.join(tmpDir, "nemoclaw-build-")); +} + +function stageLegacySandboxBuildContext(rootDir, tmpDir = os.tmpdir()) { + const buildCtx = createBuildContextDir(tmpDir); + fs.copyFileSync(path.join(rootDir, "Dockerfile"), path.join(buildCtx, "Dockerfile")); + fs.cpSync(path.join(rootDir, "nemoclaw"), path.join(buildCtx, "nemoclaw"), { recursive: true }); + fs.cpSync(path.join(rootDir, "nemoclaw-blueprint"), path.join(buildCtx, "nemoclaw-blueprint"), { recursive: true }); + fs.cpSync(path.join(rootDir, "scripts"), path.join(buildCtx, "scripts"), { recursive: true }); + fs.rmSync(path.join(buildCtx, "nemoclaw", "node_modules"), { recursive: true, force: true }); + return { + buildCtx, + stagedDockerfile: path.join(buildCtx, "Dockerfile"), + }; +} + +function stageOptimizedSandboxBuildContext(rootDir, tmpDir = os.tmpdir()) { + const buildCtx = createBuildContextDir(tmpDir); + const stagedDockerfile = path.join(buildCtx, "Dockerfile"); + const sourceNemoclawDir = path.join(rootDir, "nemoclaw"); + const stagedNemoclawDir = path.join(buildCtx, "nemoclaw"); + const sourceBlueprintDir = path.join(rootDir, "nemoclaw-blueprint"); + const stagedBlueprintDir = path.join(buildCtx, "nemoclaw-blueprint"); + const stagedScriptsDir = path.join(buildCtx, "scripts"); + + fs.copyFileSync(path.join(rootDir, "Dockerfile"), stagedDockerfile); + + fs.mkdirSync(stagedNemoclawDir, { recursive: true }); + for (const file of ["package.json", "package-lock.json", "tsconfig.json", "openclaw.plugin.json"]) { + fs.copyFileSync(path.join(sourceNemoclawDir, file), path.join(stagedNemoclawDir, file)); + } + fs.cpSync(path.join(sourceNemoclawDir, "src"), path.join(stagedNemoclawDir, "src"), { recursive: true }); + + fs.mkdirSync(stagedBlueprintDir, { recursive: true }); + fs.copyFileSync(path.join(sourceBlueprintDir, "blueprint.yaml"), path.join(stagedBlueprintDir, "blueprint.yaml")); + fs.cpSync(path.join(sourceBlueprintDir, "policies"), path.join(stagedBlueprintDir, "policies"), { recursive: true }); + + fs.mkdirSync(stagedScriptsDir, { recursive: true }); + fs.copyFileSync(path.join(rootDir, "scripts", "nemoclaw-start.sh"), path.join(stagedScriptsDir, "nemoclaw-start.sh")); + + return { buildCtx, stagedDockerfile }; +} + +function collectBuildContextStats(dir) { + let fileCount = 0; + let totalBytes = 0; + + function walk(currentDir) { + for (const entry of fs.readdirSync(currentDir, { withFileTypes: true })) { + const entryPath = path.join(currentDir, entry.name); + if (entry.isDirectory()) { + walk(entryPath); + continue; + } + if (entry.isFile()) { + fileCount += 1; + totalBytes += fs.statSync(entryPath).size; + } + } + } + + walk(dir); + return { fileCount, totalBytes }; +} + +module.exports = { + collectBuildContextStats, + stageLegacySandboxBuildContext, + stageOptimizedSandboxBuildContext, +}; diff --git a/scripts/benchmark-sandbox-image-build.js b/scripts/benchmark-sandbox-image-build.js new file mode 100755 index 000000000..23036266b --- /dev/null +++ b/scripts/benchmark-sandbox-image-build.js @@ -0,0 +1,142 @@ +#!/usr/bin/env node +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const fs = require("fs"); +const os = require("os"); +const path = require("path"); +const { execFileSync, spawnSync } = require("child_process"); +const { + collectBuildContextStats, + stageLegacySandboxBuildContext, + stageOptimizedSandboxBuildContext, +} = require("../bin/lib/sandbox-build-context"); + +function parseArgs(argv) { + const args = { + currentRepo: process.cwd(), + mainRef: "origin/main", + noCache: true, + keepWorktree: false, + }; + + for (let i = 0; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === "--current-repo") args.currentRepo = argv[++i]; + else if (arg === "--main-ref") args.mainRef = argv[++i]; + else if (arg === "--cache") args.noCache = false; + else if (arg === "--keep-worktree") args.keepWorktree = true; + else throw new Error(`Unknown argument: ${arg}`); + } + + return args; +} + +function run(command, args, options = {}) { + const result = spawnSync(command, args, { + encoding: "utf8", + stdio: options.stdio || "pipe", + cwd: options.cwd, + }); + if (result.status !== 0) { + throw new Error(`${command} ${args.join(" ")} failed:\n${result.stderr || result.stdout}`); + } + return result.stdout.trim(); +} + +function makeTempWorktree(mainRef, currentRepo) { + const worktreeRoot = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-main-worktree-")); + run("git", ["worktree", "add", "--detach", worktreeRoot, mainRef], { cwd: currentRepo }); + return worktreeRoot; +} + +function removeWorktree(worktreeRoot, currentRepo) { + try { + run("git", ["worktree", "remove", "--force", worktreeRoot], { cwd: currentRepo }); + } catch { + // Best-effort cleanup; remove the temp directory either way. + } + fs.rmSync(worktreeRoot, { recursive: true, force: true }); +} + +function dockerBuild(repoRoot, stageFn, label, noCache) { + const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), `nemoclaw-bench-${label}-`)); + const { buildCtx } = stageFn(repoRoot, tmpRoot); + const stats = collectBuildContextStats(buildCtx); + const imageTag = `nemoclaw-bench-${label.toLowerCase().replace(/[^a-z0-9]+/g, "-")}-${Date.now()}`; + const args = ["build", "-t", imageTag]; + if (noCache) args.push("--no-cache"); + args.push(buildCtx); + + const startedAt = process.hrtime.bigint(); + try { + run("docker", args); + const elapsedSeconds = Number(process.hrtime.bigint() - startedAt) / 1e9; + const imageBytes = Number(run("docker", ["image", "inspect", imageTag, "--format", "{{.Size}}"])); + return { + label, + buildCtx, + fileCount: stats.fileCount, + totalBytes: stats.totalBytes, + elapsedSeconds, + imageBytes, + imageTag, + }; + } finally { + spawnSync("docker", ["image", "rm", "-f", imageTag], { stdio: "ignore" }); + fs.rmSync(tmpRoot, { recursive: true, force: true }); + } +} + +function fmtMiB(bytes) { + return `${(bytes / (1024 * 1024)).toFixed(1)} MiB`; +} + +function fmtSeconds(seconds) { + return `${seconds.toFixed(1)}s`; +} + +function printSummary(results) { + console.log(""); + console.log("Sandbox image build benchmark"); + console.log(""); + for (const result of results) { + console.log(`${result.label}`); + console.log(` context files: ${result.fileCount}`); + console.log(` context size: ${fmtMiB(result.totalBytes)}`); + console.log(` build time: ${fmtSeconds(result.elapsedSeconds)}`); + console.log(` image size: ${fmtMiB(result.imageBytes)}`); + } + + if (results.length === 2) { + const [base, candidate] = results; + const timeDelta = base.elapsedSeconds - candidate.elapsedSeconds; + const sizeDelta = base.totalBytes - candidate.totalBytes; + console.log(""); + console.log("Delta"); + console.log(` context saved: ${fmtMiB(sizeDelta)}`); + console.log(` time saved: ${fmtSeconds(timeDelta)}`); + } +} + +function main() { + const args = parseArgs(process.argv.slice(2)); + const currentRepo = path.resolve(args.currentRepo); + const currentHead = execFileSync("git", ["rev-parse", "--short", "HEAD"], { cwd: currentRepo, encoding: "utf8" }).trim(); + const currentDirty = execFileSync("git", ["status", "--short"], { cwd: currentRepo, encoding: "utf8" }).trim().length > 0; + const currentLabel = currentDirty ? `${currentHead} + dirty` : currentHead; + const mainWorktree = makeTempWorktree(args.mainRef, currentRepo); + + try { + const mainLabel = execFileSync("git", ["rev-parse", "--short", "HEAD"], { cwd: mainWorktree, encoding: "utf8" }).trim(); + const results = [ + dockerBuild(mainWorktree, stageLegacySandboxBuildContext, `main (${mainLabel})`, args.noCache), + dockerBuild(currentRepo, stageOptimizedSandboxBuildContext, `candidate (${currentLabel})`, args.noCache), + ]; + printSummary(results); + } finally { + if (!args.keepWorktree) removeWorktree(mainWorktree, currentRepo); + } +} + +main(); diff --git a/scripts/setup.sh b/scripts/setup.sh index 978c9d8ae..0b752c31b 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -186,10 +186,17 @@ info "Building and creating NemoClaw sandbox (this takes a few minutes on first # Stage a clean build context (openshell doesn't honor .dockerignore) BUILD_CTX="$(mktemp -d)" cp "$REPO_DIR/Dockerfile" "$BUILD_CTX/" -cp -r "$REPO_DIR/nemoclaw" "$BUILD_CTX/nemoclaw" -cp -r "$REPO_DIR/nemoclaw-blueprint" "$BUILD_CTX/nemoclaw-blueprint" -cp -r "$REPO_DIR/scripts" "$BUILD_CTX/scripts" -rm -rf "$BUILD_CTX/nemoclaw/node_modules" +mkdir -p "$BUILD_CTX/nemoclaw" +cp "$REPO_DIR/nemoclaw/package.json" "$BUILD_CTX/nemoclaw/" +cp "$REPO_DIR/nemoclaw/package-lock.json" "$BUILD_CTX/nemoclaw/" +cp "$REPO_DIR/nemoclaw/tsconfig.json" "$BUILD_CTX/nemoclaw/" +cp "$REPO_DIR/nemoclaw/openclaw.plugin.json" "$BUILD_CTX/nemoclaw/" +cp -r "$REPO_DIR/nemoclaw/src" "$BUILD_CTX/nemoclaw/src" +mkdir -p "$BUILD_CTX/nemoclaw-blueprint" +cp "$REPO_DIR/nemoclaw-blueprint/blueprint.yaml" "$BUILD_CTX/nemoclaw-blueprint/" +cp -r "$REPO_DIR/nemoclaw-blueprint/policies" "$BUILD_CTX/nemoclaw-blueprint/policies" +mkdir -p "$BUILD_CTX/scripts" +cp "$REPO_DIR/scripts/nemoclaw-start.sh" "$BUILD_CTX/scripts/" # Capture full output to a temp file so we can filter for display but still # detect failures. The raw log is kept on failure for debugging. diff --git a/test/e2e-test.sh b/test/e2e-test.sh index 81981e7d6..35fc75214 100755 --- a/test/e2e-test.sh +++ b/test/e2e-test.sh @@ -45,16 +45,19 @@ fi # ------------------------------------------------------- info "3. Verify blueprint YAML is valid" # ------------------------------------------------------- -if python3 -c " -import yaml, sys -bp = yaml.safe_load(open('/opt/nemoclaw-blueprint/blueprint.yaml')) -assert bp['version'] == '0.1.0', f'Bad version: {bp[\"version\"]}' -profiles = bp['components']['inference']['profiles'] -assert 'default' in profiles, 'Missing default profile' -assert 'ncp' in profiles, 'Missing ncp profile' -assert 'vllm' in profiles, 'Missing vllm profile' -assert 'nim-local' in profiles, 'Missing nim-local profile' -print(f'Profiles: {list(profiles.keys())}') +if node --input-type=module -e " + import { createRequire } from 'node:module'; + import { readFileSync } from 'node:fs'; + const require = createRequire('/opt/nemoclaw/'); + const YAML = require('yaml'); + + const bp = YAML.parse(readFileSync('/opt/nemoclaw-blueprint/blueprint.yaml', 'utf-8')); + if (bp.version !== '0.1.0') throw new Error('Bad version: ' + bp.version); + const profiles = bp.components?.inference?.profiles ?? {}; + for (const profile of ['default', 'ncp', 'vllm', 'nim-local']) { + if (!(profile in profiles)) throw new Error('Missing ' + profile + ' profile'); + } + console.log('Profiles: ' + Object.keys(profiles).join(', ')); "; then pass "Blueprint YAML valid with all 4 profiles" else diff --git a/test/onboard.test.js b/test/onboard.test.js index f1240a9ed..3eaada1b0 100644 --- a/test/onboard.test.js +++ b/test/onboard.test.js @@ -11,12 +11,13 @@ import { describe, expect, it } from "vitest"; import { buildSandboxConfigSyncScript, getFutureShellPathHint, - getInstalledOpenshellVersion, getSandboxInferenceConfig, + getInstalledOpenshellVersion, getStableGatewayImageRef, patchStagedDockerfile, writeSandboxConfigSyncFile, } from "../bin/lib/onboard"; +import { stageOptimizedSandboxBuildContext } from "../bin/lib/sandbox-build-context"; describe("onboard helpers", () => { it("builds a sandbox sync script that only writes nemoclaw config", () => { @@ -175,6 +176,25 @@ describe("onboard helpers", () => { } }); + it("stages only the files required to build the sandbox image", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-context-")); + + try { + const { buildCtx, stagedDockerfile } = stageOptimizedSandboxBuildContext(repoRoot, tmpDir); + + expect(stagedDockerfile).toBe(path.join(buildCtx, "Dockerfile")); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw", "package-lock.json"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw", "src"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw-blueprint", ".venv"))).toBe(false); + expect(fs.existsSync(path.join(buildCtx, "scripts", "nemoclaw-start.sh"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "scripts", "setup.sh"))).toBe(false); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw", "node_modules"))).toBe(false); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + it("passes credential names to openshell without embedding secret values in argv", () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-inference-")); diff --git a/test/sandbox-build-context.test.js b/test/sandbox-build-context.test.js new file mode 100644 index 000000000..2f6971713 --- /dev/null +++ b/test/sandbox-build-context.test.js @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; + +import { + collectBuildContextStats, + stageLegacySandboxBuildContext, + stageOptimizedSandboxBuildContext, +} from "../bin/lib/sandbox-build-context"; + +describe("sandbox build context staging", () => { + it("optimized staging excludes blueprint .venv and extra scripts while preserving required files", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-context-opt-")); + + try { + const { buildCtx } = stageOptimizedSandboxBuildContext(repoRoot, tmpDir); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw-blueprint", ".venv"))).toBe(false); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw-blueprint", "blueprint.yaml"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "nemoclaw-blueprint", "policies", "openclaw-sandbox.yaml"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "scripts", "nemoclaw-start.sh"))).toBe(true); + expect(fs.existsSync(path.join(buildCtx, "scripts", "setup.sh"))).toBe(false); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("optimized staging is smaller than the legacy build context", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-context-compare-")); + + try { + const legacy = stageLegacySandboxBuildContext(repoRoot, tmpDir); + const optimized = stageOptimizedSandboxBuildContext(repoRoot, tmpDir); + const legacyStats = collectBuildContextStats(legacy.buildCtx); + const optimizedStats = collectBuildContextStats(optimized.buildCtx); + + expect(optimizedStats.fileCount).toBeLessThan(legacyStats.fileCount); + expect(optimizedStats.totalBytes).toBeLessThan(legacyStats.totalBytes); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); +});