Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 53 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ ARG CHAT_UI_URL=http://127.0.0.1:18789
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
ARG NEMOCLAW_INFERENCE_API=openai-completions
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
# EXPERIMENTAL: small model mode reduces the system prompt for local inference.
# When set to 1, writes compact workspace files and lowers bootstrap token budgets
# so small local models (e.g. qwen2.5:0.5b via Ollama) have more capacity for
# actual conversation instead of digesting a large system prompt.
ARG NEMOCLAW_SMALL_MODEL_MODE=0
# Unique per build to ensure each image gets a fresh auth token.
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
ARG NEMOCLAW_BUILD_ID=default
Expand All @@ -67,7 +72,8 @@ ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
CHAT_UI_URL=${CHAT_UI_URL} \
NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64}
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64} \
NEMOCLAW_SMALL_MODEL_MODE=${NEMOCLAW_SMALL_MODEL_MODE}

WORKDIR /sandbox
USER sandbox
Expand Down Expand Up @@ -99,8 +105,13 @@ providers = { \
'models': [{**({'compat': inference_compat} if inference_compat else {}), 'id': model, 'name': primary_model_ref, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
} \
}; \
# Small model mode adds lower bootstrap budgets to the agent defaults. A conditional
# expression is used because this program is joined into one line, where an if statement cannot follow a semicolon.
small_model = os.environ.get('NEMOCLAW_SMALL_MODEL_MODE', '0') == '1'; \
agent_defaults = {'model': {'primary': primary_model_ref}, **({'bootstrapMaxChars': 4000, 'bootstrapTotalMaxChars': 8000} if small_model else {})}; \
config = { \
'agents': {'defaults': {'model': {'primary': primary_model_ref}}}, \
'agents': {'defaults': agent_defaults}, \
'models': {'mode': 'merge', 'providers': providers}, \
'channels': {'defaults': {'configWrites': False}}, \
'gateway': { \
Expand All @@ -122,6 +133,46 @@ os.chmod(path, 0o600)"
RUN openclaw doctor --fix > /dev/null 2>&1 || true \
&& openclaw plugins install /opt/nemoclaw > /dev/null 2>&1 || true

# EXPERIMENTAL: small model mode — write compact workspace files so local models
# spend fewer tokens on system prompt and more on actual conversation.
# Users can still override these files inside the sandbox after creation.
# The steps are chained with && so a failed write of either file fails the build
# instead of being masked by the exit status of the last command in the list.
# hadolint ignore=SC2016
RUN if [ "$NEMOCLAW_SMALL_MODEL_MODE" = "1" ]; then \
echo '[experimental] Small model mode: writing compact workspace files' \
&& printf '%s\n' \
'# SOUL' \
'' \
'You are a helpful AI assistant running locally. Be concise and direct.' \
'' \
'- Answer questions accurately' \
'- Admit when you don'\''t know something' \
'- Keep responses short unless asked to elaborate' \
'- Use tools when available and appropriate' \
'- Never fabricate information' \
> /sandbox/.openclaw-data/workspace/SOUL.md \
&& printf '%s\n' \
'# Agents' \
'' \
'## Startup' \
'' \
'Read these files if they exist:' \
'- `SOUL.md` — your behavioral rules' \
'- `USER.md` — context about the person you are helping' \
'' \
'## Rules' \
'' \
'- Safe to do freely: read files, search, organize workspace' \
'- Ask before: sending messages, deleting files, any external action' \
'- Never exfiltrate private data' \
'- Use `trash` instead of `rm`' \
'' \
'## Memory' \
'' \
'Write notes to `memory/YYYY-MM-DD.md` to remember things across sessions.' \
'Curate important facts into `MEMORY.md`.' \
> /sandbox/.openclaw-data/workspace/AGENTS.md; \
fi

# Lock openclaw.json via DAC: chown to root so the sandbox user cannot modify
# it at runtime. This works regardless of Landlock enforcement status.
# The Landlock policy (/sandbox/.openclaw in read_only) provides defense-in-depth
Expand Down
18 changes: 17 additions & 1 deletion bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@
let inferenceBaseUrl = "https://inference.local/v1";
let inferenceApi = preferredInferenceApi || "openai-completions";
let inferenceCompat = null;
let smallModelMode = false;

switch (provider) {
case "openai-api":
Expand Down Expand Up @@ -485,6 +486,12 @@
supportsStore: false,
};
break;
case "ollama-local":
case "vllm-local":
providerKey = "inference";
primaryModelRef = `inference/${model}`;
smallModelMode = true;
break;
case "nvidia-prod":
case "nvidia-nim":
default:
Expand All @@ -493,7 +500,7 @@
break;
}

return { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat };
return { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat, smallModelMode };
}

function patchStagedDockerfile(dockerfilePath, model, chatUiUrl, buildId = String(Date.now()), provider = null, preferredInferenceApi = null) {
Expand All @@ -503,6 +510,7 @@
inferenceBaseUrl,
inferenceApi,
inferenceCompat,
smallModelMode,
} = getSandboxInferenceConfig(model, provider, preferredInferenceApi);
let dockerfile = fs.readFileSync(dockerfilePath, "utf8");
dockerfile = dockerfile.replace(
Expand Down Expand Up @@ -533,6 +541,10 @@
/^ARG NEMOCLAW_INFERENCE_COMPAT_B64=.*$/m,
`ARG NEMOCLAW_INFERENCE_COMPAT_B64=${encodeDockerJsonArg(inferenceCompat)}`
);
dockerfile = dockerfile.replace(
/^ARG NEMOCLAW_SMALL_MODEL_MODE=.*$/m,
`ARG NEMOCLAW_SMALL_MODEL_MODE=${smallModelMode ? "1" : "0"}`
);
dockerfile = dockerfile.replace(
/^ARG NEMOCLAW_BUILD_ID=.*$/m,
`ARG NEMOCLAW_BUILD_ID=${buildId}`
Expand Down Expand Up @@ -1426,7 +1438,7 @@

// ── Step 3: Sandbox ──────────────────────────────────────────────

async function createSandbox(gpu, model, provider, preferredInferenceApi = null) {

Check failure on line 1441 in bin/lib/onboard.js

View workflow job for this annotation

GitHub Actions / lint

Async function 'createSandbox' has a complexity of 21. Maximum allowed is 20
step(5, 7, "Creating sandbox");

const nameAnswer = await promptOrDefault(
Expand Down Expand Up @@ -1490,6 +1502,10 @@
console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`);
const chatUiUrl = process.env.CHAT_UI_URL || "http://127.0.0.1:18789";
patchStagedDockerfile(stagedDockerfile, model, chatUiUrl, String(Date.now()), provider, preferredInferenceApi);
const { smallModelMode } = getSandboxInferenceConfig(model, provider, preferredInferenceApi);
if (smallModelMode) {
console.log(" [experimental] Small model mode: reduced system prompt for local inference");
}
// Only pass non-sensitive env vars to the sandbox. NVIDIA_API_KEY is NOT
// needed inside the sandbox — inference is proxied through the OpenShell
// gateway which injects the stored credential server-side. The gateway
Expand Down
1 change: 1 addition & 0 deletions test/local-inference.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,5 @@ describe("local inference helpers", () => {
);
expect(result).toEqual({ ok: true });
});

});
69 changes: 69 additions & 0 deletions test/onboard.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ describe("onboard helpers", () => {
"ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b",
"ARG CHAT_UI_URL=http://127.0.0.1:18789",
"ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=",
"ARG NEMOCLAW_SMALL_MODEL_MODE=0",
"ARG NEMOCLAW_BUILD_ID=default",
].join("\n")
);
Expand All @@ -76,6 +77,7 @@ describe("onboard helpers", () => {
inferenceBaseUrl: "https://inference.local/v1",
inferenceApi: "openai-completions",
inferenceCompat: null,
smallModelMode: false,
}
);
});
Expand All @@ -93,6 +95,7 @@ describe("onboard helpers", () => {
"ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1",
"ARG NEMOCLAW_INFERENCE_API=openai-completions",
"ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=",
"ARG NEMOCLAW_SMALL_MODEL_MODE=0",
"ARG NEMOCLAW_BUILD_ID=default",
].join("\n")
);
Expand Down Expand Up @@ -127,6 +130,7 @@ describe("onboard helpers", () => {
inferenceCompat: {
supportsStore: false,
},
smallModelMode: false,
}
);
});
Expand All @@ -140,10 +144,75 @@ describe("onboard helpers", () => {
inferenceBaseUrl: "https://inference.local/v1",
inferenceApi: "openai-responses",
inferenceCompat: null,
smallModelMode: false,
}
);
});

it("maps ollama-local to the routed inference provider with small model mode", () => {
  // The ollama-local provider is expected to resolve to the "inference"
  // provider key and to switch small model mode on.
  const expectedConfig = {
    providerKey: "inference",
    primaryModelRef: "inference/qwen2.5:7b",
    inferenceBaseUrl: "https://inference.local/v1",
    inferenceApi: "openai-completions",
    inferenceCompat: null,
    smallModelMode: true,
  };
  const actualConfig = getSandboxInferenceConfig("qwen2.5:7b", "ollama-local", "openai-completions");

  assert.deepEqual(actualConfig, expectedConfig);
});

it("maps vllm-local to the routed inference provider with small model mode", () => {
  // The vllm-local provider is expected to resolve to the "inference"
  // provider key and to switch small model mode on.
  const expectedConfig = {
    providerKey: "inference",
    primaryModelRef: "inference/nemotron-3-nano:30b",
    inferenceBaseUrl: "https://inference.local/v1",
    inferenceApi: "openai-completions",
    inferenceCompat: null,
    smallModelMode: true,
  };
  const actualConfig = getSandboxInferenceConfig("nemotron-3-nano:30b", "vllm-local", "openai-completions");

  assert.deepEqual(actualConfig, expectedConfig);
});

it("patches the staged Dockerfile with small model mode for ollama-local", () => {
  // Stage a throwaway Dockerfile containing the default ARG lines.
  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-dockerfile-ollama-"));
  const stagedPath = path.join(workDir, "Dockerfile");
  const seedLines = [
    "ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b",
    "ARG NEMOCLAW_PROVIDER_KEY=nvidia",
    "ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b",
    "ARG CHAT_UI_URL=http://127.0.0.1:18789",
    "ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1",
    "ARG NEMOCLAW_INFERENCE_API=openai-completions",
    "ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=",
    "ARG NEMOCLAW_SMALL_MODEL_MODE=0",
    "ARG NEMOCLAW_BUILD_ID=default",
  ];
  fs.writeFileSync(stagedPath, seedLines.join("\n"));

  try {
    patchStagedDockerfile(stagedPath, "qwen2.5:0.5b", "http://127.0.0.1:18789", "build-ollama", "ollama-local");

    // Each pattern must match a whole ARG line in the rewritten file.
    const rewritten = fs.readFileSync(stagedPath, "utf8");
    const expectedArgLines = [
      /^ARG NEMOCLAW_MODEL=qwen2\.5:0\.5b$/m,
      /^ARG NEMOCLAW_PROVIDER_KEY=inference$/m,
      /^ARG NEMOCLAW_PRIMARY_MODEL_REF=inference\/qwen2\.5:0\.5b$/m,
      /^ARG NEMOCLAW_SMALL_MODEL_MODE=1$/m,
    ];
    for (const argLine of expectedArgLines) {
      assert.match(rewritten, argLine);
    }
  } finally {
    // Always remove the temporary directory, even when an assertion fails.
    fs.rmSync(workDir, { recursive: true, force: true });
  }
});

it("pins the gateway image to the installed OpenShell release version", () => {
expect(getInstalledOpenshellVersion("openshell 0.0.12")).toBe("0.0.12");
expect(getInstalledOpenshellVersion("openshell 0.0.13-dev.8+gbbcaed2ea")).toBe("0.0.13");
Expand Down
Loading