Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ WORKDIR /openclaw
ARG OPENCLAW_GIT_REF=main
RUN git clone --depth 1 --branch "${OPENCLAW_GIT_REF}" https://github.com/openclaw/openclaw.git .

# Patch: fix heartbeat model override ignored (#56788)
# Runs right after the clone with WORKDIR /openclaw, so the script's relative
# src/... paths resolve against the freshly checked-out tree.
COPY patches/apply-heartbeat-model-fix.sh /tmp/apply-heartbeat-model-fix.sh
RUN bash /tmp/apply-heartbeat-model-fix.sh

# Patch: relax version requirements for packages using workspace protocol.
RUN set -eux; \
find ./extensions -name 'package.json' -type f | while read -r f; do \
Expand All @@ -29,6 +33,11 @@ RUN set -eux; \
done

RUN pnpm install --no-frozen-lockfile

# Patch: move context files to after cache boundary (0% → ~80% cache hit rate)
# Applied after `pnpm install` and before `pnpm build`, so the build compiles
# the patched source.
# NOTE(review): the script shells out to python3 — confirm python3 is present
# in this build stage.
COPY patches/apply-prompt-cache-context-files-fix.sh /tmp/apply-prompt-cache-context-files-fix.sh
RUN bash /tmp/apply-prompt-cache-context-files-fix.sh

RUN pnpm build
ENV OPENCLAW_PREFER_PNPM=1
RUN pnpm ui:install && pnpm ui:build
Expand Down
166 changes: 166 additions & 0 deletions patches/apply-heartbeat-model-fix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/usr/bin/env bash
# Fix: heartbeat model override ignored (#56788)
# Applies changes from upstream PRs #57094 and #57076 (both unmerged as of 2026-04-05).
#
# Root causes (4 loss points in the model resolution chain):
# 1. runtime-system.ts strips heartbeat.model when forwarding to runHeartbeatOnceInternal
# 2. live-model-switch.ts ignores caller-provided defaults, uses config default instead
# 3. model-fallback.ts swallows LiveSessionModelSwitchError as candidate failure
# 4. get-reply.ts unconditionally overwrites heartbeat model after directive resolution
# Abort on any failing command, unset variable, or failed pipeline stage.
set -euo pipefail

# Banner lines so the patch is easy to spot in build logs.
echo "[patch] Applying heartbeat model override fix (#56788)..."
echo "[patch] Based on upstream PRs #57094 + #57076"

# ── Fix 1: runtime-system.ts — pass model field through (#57076) ────────────
# Rewrites the heartbeat forwarding expression so `model` is passed along with
# `target`. The substitution count is tracked in $n and turned into perl's exit
# status ($? is assignable inside END), so "Fixed" is only logged when the
# expected source line was really found. A miss is reported but does not abort
# the build — same best-effort policy as the missing-file branch.
FILE="src/plugins/runtime/runtime-system.ts"
if [ -f "$FILE" ]; then
  if perl -i -pe '$n += s/heartbeat: heartbeat \? \{ target: heartbeat\.target \} : undefined/heartbeat: heartbeat ? { target: heartbeat.target, model: heartbeat.model } : undefined/; END { $? = 3 unless $n }' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: expected pattern not found in $FILE — patch NOT applied" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# ── Fix 1b: types-core.ts — add model to heartbeat type (#57076) ───────────
# Widens the inline heartbeat option type with an optional `model` string.
# perl exits non-zero (via $? in END) when no line matched, so the log tells
# the truth about whether the file changed; a miss only warns.
FILE="src/plugins/runtime/types-core.ts"
if [ -f "$FILE" ]; then
  if perl -i -pe '$n += s/heartbeat\?: \{ target\?: string \}/heartbeat?: { target?: string; model?: string }/; END { $? = 3 unless $n }' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: expected pattern not found in $FILE — patch NOT applied" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# ── Fix 2: live-model-switch.ts — prefer caller-provided defaults (#57076) ──
# Replaces the `defaultModelRef` initializer so that an explicit
# defaultProvider/defaultModel pair from the caller wins over the per-agent
# config default. The file is slurped (-0777) because the original expression
# spans several lines; /x makes the pattern whitespace-insensitive.
# The replacement starts at `const` (the match does too), so the line's
# existing indentation is kept instead of being doubled.
# perl exits non-zero when nothing matched, so the log is accurate.
FILE="src/agents/live-model-switch.ts"
if [ -f "$FILE" ]; then
  if perl -0777 -i -pe '$n += s{
    const\s+defaultModelRef\s*=\s*agentId\s*\n\s*\?\s*resolveDefaultModelForAgent\(\{\s*\n\s*cfg,\s*\n\s*agentId,\s*\n\s*\}\)\s*\n\s*:\s*\{\s*provider:\s*params\.defaultProvider,\s*model:\s*params\.defaultModel\s*\};
  }{const defaultModelRef =
    params.defaultProvider \&\& params.defaultModel
      ? { provider: params.defaultProvider, model: params.defaultModel }
      : agentId
        ? resolveDefaultModelForAgent({ cfg, agentId })
        : { provider: params.defaultProvider, model: params.defaultModel };}xms; END { $? = 3 unless $n }' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: expected pattern not found in $FILE — patch NOT applied" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# ── Fix 3: model-fallback.ts — rethrow LiveSessionModelSwitchError (#57094) ─
# Multi-site patch done in node:
#   3a  add an exported structural type check for the switch error
#   3b  runFallbackCandidate: new optional `rethrowLiveSwitch` param + rethrow
#   3c  runFallbackAttempt: new param, forwarded to runFallbackCandidate
#   3d  runWithModelFallback: new param in the public signature
#   3e  runWithModelFallback: forward the param to runFallbackAttempt
#
# Each step only INSERTS text next to an anchor and reuses the indentation it
# finds there. In particular the signature steps match up to the closing `}`
# of the params type and emit just `}` again, leaving the following
# `): Promise<...>` untouched (emitting `})` there would yield `}))`).
# Steps that find no anchor are listed on stderr and make node exit non-zero,
# which only downgrades the log line — the build is not aborted.
FILE="src/agents/model-fallback.ts"
if [ -f "$FILE" ]; then
  # Use node for complex multi-site patching — safer than nested perl.
  # NOTE: the script is wrapped in shell single quotes, so it must not contain
  # a single-quote character anywhere, comments included.
  if node -e '
const fs = require("fs");

const file = process.argv[1];
let code = fs.readFileSync(file, "utf8");
const missed = [];

// Run one replacement and remember the step label when nothing changed.
function step(label, pattern, replacement) {
  const next = code.replace(pattern, replacement);
  if (next === code) {
    missed.push(label);
  }
  code = next;
}

// 3a: Add isLiveSessionModelSwitchError export after the log line.
const checkFn = `

/**
 * Structural check for LiveSessionModelSwitchError that works across
 * module-boundary duplicates where instanceof would fail.
 */
export function isLiveSessionModelSwitchError(err) {
  return (
    typeof err === "object" &&
    err !== null &&
    err.name === "LiveSessionModelSwitchError" &&
    typeof err.provider === "string" &&
    typeof err.model === "string"
  );
}`;

// Skip when the helper is already present so a re-run cannot duplicate it.
if (!code.includes("export function isLiveSessionModelSwitchError(")) {
  step(
    "3a helper export",
    /const log = createSubsystemLogger\("model-fallback"\);/,
    (matched) => matched + checkFn
  );
}

// 3b: Add rethrowLiveSwitch to the runFallbackCandidate params type.
// $2 is the indentation of the last existing field.
step(
  "3b runFallbackCandidate params",
  /(async function runFallbackCandidate<T>\(params: \{\n\s+run: ModelFallbackRunFn<T>;\n\s+provider: string;\n\s+model: string;\n([ \t]*)options\?: ModelFallbackRunOptions;\n)\}/,
  "$1$2rethrowLiveSwitch?: boolean;\n}"
);

// 3b: Rethrow before the error-normalisation code in the catch block.
step(
  "3b rethrow in catch block",
  /(\} catch \(err\) \{\n)([ \t]*)(\/\/ Normalize abort-wrapped rate-limit errors)/,
  "$1$2if (params.rethrowLiveSwitch && isLiveSessionModelSwitchError(err)) {\n$2  throw err;\n$2}\n$2$3"
);

// 3c: Add rethrowLiveSwitch to the runFallbackAttempt params type.
step(
  "3c runFallbackAttempt params",
  /(async function runFallbackAttempt<T>\(params: \{\n\s+run: ModelFallbackRunFn<T>;\n\s+provider: string;\n\s+model: string;\n\s+attempts: FallbackAttempt\[\];\n([ \t]*)options\?: ModelFallbackRunOptions;\n)\}/,
  "$1$2rethrowLiveSwitch?: boolean;\n}"
);

// 3c: Forward the flag from runFallbackAttempt to runFallbackCandidate.
step(
  "3c forward to runFallbackCandidate",
  /(const runResult = await runFallbackCandidate\(\{\n\s+run: params\.run,\n\s+provider: params\.provider,\n\s+model: params\.model,\n([ \t]*)options: params\.options,\n)(\s*\}\);)/,
  "$1$2rethrowLiveSwitch: params.rethrowLiveSwitch,\n$3"
);

// 3d: Add rethrowLiveSwitch to the runWithModelFallback signature.
step(
  "3d runWithModelFallback params",
  /(([ \t]*)onError\?: ModelFallbackErrorHandler;\n)(\}\): Promise<ModelFallbackRunResult<T>>)/,
  "$1$2rethrowLiveSwitch?: boolean;\n$3"
);

// 3e: Forward the flag in the first runFallbackAttempt call (options: runOptions).
step(
  "3e forward to runFallbackAttempt",
  /(const attemptRun = await runFallbackAttempt\(\{\n\s+run: params\.run,\n\s+\.\.\.candidate,\n\s+attempts,\n([ \t]*)options: runOptions,\n)(\s*\}\);)/,
  "$1$2rethrowLiveSwitch: params.rethrowLiveSwitch,\n$3"
);

fs.writeFileSync(file, code, "utf8");

for (const label of missed) {
  console.error(`[patch] WARNING: step "${label}" did not match in ${file}`);
}
// Non-zero only signals "partially applied" to the calling shell.
process.exitCode = missed.length === 0 ? 0 : 3;
' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: $FILE was only partially patched (see messages above)" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# ── Fix 4: agent-runner-execution.ts — structural check + rethrowLiveSwitch (#57094) ─
# 4a  import isLiveSessionModelSwitchError from model-fallback.js and drop the
#     now-unused LiveSessionModelSwitchError class import
# 4b  pass `rethrowLiveSwitch: true` to runWithModelFallback
# 4c  replace `instanceof LiveSessionModelSwitchError` with the structural check
#
# 4a and 4c are applied as a unit: the instanceof checks are only rewritten if
# the helper import could be added, and the class import is only removed if no
# other reference to the class remains. Otherwise the file would end up using
# an identifier that is not in scope. Unmatched steps are listed on stderr and
# downgrade the log line; they do not abort the build.
FILE="src/auto-reply/reply/agent-runner-execution.ts"
if [ -f "$FILE" ]; then
  # NOTE: the script is wrapped in shell single quotes, so it must not contain
  # a single-quote character anywhere, comments included.
  if node -e '
const fs = require("fs");

const file = process.argv[1];
let code = fs.readFileSync(file, "utf8");
const missed = [];

const CLASS_IMPORT =
  /import \{ LiveSessionModelSwitchError \} from "\.\.\/\.\.\/agents\/live-model-switch-error\.js";\n/;
const FALLBACK_IMPORT =
  /import \{ runWithModelFallback, isFallbackSummaryError \} from "\.\.\/\.\.\/agents\/model-fallback\.js";/;
const INSTANCEOF_CHECK = /if \(err instanceof LiveSessionModelSwitchError\) \{/g;

if (FALLBACK_IMPORT.test(code)) {
  // 4a: Bring the structural check into scope.
  code = code.replace(
    FALLBACK_IMPORT,
    `import {\n  runWithModelFallback,\n  isFallbackSummaryError,\n  isLiveSessionModelSwitchError,\n} from "../../agents/model-fallback.js";`
  );

  // 4c: Replace every instanceof check with the structural check.
  const swapped = code.replace(INSTANCEOF_CHECK, "if (isLiveSessionModelSwitchError(err)) {");
  if (swapped === code) {
    missed.push("4c instanceof check");
  }
  code = swapped;

  // 4a: Drop the class import, but only when nothing else still uses the class.
  const withoutClassImport = code.replace(CLASS_IMPORT, "");
  if (withoutClassImport === code) {
    missed.push("4a class import removal");
  } else if (/\bLiveSessionModelSwitchError\b/.test(withoutClassImport)) {
    missed.push("4a class import kept (class is still referenced)");
  } else {
    code = withoutClassImport;
  }
} else {
  missed.push("4a model-fallback import (4c skipped as well)");
}

// 4b: Pass rethrowLiveSwitch: true to runWithModelFallback.
// Guarded so a re-run cannot add the property twice.
if (!code.includes("rethrowLiveSwitch: true")) {
  const withFlag = code.replace(
    /\.\.\.resolveModelFallbackOptions\(params\.followupRun\.run\),\n(\s+)runId,/,
    "...resolveModelFallbackOptions(params.followupRun.run),\n$1runId,\n$1rethrowLiveSwitch: true,"
  );
  if (withFlag === code) {
    missed.push("4b rethrowLiveSwitch argument");
  }
  code = withFlag;
}

fs.writeFileSync(file, code, "utf8");

for (const label of missed) {
  console.error(`[patch] WARNING: step "${label}" did not match in ${file}`);
}
// Non-zero only signals "partially applied" to the calling shell.
process.exitCode = missed.length === 0 ? 0 : 3;
' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: $FILE was only partially patched (see messages above)" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# ── Fix 5: get-reply.ts — guard post-directive model overwrite (#57076) ─────
# Wraps the two assignments that follow the `directiveResult.result`
# destructuring in `if (!hasResolvedHeartbeatModelOverride) { ... }`.
# The indentation of the first assignment is captured (${2}) and reused, so
# the pattern does not depend on the file's exact indent width.
# perl exits non-zero when nothing matched, so the log is accurate; a miss
# only warns.
# NOTE(review): assumes `hasResolvedHeartbeatModelOverride` is already declared
# in scope at that point in get-reply.ts — confirm against the pinned ref.
FILE="src/auto-reply/reply/get-reply.ts"
if [ -f "$FILE" ]; then
  if perl -0777 -i -pe '$n += s{(\} = directiveResult\.result;\n)([ \t]*)provider = resolvedProvider;\n[ \t]*model = resolvedModel;}{${1}${2}if (!hasResolvedHeartbeatModelOverride) \{\n${2}  provider = resolvedProvider;\n${2}  model = resolvedModel;\n${2}\}}; END { $? = 3 unless $n }' "$FILE"; then
    echo "[patch] Fixed $FILE"
  else
    echo "[patch] WARNING: expected pattern not found in $FILE — patch NOT applied" >&2
  fi
else
  echo "[patch] WARNING: $FILE not found"
fi

# Final summary. The script targets six files (runtime-system.ts,
# types-core.ts, live-model-switch.ts, model-fallback.ts,
# agent-runner-execution.ts, get-reply.ts), and a missing file only produces a
# WARNING, so the message points at the log instead of claiming success.
echo "[patch] Heartbeat model override patch run finished (6 target files, 4 root causes) — check for WARNING lines above."
61 changes: 61 additions & 0 deletions patches/apply-prompt-cache-context-files-fix.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Fix: 0% prompt cache hit rate — context files invalidate stable prefix
#
# Root cause: context files (MEMORY.md, SOUL.md, USER.md, etc.) are embedded in the
# STABLE PREFIX (before <!-- OPENCLAW_CACHE_BOUNDARY -->). When the agent writes to
# MEMORY.md during a turn, the stable prefix changes on the next call → cache miss.
#
# Fix: Move the "# Project Context" section from before to after the cache boundary.
# Impact: cache hit rate goes from ~0% to ~80-90%.
# Strict mode: stop on command failure, unset variable, or failed pipe stage.
set -euo pipefail

echo "[patch] Applying prompt cache context files fix..."

# The one source file this patch rewrites; bail out early when it is absent.
FILE="src/agents/system-prompt.ts"
[ -f "$FILE" ] || {
  echo "[patch] ERROR: $FILE not found"
  exit 1
}

# Move the "context files" section of the system-prompt builder from before to
# after the cache-boundary push. The target path is passed as argv[1] so it is
# defined in exactly one place ($FILE). The heredoc delimiter is quoted, so the
# shell expands nothing inside the Python program.
python3 - "$FILE" << 'PYEOF'
import re
import sys

path = sys.argv[1]

# Start of the line declaring `contextFiles`, up to (not including) the line
# holding the "// Skip silent replies" comment. Indentation is matched
# loosely so the patch does not depend on the exact indent width.
BLOCK_RE = re.compile(
    r"^[ \t]*const contextFiles = params\.contextFiles.*?"
    r"(?=^[ \t]*// Skip silent replies for subagent)",
    re.DOTALL | re.MULTILINE,
)
# The statement that emits the cache boundary; group 1 is its indentation.
BOUNDARY_RE = re.compile(
    r"^([ \t]*)lines\.push\(SYSTEM_PROMPT_CACHE_BOUNDARY\);\n",
    re.MULTILINE,
)


def fail(message):
    """Report a fatal patch error and abort; nothing has been written yet."""
    print(f"[patch] ERROR: {message}", file=sys.stderr)
    sys.exit(1)


# Explicit UTF-8 so the result does not depend on the build image's locale.
with open(path, encoding="utf-8") as f:
    code = f.read()

block = BLOCK_RE.search(code)
if block is None:
    fail("Could not find context files block. Source may have changed.")

boundary = BOUNDARY_RE.search(code)
if boundary is None or block.start() <= boundary.start() < block.end():
    # Missing entirely, or only present inside the block that is being moved.
    fail("Could not find cache boundary push line.")

if boundary.start() < block.start():
    # The block already sits after the boundary (fixed upstream or patch
    # re-run): leave the file untouched.
    print("[patch] Context files block is already after the cache boundary; nothing to do.")
    sys.exit(0)

context_block = block.group(0)
print(f"[patch] Found context files block ({len(context_block)} chars)")

indent = boundary.group(1)
insertion = (
    boundary.group(0)
    + "\n"
    + f"{indent}// Context files (MEMORY.md, SOUL.md, etc.) are placed AFTER the cache boundary\n"
    + f"{indent}// so that agent memory writes between turns do not invalidate the cached stable prefix.\n"
    + context_block
)

# Step 1: cut the block out of its old position (exactly this one occurrence).
# Step 2: re-insert it right after the cache boundary push.
code = (
    code[: block.start()]
    + code[block.end() : boundary.start()]
    + insertion
    + code[boundary.end() :]
)

with open(path, "w", encoding="utf-8") as f:
    f.write(code)

print("[patch] Moved context files block to after cache boundary.")
print("[patch] Impact: cache hit rate ~0% → ~80-90%.")
PYEOF
66 changes: 61 additions & 5 deletions scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,12 @@ HOOKS_LOCATION_BLOCK=""
if [ -n "$HOOKS_PATH" ]; then
HOOKS_LOCATION_BLOCK="location ${HOOKS_PATH} {
proxy_pass http://127.0.0.1:${GATEWAY_PORT};
proxy_set_header Authorization \\\$http_authorization;
proxy_set_header Authorization \$http_authorization;

proxy_set_header Host \\\$host;
proxy_set_header X-Real-IP \\\$remote_addr;
proxy_set_header X-Forwarded-For \\\$proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto \\\$scheme;
proxy_set_header Host \$host;
proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto \$scheme;

proxy_http_version 1.1;

Expand Down Expand Up @@ -302,6 +302,62 @@ nginx
# Remove gateway lock files if they exist. Output is discarded and failures are
# ignored (`|| true`), so this step can never abort startup.
# NOTE(review): presumably these are stale locks left by a previous run — confirm.
rm -f /tmp/openclaw-gateway.lock 2>/dev/null || true
rm -f "$STATE_DIR/gateway.lock" 2>/dev/null || true

# ── LINE webhook route hot-reload workaround ─────────────────────────────────
# Bug: openclaw/openclaw#49803 — LINE (and other webhook-based channels) return
# 404 on cold start because the bundler splits runtime.ts into two chunks. The
# chunk that registers LINE routes and the chunk the HTTP server uses to look up
# routes both initialise the same global "route registry" object — but whichever
# chunk runs first creates the object without the fields the other chunk expects,
# so they end up referencing different registries. Result: routes are registered
# successfully (no error logged) but the HTTP server can't find them → 404.
#
# After a config hot-reload the channel restarts and re-registers routes into the
# correct registry, so the 404 goes away. The workaround below triggers that
# hot-reload automatically 20 seconds after the gateway starts.
#
# How it works:
# 1. Waits 20 s for the gateway to fully start and load all channel plugins.
# 2. Writes a temporary "_reloadTs" field into openclaw.json — the gateway's
# file-watcher sees the change and hot-reloads the LINE channel.
# 3. Removes the temporary field 5 s later (clean-up).
#
# This is a background process — it does NOT block gateway startup.
# Remove this block once openclaw/openclaw#49803 is fixed upstream and the
# upstream fix is merged and shipped in a release we build from.
#
# Upstream PRs tracking the real fix:
# - openclaw/openclaw#53642 (bundle-split registry mismatch fix)
# - openclaw/openclaw#54686 (syncPluginRegistry at runtime boundaries)
CONFIG_FILE="${STATE_DIR}/openclaw.json"
(
  # reload_marker <set|clear>
  # Re-writes $CONFIG_FILE with the temporary "_reloadTs" field added ("set")
  # or removed ("clear"). On failure (unreadable file, invalid JSON, write
  # error) the reason is logged to stderr and the function returns non-zero.
  # The JS is inside shell double quotes, so it uses single-quoted strings and
  # plain concatenation; a template literal would be expanded by the shell.
  reload_marker() {
    node -e "
      const fs = require('fs');
      const [file, action] = process.argv.slice(1);
      try {
        const c = JSON.parse(fs.readFileSync(file, 'utf8'));
        if (action === 'set') {
          c._reloadTs = Date.now();
        } else {
          delete c._reloadTs;
        }
        fs.writeFileSync(file, JSON.stringify(c, null, 2));
      } catch (e) {
        console.error('[entrypoint] WARNING: LINE hot-reload workaround could not ' + action + ' _reloadTs: ' + e.message);
        process.exitCode = 1;
      }
    " "$CONFIG_FILE" "$1"
  }

  # Give the gateway time to start and load all channel plugins.
  sleep 20
  if [ -f "$CONFIG_FILE" ]; then
    echo "[entrypoint] applying LINE webhook route hot-reload workaround (#49803)..."
    # Add a temporary field to trigger the file-watcher hot-reload.
    # Calling the helper as an `if` condition keeps a failure from terminating
    # the subshell even if the script runs with `set -e`.
    if reload_marker set; then
      sleep 5
      # Remove the temporary field again; only claim success if both writes worked.
      if reload_marker clear; then
        echo "[entrypoint] LINE webhook route hot-reload complete."
      fi
    fi
  else
    echo "[entrypoint] WARNING: config not found at $CONFIG_FILE — skipping LINE hot-reload workaround"
  fi
) &

# ── Start openclaw gateway ───────────────────────────────────────────────────
echo "[entrypoint] starting openclaw gateway on port $GATEWAY_PORT..."

Expand Down