Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.sh text eol=lf

6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,8 @@ terraform.tfvars
test/e2e/.dev.vars

# Temporary e2e wrangler configs
.wrangler-e2e-*.jsonc
.wrangler-e2e-*.jsonc

# Local PR / helper docs (not part of the repo)
.pr-error-info.md
PR-DESCRIPTION.md
10 changes: 10 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@ npx wrangler secret list

Enable debug routes with `DEBUG_ROUTES=true` and check `/debug/processes`.

## Troubleshooting

- **Health OK but no reply from agent / Control UI hangs**
- Check `GET /api/status`: `gatewayProcess.status`, `gatewayProcess.exitCode`, and `lastStderrPreview` (if present) for the last failed gateway run.
- Check `GET /debug/processes?logs=true` or `GET /debug/processes?logs=true&failed=1` for full stderr of gateway/start-openclaw (or start-moltbot) processes.
- Run `npx wrangler tail` and look for `[WS] close` / `[WS] error` JSON lines (code, reason, side) when reproducing the issue.

- **Gateway exits with code 126**
- Usually caused by the script not being executable or by CRLF line endings in `start-openclaw.sh` (or `start-moltbot.sh`). Ensure the Dockerfile runs `tr -d '\r'` on the script and `chmod 755` before use. Keep `*.sh` as LF in `.gitattributes`.

## R2 Storage Notes

R2 is mounted via s3fs at `/data/moltbot`. Important gotchas:
Expand Down
17 changes: 6 additions & 11 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -385,18 +385,14 @@ app.all('*', async (c) => {
}
});

// Handle close events
// Handle close events (structured log for wrangler tail / debugging)
serverWs.addEventListener('close', (event) => {
if (debugLogs) {
console.log('[WS] Client closed:', event.code, event.reason);
}
console.error('[WS] close', JSON.stringify({ side: 'client', code: event.code, reason: event.reason || '(none)' }));
containerWs.close(event.code, event.reason);
});

containerWs.addEventListener('close', (event) => {
if (debugLogs) {
console.log('[WS] Container closed:', event.code, event.reason);
}
console.error('[WS] close', JSON.stringify({ side: 'container', code: event.code, reason: event.reason || '(none)' }));
// Transform the close reason (truncate to 123 bytes max for WebSocket spec)
let reason = transformErrorMessage(event.reason, url.host);
if (reason.length > 123) {
Expand All @@ -407,15 +403,14 @@ app.all('*', async (c) => {
}
serverWs.close(event.code, reason);
});

// Handle errors
// Handle errors (structured log for wrangler tail / debugging)
serverWs.addEventListener('error', (event) => {
console.error('[WS] Client error:', event);
console.error('[WS] error', JSON.stringify({ side: 'client', message: event instanceof ErrorEvent ? event.message : String(event) }));
containerWs.close(1011, 'Client error');
});

containerWs.addEventListener('error', (event) => {
console.error('[WS] Container error:', event);
console.error('[WS] error', JSON.stringify({ side: 'container', message: event instanceof ErrorEvent ? event.message : String(event) }));
serverWs.close(1011, 'Container error');
});

Expand Down
98 changes: 88 additions & 10 deletions src/routes/debug.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Hono } from 'hono';
import type { AppEnv } from '../types';
import { findExistingMoltbotProcess, waitForProcess } from '../gateway';
import { ensureMoltbotGateway, findExistingMoltbotProcess, waitForProcess } from '../gateway';
import { sanitizeStderr } from '../utils/sanitize';

/**
* Debug routes for inspecting container state
Expand All @@ -9,6 +10,49 @@ import { findExistingMoltbotProcess, waitForProcess } from '../gateway';
*/
const debug = new Hono<AppEnv>();

// GET /debug/start-gateway - Force start the Moltbot gateway (for triggering 126 logs in wrangler tail)
// On success: responds with the process id/status returned by ensureMoltbotGateway.
// On failure: responds 503 with the error message and, when retrievable, the logs of the
// newest start-openclaw.sh / start-moltbot.sh process whose status is 'failed' or 'completed'.
debug.get('/start-gateway', async (c) => {
  const sandbox = c.get('sandbox');

  try {
    const gateway = await ensureMoltbotGateway(sandbox, c.env);
    return c.json({
      success: true,
      processId: gateway.id,
      status: gateway.status,
      message: 'Gateway started successfully',
    });
  } catch (err) {
    let errorMessage: string;
    if (err instanceof Error) {
      errorMessage = err.message;
    } else {
      errorMessage = String(err);
    }

    // Best effort: attach the logs of the newest finished start-script process.
    let lastFailed: Record<string, unknown> | null = null;
    try {
      const allProcesses = await sandbox.listProcesses();
      const finishedStarters = allProcesses.filter((proc) => {
        const isStartScript =
          proc.command.includes('start-openclaw.sh') || proc.command.includes('start-moltbot.sh');
        const hasFinished = proc.status === 'failed' || proc.status === 'completed';
        return isStartScript && hasFinished;
      });
      // Newest first; processes without a startTime are treated as time 0.
      finishedStarters.sort((left, right) => {
        const leftTime = left.startTime?.getTime() ?? 0;
        const rightTime = right.startTime?.getTime() ?? 0;
        return rightTime - leftTime;
      });
      const newest = finishedStarters[0];
      if (newest) {
        const newestLogs = await newest.getLogs();
        lastFailed = {
          id: newest.id,
          command: newest.command,
          status: newest.status,
          exitCode: newest.exitCode,
          stdout: newestLogs.stdout || '',
          stderr: newestLogs.stderr || '',
        };
      }
    } catch {
      // Listing processes or reading logs failed; respond without the log attachment.
    }

    return c.json(
      {
        success: false,
        error: errorMessage,
        lastFailedProcess: lastFailed,
      },
      503,
    );
  }
});

// GET /debug/version - Returns version info from inside the container
debug.get('/version', async (c) => {
const sandbox = c.get('sandbox');
Expand Down Expand Up @@ -36,11 +80,13 @@ debug.get('/version', async (c) => {
});

// GET /debug/processes - List all processes with optional logs
// Query: logs=true (include stdout/stderr), failed=1 (only gateway-related failed/completed with non-zero exit)
debug.get('/processes', async (c) => {
const sandbox = c.get('sandbox');
try {
const processes = await sandbox.listProcesses();
const includeLogs = c.req.query('logs') === 'true';
const failedOnly = c.req.query('failed') === '1';

const processData = await Promise.all(
processes.map(async (p) => {
Expand All @@ -67,28 +113,60 @@ debug.get('/processes', async (c) => {
}),
);

// Sort by status (running first, then starting, completed, failed)
// Within each status, sort by startTime descending (newest first)
// Optionally filter to gateway-related failed/completed only
const isGatewayRelated = (d: Record<string, unknown>) => {
const cmd = (d.command as string) || '';
const status = d.status as string;
const exitCode = d.exitCode as number | undefined;
return (
(cmd.includes('start-openclaw.sh') || cmd.includes('start-moltbot.sh') ||
cmd.includes('openclaw gateway') || cmd.includes('clawdbot gateway')) &&
!cmd.includes('openclaw devices') && !cmd.includes('clawdbot devices') &&
(status === 'failed' || (status === 'completed' && exitCode != null && exitCode !== 0))
);
};
let list = processData;
if (failedOnly) {
list = processData.filter(isGatewayRelated);
}

// Sort by status (running first, then starting, completed, failed), then by startTime descending
const statusOrder: Record<string, number> = {
running: 0,
starting: 1,
completed: 2,
failed: 3,
};

processData.sort((a, b) => {
list.sort((a, b) => {
const statusA = statusOrder[a.status as string] ?? 99;
const statusB = statusOrder[b.status as string] ?? 99;
if (statusA !== statusB) {
return statusA - statusB;
}
// Within same status, sort by startTime descending
if (statusA !== statusB) return statusA - statusB;
const timeA = (a.startTime as string) || '';
const timeB = (b.startTime as string) || '';
return timeB.localeCompare(timeA);
});

return c.json({ count: processes.length, processes: processData });
// Last failed gateway stderr preview (sanitized) when logs=true or failed=1
let lastFailedStderrPreview: string | undefined;
if (includeLogs || failedOnly) {
const failedStarter = processes
.filter(p => (p.command.includes('start-openclaw.sh') || p.command.includes('start-moltbot.sh')) &&
(p.status === 'failed' || (p.status === 'completed' && p.exitCode != null && p.exitCode !== 0)))
.sort((a, b) => (b.startTime?.getTime() ?? 0) - (a.startTime?.getTime() ?? 0))[0];
if (failedStarter) {
try {
const logs = await failedStarter.getLogs();
const stderr = logs.stderr || '';
if (stderr) lastFailedStderrPreview = sanitizeStderr(stderr, 500);
} catch {
lastFailedStderrPreview = '(failed to retrieve logs)';
}
}
}

const payload: Record<string, unknown> = { count: list.length, processes: list };
if (lastFailedStderrPreview != null) payload.lastFailedStderrPreview = lastFailedStderrPreview;
return c.json(payload);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
return c.json({ error: errorMessage }, 500);
Expand Down
71 changes: 61 additions & 10 deletions src/routes/public.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Hono } from 'hono';
import type { AppEnv } from '../types';
import { MOLTBOT_PORT } from '../config';
import { findExistingMoltbotProcess } from '../gateway';
import { sanitizeStderr } from '../utils/sanitize';

/**
* Public routes - NO Cloudflare Access authentication required
Expand Down Expand Up @@ -30,30 +31,80 @@ publicRoutes.get('/logo-small.png', (c) => {
return c.env.ASSETS.fetch(c.req.raw);
});

// GET /favicon.ico - Answered here rather than being passed on to the gateway (prevents a 503).
// Returns the favicon from the static ASSETS binding when that fetch succeeds; otherwise 204 No Content.
publicRoutes.get('/favicon.ico', async (c) => {
  const faviconUrl = new URL('/favicon.ico', c.req.url);
  const assetResponse = await c.env.ASSETS.fetch(new Request(faviconUrl));
  return assetResponse.ok ? assetResponse : new Response(null, { status: 204 });
});

// GET /api/status - Public health check for gateway status (no auth required)
// For debugging: returns the process count, the gateway process state, and its exitCode
publicRoutes.get('/api/status', async (c) => {
const sandbox = c.get('sandbox');

const debugInfo: Record<string, unknown> = {
ok: false,
status: 'unknown',
processId: null as string | null,
processCount: 0,
gatewayProcess: null as { command: string; status: string; exitCode?: number } | null,
hint: '',
};

try {
const processes = await sandbox.listProcesses();
debugInfo.processCount = processes.length;

const gatewayProc = processes.find(
(p) =>
(p.command.includes('start-openclaw.sh') || p.command.includes('start-moltbot.sh') ||
p.command.includes('openclaw gateway') || p.command.includes('clawdbot gateway')) &&
!p.command.includes('openclaw devices') && !p.command.includes('clawdbot devices')
);
if (gatewayProc) {
debugInfo.gatewayProcess = {
command: gatewayProc.command,
status: gatewayProc.status,
exitCode: gatewayProc.exitCode,
};
}

const process = await findExistingMoltbotProcess(sandbox);
if (!process) {
return c.json({ ok: false, status: 'not_running' });
debugInfo.status = 'not_running';
debugInfo.hint = gatewayProc?.exitCode != null
? `Gateway process exited with code ${gatewayProc.exitCode}. Check wrangler tail or /debug/processes?logs=true`
: 'No gateway process. Visit / or /debug/start-gateway to start.';
if (gatewayProc && (gatewayProc.status === 'failed' || (gatewayProc.status === 'completed' && gatewayProc.exitCode != null && gatewayProc.exitCode !== 0))) {
try {
const logs = await gatewayProc.getLogs();
const stderr = logs.stderr || '';
if (stderr) debugInfo.lastStderrPreview = sanitizeStderr(stderr, 300);
} catch {
// ignore
}
}
return c.json(debugInfo);
}

// Process exists, check if it's actually responding
// Try to reach the gateway with a short timeout
debugInfo.processId = process.id;

try {
await process.waitForPort(18789, { mode: 'tcp', timeout: 5000 });
return c.json({ ok: true, status: 'running', processId: process.id });
debugInfo.ok = true;
debugInfo.status = 'running';
debugInfo.hint = 'Gateway is up. If UI shows "Pairing required", visit /_admin/ to approve this device.';
return c.json(debugInfo);
} catch {
return c.json({ ok: false, status: 'not_responding', processId: process.id });
debugInfo.status = 'not_responding';
debugInfo.hint = 'Process exists but port 18789 not responding. Gateway may be starting or crashed.';
return c.json(debugInfo);
}
} catch (err) {
return c.json({
ok: false,
status: 'error',
error: err instanceof Error ? err.message : 'Unknown error',
});
debugInfo.status = 'error';
debugInfo.hint = err instanceof Error ? err.message : 'Unknown error';
return c.json(debugInfo);
}
});

Expand Down
13 changes: 13 additions & 0 deletions src/utils/sanitize.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Sanitize stderr/log text for safe exposure in API responses (redact secrets).
 *
 * Redaction runs before truncation, so a secret is never cut in half and
 * partially exposed at the truncation boundary.
 *
 * Redacted patterns:
 * - `sk-ant-…` keys (20+ characters of letters, digits, `-` or `_` after the prefix)
 * - `xoxb-…` and `xoxp-…` tokens
 * - long (20+ character) identifiers immediately followed by `@host.tld`
 *
 * @param text - Raw log text. Empty or non-string input yields `''`.
 * @param maxLen - Maximum number of characters kept before `'...'` is appended.
 * @returns The redacted text, truncated to `maxLen` characters plus `'...'` when longer.
 */
export function sanitizeStderr(text: string, maxLen = 500): string {
  if (!text || typeof text !== 'string') return '';
  let out = text
    // `_` must be part of the class: without it, a key containing an underscore
    // is either not matched at all or only redacted up to the underscore.
    .replace(/\bsk-ant-[a-zA-Z0-9_-]{20,}/g, 'sk-ant-***REDACTED***')
    .replace(/\bxoxb-[a-zA-Z0-9-]+/g, 'xoxb-***REDACTED***')
    .replace(/\bxoxp-[a-zA-Z0-9-]+/g, 'xoxp-***REDACTED***')
    .replace(/\b[A-Za-z0-9_-]{20,}@[a-zA-Z]+\.[a-zA-Z]+/g, '***REDACTED***');
  if (out.length > maxLen) out = out.slice(0, maxLen) + '...';
  return out;
}