Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions codec_chat.html
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,37 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
return ok;
}catch(e){return false}
}
// ── Step 10 Q11: Project-promotion suggestion chip (2026-07) ──
// Backend emits {escalate_project:{estimated_checkpoints,reason}} after a
// task-shaped chat reply. "Start as Project" pre-fills the input in Project
// mode (user still hits send — no surprise dispatch); "No thanks" silences
// the suggestion for this session.
var _lastEscalateText='';
/**
 * Append a dashed "this looks like a project" suggestion card to #messages.
 *
 * @param {Object} sugg - Suggestion payload; `estimated_checkpoints` is shown
 *   when truthy, otherwise the card says "several".
 * @param {string} userText - The user's message; remembered in
 *   `_lastEscalateText` so "Start as Project" can pre-fill the input with it.
 */
function renderEscalateChip(sugg,userText){
  _lastEscalateText=userText||'';
  var checkpoints='several';
  if(sugg&&sugg.estimated_checkpoints){checkpoints=sugg.estimated_checkpoints}
  var card=document.createElement('div');
  card.className='msg assistant';
  // The count is escaped because it comes from the backend payload.
  card.innerHTML=[
    '<div class="msg-bubble" style="border:1px dashed var(--accent,#a78bfa);background:rgba(167,139,250,0.06)">',
    '<div style="font-weight:600;margin-bottom:6px">\uD83D\uDCA1 This looks like a multi-step project (~',
    escHtml(String(checkpoints)),
    ' checkpoints)</div>',
    '<div style="font-size:12px;color:var(--text-dim);margin-bottom:8px">CODEC can draft a plan, ask for your approval once, then run it autonomously in the background.</div>',
    '<div style="display:flex;gap:8px">',
    '<button onclick="escalateStartProject(this)" style="padding:6px 14px;background:var(--accent,#a78bfa);color:#000;border:none;border-radius:6px;cursor:pointer;font-size:12px;font-weight:600">Start as Project</button>',
    '<button onclick="escalateDismiss(this)" style="padding:6px 14px;background:transparent;color:var(--text);border:1px solid var(--border,#2a2a30);border-radius:6px;cursor:pointer;font-size:12px">No thanks</button>',
    '</div></div>'
  ].join('');
  document.getElementById('messages').appendChild(card);
  scrollBottom();
}
/**
 * "Start as Project" click handler for the suggestion card.
 *
 * Switches to Project mode, copies the remembered user text into the chat
 * input and focuses it, removes the card, and shows a toast. Nothing is sent:
 * the user still has to hit send.
 *
 * @param {Element} btn - The clicked button inside the suggestion card.
 */
function escalateStartProject(btn){
  setMode('project');
  var chatBox=document.getElementById('chatInput');
  chatBox.value=_lastEscalateText;
  chatBox.focus();
  var chip=btn.closest('.msg');
  if(chip){
    chip.remove();
  }
  showToast('Project mode — hit send to draft the plan');
}
/**
 * "No thanks" click handler for the suggestion card.
 *
 * Asks the backend (best-effort) to silence further Project suggestions for
 * this session, then removes the card. The card is removed whether or not
 * the request succeeds.
 *
 * @param {Element} btn - The clicked button inside the suggestion card.
 */
function escalateDismiss(btn){
  var sid=(typeof sessionId!=='undefined'&&sessionId)||'';
  try{
    var pending=fetch('/api/chat/escalate_silence',{
      method:'POST',
      headers:{'Content-Type':'application/json'},
      body:JSON.stringify({session_id:sid})
    });
    // fetch() reports network failures by rejecting its promise, which the
    // surrounding try/catch cannot see. Silencing is best-effort, so the
    // rejection is deliberately swallowed rather than left unhandled.
    if(pending&&typeof pending.catch==='function'){pending.catch(function(){})}
  }catch(e){
    // Synchronous failure (e.g. fetch unavailable): still best-effort.
  }
  var card=btn.closest('.msg');
  if(card){card.remove()}
}
function copyCodeBlock(btn){
// Per-code-block copy (2026-07): grabs the rendered code text (already
// HTML-unescaped by innerText) and reuses copyMsgText's clipboard path.
Expand Down Expand Up @@ -1135,7 +1166,7 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
if(!line.startsWith('data: '))continue;
var payload=line.substring(6);
if(payload==='[DONE]')break;
try{var j=JSON.parse(payload);if(j.token){if(firstToken){bubble.innerHTML='';firstToken=false}fullText+=j.token;bubble.innerHTML=formatMsg(fullText);scrollBottom()}if(j.error){fullText+='\n\nError: '+j.error}}catch(pe){}
try{var j=JSON.parse(payload);if(j.token){if(firstToken){bubble.innerHTML='';firstToken=false}fullText+=j.token;bubble.innerHTML=formatMsg(fullText);scrollBottom()}if(j.escalate_project){renderEscalateChip(j.escalate_project,text)}if(j.error){fullText+='\n\nError: '+j.error}}catch(pe){}
}
}
if(fullText){div.remove();addMessage('assistant',fullText);chatHist.push({role:'assistant',content:fullText});saveMessages([{role:'assistant',content:fullText}])}
Expand Down Expand Up @@ -1839,7 +1870,7 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
if (!r.ok) return;
var data = await r.json();
var msgs = (data.messages || []).filter(function(m){
return (m.ts || 0) > since && (m.type === 'agent_reply' || m.type === 'agent_status' || m.type === 'plan_revision');
return (m.ts || 0) > since && ['agent_reply','agent_status','plan_revision','agent_update','agent_blocked','agent_question','agent_done','agent_aborted'].indexOf(m.type) >= 0;
});
if (msgs.length){
clearInterval(poll);
Expand Down Expand Up @@ -1868,6 +1899,12 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
if (!r.ok) return;
var data = await r.json();
var status = (data.manifest && data.manifest.status) || data.status || '';
// Resume live updates after a page reload while the agent is active
// (2026-07 demo fix — poller previously only started from the approve click).
var activeStates = ['approved','running','paused'];
if (activeStates.indexOf(status) >= 0 || status.indexOf('blocked_') === 0){
_startAgentPoller(_activeAgentId);
}
var terminal = ['done','complete','completed','failed','error','aborted','user_aborted'];
if (terminal.indexOf(status) >= 0){
addMessage('assistant', '`'+_activeAgentId.slice(0,12)+'` reached `'+status+'`. Conversation closed — next message starts a fresh project.');
Expand Down
55 changes: 55 additions & 0 deletions routes/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
import codec_llm # A-12 canonical LLM caller
from codec_chat_stream import SkillTagBuffer, SKILL_TAG_RE # A-6 token machine
from codec_chat_pipeline import _StepBudget, _is_conversational # B6-P2
from codec_chat_pipeline import ( # Step 10 Q11 wiring (2026-07)
_should_escalate_to_project,
silence_session_autoescalate,
)
from routes._shared import CONFIG_PATH

router = APIRouter()
Expand Down Expand Up @@ -616,12 +620,59 @@ def _build_chat_system_prompt(config: dict, budget, has_attachment: bool,



import re as _re_esc

_ESCALATE_HINT_RE = _re_esc.compile(
r"\b(build|create|research|plan|organi[sz]e|automate|migrate|design|"
r"set\s?up|write me|make me|prepare|launch|develop)\b", _re_esc.IGNORECASE)


def _maybe_escalate_suggestion(user_text: str, session_id: str):
"""Step 10 auto-escalation, finally wired (2026-07). Runs AFTER the reply
so it never adds latency to the answer itself. The regex prefilter keeps
the Qwen classifier call off casual messages — only task-shaped text
(>= 60 chars + an action verb) pays for classification. Returns the
suggestion dict for the UI chip, or None."""
try:
if len(user_text or "") < 60 or not _ESCALATE_HINT_RE.search(user_text):
return None
verdict = _should_escalate_to_project(user_text, session_id)
if not verdict.get("escalate"):
return None
log_event("agent_auto_escalated_from_chat", "codec-dashboard",
f"Suggested Project promotion ({verdict.get('estimated_checkpoints')} checkpoints)",
extra={"session_id": session_id,
"estimated_checkpoints": verdict.get("estimated_checkpoints"),
"verdict": verdict.get("reason", "")[:200],
"silenced": False})
return {"estimated_checkpoints": verdict.get("estimated_checkpoints"),
"reason": (verdict.get("reason") or "")[:200]}
except Exception as e:
log.debug(f"escalation check failed (non-fatal): {e}")
return None


@router.post("/api/chat/escalate_silence")
async def escalate_silence(request: Request):
    """Q11: user said "No thanks" to a Project suggestion — silence the
    prompt for the rest of this chat session (in-memory, resets on restart).

    Expects a JSON object with a ``session_id`` field. A missing, malformed
    or non-object body is treated as "no session id" rather than an error.

    Returns:
        ``{"ok": True, "silenced": bool}`` where ``silenced`` says whether a
        non-empty session id was supplied and passed to
        ``silence_session_autoescalate``.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    # Valid JSON that is not an object (list, string, number, null) has no
    # .get(); treat it like an empty body instead of failing the request.
    if not isinstance(body, dict):
        body = {}
    sid = str(body.get("session_id") or "")
    if sid:
        silence_session_autoescalate(sid)
    return {"ok": True, "silenced": bool(sid)}


@router.post("/api/chat")
async def chat_completion(request: Request):
"""Direct LLM chat with full context window + tool calling"""
from codec_metrics import metrics
metrics.inc("codec_chat_requests_total")
body = await request.json()
_session_id = request.query_params.get("s") or ""
messages = body.get("messages", [])
if not messages:
return JSONResponse({"error": "No messages"}, status_code=400)
Expand Down Expand Up @@ -868,6 +919,10 @@ def _resolve_skill_tag(raw_tag):
"busy, restarting, or out of context. Please try "
"again in a moment."
)
# Step 10 Q11 (2026-07): post-reply Project suggestion.
_sugg = _maybe_escalate_suggestion(last_user_text, _session_id)
if _sugg:
yield f"data: {json.dumps({'escalate_project': _sugg})}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
yield f"data: {json.dumps({'error': str(e)})}\n\n"
Expand Down
2 changes: 1 addition & 1 deletion skills/.manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"memory_history.py": "a2762c03c325517d10907f8aa9511103a5716a29f9e956d48473b817105fb65c",
"memory_save.py": "3d801338bfd0818aaf1e65e692e404af0a0fba6886d26150f0fb66e4b4fde424",
"memory_search.py": "249e8644254e039cbdf7155fd372b85c234e18483e8e7ea7361ce28bf8fb875f",
"mouse_control.py": "85398544e83ecfc9d01c6ec9f9d55f50bf9fd9228c95a0d02f82da014f00f725",
"mouse_control.py": "d00f8b94d008b2f69a0ee1880039a53b46454a097f599b3cfbce88db06839ae4",
"music.py": "c42bddc6414b11e8c4734cd826a824a30c4ff34a618d69da11a5d84b737b2f55",
"network_info.py": "bd776b619cf7c18d67fe03cb0f0456cf9c4f9bf71475740a233a9ca1e6672fcd",
"notes.py": "7d50d1544ea955f59917a1f0e7902d115e9dbccd3188b641057a55b6a5b2803a",
Expand Down
30 changes: 20 additions & 10 deletions skills/mouse_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,29 @@ def _get_screen_size():
return (1920, 1080)


def _take_screenshot():
"""Capture screen and return base64-encoded PNG."""
def _take_screenshot(timeout_s=10, attempts=2):
"""Capture screen and return base64-encoded PNG.

2026-07 hardening: under load screencapture intermittently exceeds the
old 5s cap (stress test: 2 of 6 vision-locate runs failed on screenshot
timeout while the model itself was stable). One retry + a 10s cap turn
a hard "Could not take screenshot" into a rare slow path."""
try:
os.makedirs(os.path.dirname(_SCREENSHOT_PATH), exist_ok=True)
subprocess.run(
["screencapture", "-x", "-C", _SCREENSHOT_PATH],
capture_output=True, timeout=5
)
if os.path.exists(_SCREENSHOT_PATH) and os.path.getsize(_SCREENSHOT_PATH) > 1000:
with open(_SCREENSHOT_PATH, "rb") as f:
return base64.b64encode(f.read()).decode()
except Exception as e:
log.warning(f"Screenshot error: {e}")
log.warning(f"Screenshot dir error: {e}")
return None
for attempt in range(attempts):
try:
subprocess.run(
["screencapture", "-x", "-C", _SCREENSHOT_PATH],
capture_output=True, timeout=timeout_s
)
if os.path.exists(_SCREENSHOT_PATH) and os.path.getsize(_SCREENSHOT_PATH) > 1000:
with open(_SCREENSHOT_PATH, "rb") as f:
return base64.b64encode(f.read()).decode()
except Exception as e:
log.warning(f"Screenshot error (attempt {attempt + 1}/{attempts}): {e}")
return None


Expand Down
50 changes: 50 additions & 0 deletions tests/test_escalate_wiring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Step 10 Q11 wiring (2026-07): post-reply Project-promotion suggestion.

The regex prefilter must keep the Qwen classifier off casual messages;
the silence endpoint must mark the session."""
from __future__ import annotations

import sys
from pathlib import Path

REPO = Path(__file__).resolve().parent.parent
if str(REPO) not in sys.path:
sys.path.insert(0, str(REPO))

import routes.chat as chat


def test_prefilter_skips_short_or_casual(monkeypatch):
    """A short message and a long casual one must both return None without
    the classifier ever being called."""
    def _classifier_must_not_run(*args, **kwargs):
        raise AssertionError("classifier must not be called for casual text")

    monkeypatch.setattr(chat, "_should_escalate_to_project", _classifier_must_not_run)

    short_text = "hi"
    long_casual_text = "what's the weather like today?" * 3
    for message in (short_text, long_casual_text):
        assert chat._maybe_escalate_suggestion(message, "s1") is None


def test_prefilter_passes_task_shaped_text(monkeypatch):
    """Task-shaped text reaches the classifier, and a positive verdict is
    returned as the chip payload with the session id passed through."""
    recorded = {}

    def _approving_gate(text, sid):
        recorded["hit"] = (text, sid)
        return {"escalate": True, "estimated_checkpoints": 4, "reason": "multi-step"}

    monkeypatch.setattr(chat, "_should_escalate_to_project", _approving_gate)
    # Keep the audit log out of the test.
    monkeypatch.setattr(chat, "log_event", lambda *a, **kw: None)

    prompt = "research the top 5 competitors in my niche, build a comparison and prepare a report"
    suggestion = chat._maybe_escalate_suggestion(prompt, "sess42")

    assert suggestion == {"estimated_checkpoints": 4, "reason": "multi-step"}
    _seen_text, seen_sid = recorded["hit"]
    assert seen_sid == "sess42"


def test_gate_negative_verdict_returns_none(monkeypatch):
    """When the classifier says not to escalate, no suggestion is produced."""
    def _declining_gate(text, sid):
        return {"escalate": False, "reason": "single-step"}

    monkeypatch.setattr(chat, "_should_escalate_to_project", _declining_gate)

    prompt = "build me one tiny thing that is actually simple but worded long enough"
    assert chat._maybe_escalate_suggestion(prompt, "s") is None


def test_gate_never_raises(monkeypatch):
    """A classifier failure must be absorbed: the helper returns None instead
    of propagating the exception."""
    def _failing_gate(text, sid):
        raise RuntimeError("qwen down")

    monkeypatch.setattr(chat, "_should_escalate_to_project", _failing_gate)

    prompt = "research and build and prepare a giant multi step plan for my business"
    assert chat._maybe_escalate_suggestion(prompt, "s") is None