From e93b168e1748d6ee3adfad983e575fc6aec96709 Mon Sep 17 00:00:00 2001
From: Mickael Farina <farina.mickael@gmail.com>
Date: Thu, 2 Jul 2026 16:21:11 +0200
Subject: [PATCH] feat(demo): wire Project auto-escalation, fix agent
 live-progress polling, harden vision-click screenshots
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Demo-hardening from the 2026-07-02 dry-run:

- Step 10 Q11 finally wired: after the reply to a task-shaped user
  message (regex prefilter: >=60 chars + action verb, so casual messages
  never pay the classifier cost), _should_escalate_to_project runs
  post-reply and the stream emits an escalate_project frame; the UI
  renders a suggestion chip ("Start as Project" pre-fills Project mode —
  user still hits send; "No thanks" → new POST
  /api/chat/escalate_silence, Q11 session silence). Emits the
  agent_auto_escalated_from_chat audit event.

- Agent live progress: _startAgentPoller now auto-resumes after a page
  reload while the bound agent is active (piggybacked on the existing
  lifecycle tick, idempotent); schedulePollAgentTimeline's type filter
  included only legacy types (agent_reply/agent_status/plan_revision) —
  the runner posts agent_update/agent_blocked/agent_question/agent_done/
  agent_aborted, so live updates were filtered out on the reply path.

- mouse_control: screenshot capture 5s→10s timeout + one retry. Stress
  test (6 runs): UI-TARS coords stable within 2px, but 2/6 runs died on
  screencapture timeout — the screenshot, not the model, is the flaky
  part. Manifest regenerated.

Tests: 4 new (escalation prefilter/verdict/never-raises). Full suite
2440 passed (4 pre-existing pilot-e2e failures, known-issues).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 codec_chat.html               | 41 ++++++++++++++++++++++++--
 routes/chat.py                | 55 +++++++++++++++++++++++++++++++++++
 skills/.manifest.json         |  2 +-
 skills/mouse_control.py       | 30 ++++++++++++-------
 tests/test_escalate_wiring.py | 50 +++++++++++++++++++++++++++++++
 5 files changed, 165 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_escalate_wiring.py
diff --git a/codec_chat.html b/codec_chat.html
index 0091f23..118116f 100644
--- a/codec_chat.html
+++ b/codec_chat.html
@@ -499,6 +499,37 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
     return ok;
   }catch(e){return false}
 }
+// ── Step 10 Q11: Project-promotion suggestion chip (2026-07) ──
+// Backend emits {escalate_project:{estimated_checkpoints,reason}} after a
+// task-shaped chat reply. "Start as Project" pre-fills the input in Project
+// mode (user still hits send — no surprise dispatch); "No thanks" silences
+// the suggestion for this session.
+var _lastEscalateText='';
+function renderEscalateChip(sugg,userText){
+  _lastEscalateText=userText||'';
+  var n=sugg&&sugg.estimated_checkpoints?sugg.estimated_checkpoints:'several';
+  var div=document.createElement('div');div.className='msg assistant';
+  div.innerHTML='<div class="msg-bubble" style="border:1px dashed var(--accent,#a78bfa);background:rgba(167,139,250,0.06)">'+
+    '<div style="font-weight:600;margin-bottom:6px">\uD83D\uDCA1 This looks like a multi-step project (~'+escHtml(String(n))+' checkpoints)</div>'+
+    '<div style="font-size:12px;color:var(--text-dim);margin-bottom:8px">CODEC can draft a plan, ask for your approval once, then run it autonomously in the background.</div>'+
+    '<div style="display:flex;gap:8px">'+
+    '<button onclick="escalateStartProject(this)" style="padding:6px 14px;background:var(--accent,#a78bfa);color:#000;border:none;border-radius:6px;cursor:pointer;font-size:12px;font-weight:600">Start as Project</button>'+
+    '<button onclick="escalateDismiss(this)" style="padding:6px 14px;background:transparent;color:var(--text);border:1px solid var(--border,#2a2a30);border-radius:6px;cursor:pointer;font-size:12px">No thanks</button>'+
+    '</div></div>';
+  document.getElementById('messages').appendChild(div);scrollBottom();
+}
+function escalateStartProject(btn){
+  setMode('project');
+  var input=document.getElementById('chatInput');
+  input.value=_lastEscalateText;input.focus();
+  var card=btn.closest('.msg');if(card)card.remove();
+  showToast('Project mode — hit send to draft the plan');
+}
+function escalateDismiss(btn){
+  // "No thanks": best-effort silence request, then drop the chip. fetch() rejects asynchronously, so .catch() (not just try/catch) is needed to swallow network errors.
+  try{fetch('/api/chat/escalate_silence',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:(typeof sessionId!=='undefined'&&sessionId)||''})}).catch(function(){})}catch(e){}
+  var card=btn.closest('.msg');if(card)card.remove();
+}
 function copyCodeBlock(btn){
   // Per-code-block copy (2026-07): grabs the rendered code text (already
   // HTML-unescaped by innerText) and reuses copyMsgText's clipboard path.
@@ -1135,7 +1166,7 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
           if(!line.startsWith('data: '))continue;
           var payload=line.substring(6);
           if(payload==='[DONE]')break;
-          try{var j=JSON.parse(payload);if(j.token){if(firstToken){bubble.innerHTML='';firstToken=false}fullText+=j.token;bubble.innerHTML=formatMsg(fullText);scrollBottom()}if(j.error){fullText+='\n\nError: '+j.error}}catch(pe){}
+          try{var j=JSON.parse(payload);if(j.token){if(firstToken){bubble.innerHTML='';firstToken=false}fullText+=j.token;bubble.innerHTML=formatMsg(fullText);scrollBottom()}if(j.escalate_project){renderEscalateChip(j.escalate_project,text)}if(j.error){fullText+='\n\nError: '+j.error}}catch(pe){}
         }
       }
       if(fullText){div.remove();addMessage('assistant',fullText);chatHist.push({role:'assistant',content:fullText});saveMessages([{role:'assistant',content:fullText}])}
@@ -1839,7 +1870,7 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
       if (!r.ok) return;
       var data = await r.json();
       var msgs = (data.messages || []).filter(function(m){
-        return (m.ts || 0) > since && (m.type === 'agent_reply' || m.type === 'agent_status' || m.type === 'plan_revision');
+        return (m.ts || 0) > since && ['agent_reply','agent_status','plan_revision','agent_update','agent_blocked','agent_question','agent_done','agent_aborted'].indexOf(m.type) >= 0;
       });
       if (msgs.length){
         clearInterval(poll);
@@ -1868,6 +1899,12 @@ <h1><a href="/" style="color:inherit;text-decoration:none">CODEC</a></h1>
     if (!r.ok) return;
     var data = await r.json();
     var status = (data.manifest && data.manifest.status) || data.status || '';
+    // Resume live updates after a page reload while the agent is active
+    // (2026-07 demo fix — poller previously only started from the approve click).
+    var activeStates = ['approved','running','paused'];
+    if (activeStates.indexOf(status) >= 0 || status.indexOf('blocked_') === 0){
+      _startAgentPoller(_activeAgentId);
+    }
     var terminal = ['done','complete','completed','failed','error','aborted','user_aborted'];
     if (terminal.indexOf(status) >= 0){
       addMessage('assistant', '`'+_activeAgentId.slice(0,12)+'` reached `'+status+'`. Conversation closed — next message starts a fresh project.');
diff --git a/routes/chat.py b/routes/chat.py
index 0538527..c90748c 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -40,6 +40,10 @@
 import codec_llm  # A-12 canonical LLM caller
 from codec_chat_stream import SkillTagBuffer, SKILL_TAG_RE  # A-6 token machine
 from codec_chat_pipeline import _StepBudget, _is_conversational  # B6-P2
+from codec_chat_pipeline import (  # Step 10 Q11 wiring (2026-07)
+    _should_escalate_to_project,
+    silence_session_autoescalate,
+)
 from routes._shared import CONFIG_PATH
 
 router = APIRouter()
@@ -616,12 +620,59 @@ def _build_chat_system_prompt(config: dict, budget, has_attachment: bool,
 
 
 
+import re as _re_esc
+
+_ESCALATE_HINT_RE = _re_esc.compile(
+    r"\b(build|create|research|plan|organi[sz]e|automate|migrate|design|"
+    r"set\s?up|write me|make me|prepare|launch|develop)\b", _re_esc.IGNORECASE)
+
+
+def _maybe_escalate_suggestion(user_text: str, session_id: str):
+    """Step 10 auto-escalation (2026-07). Called once the reply has been
+    streamed, so it never delays the answer tokens. The regex prefilter keeps
+    the Qwen classifier call off casual messages — only task-shaped text
+    (>= 60 chars + an action verb) pays for classification. Returns the
+    suggestion dict for the UI chip, or None; never raises (errors → None)."""
+    try:
+        if len(user_text or "") < 60 or not _ESCALATE_HINT_RE.search(user_text):
+            return None
+        verdict = _should_escalate_to_project(user_text, session_id)
+        if not verdict.get("escalate"):
+            return None
+        # "reason" may be present-but-None; slicing None would raise and be swallowed below.
+        reason = str(verdict.get("reason") or "")[:200]
+        n_checkpoints = verdict.get("estimated_checkpoints")
+        log_event("agent_auto_escalated_from_chat", "codec-dashboard",
+                  f"Suggested Project promotion ({n_checkpoints} checkpoints)",
+                  extra={"session_id": session_id, "estimated_checkpoints": n_checkpoints,
+                         "verdict": reason, "silenced": False})
+        return {"estimated_checkpoints": n_checkpoints, "reason": reason}
+    except Exception as e:
+        log.debug(f"escalation check failed (non-fatal): {e}")
+        return None
+
+
+@router.post("/api/chat/escalate_silence")
+async def escalate_silence(request: Request):
+    """Q11: user said "No thanks" to a Project suggestion — silence the
+    prompt for the rest of this chat session. Malformed or non-object JSON is a no-op."""
+    try:
+        body = await request.json()
+    except Exception:
+        body = None
+    sid = str(body.get("session_id") or "") if isinstance(body, dict) else ""
+    if sid:
+        silence_session_autoescalate(sid)
+    return {"ok": True, "silenced": bool(sid)}
+
+
 @router.post("/api/chat")
 async def chat_completion(request: Request):
     """Direct LLM chat with full context window + tool calling"""
     from codec_metrics import metrics
     metrics.inc("codec_chat_requests_total")
     body = await request.json()
+    _session_id = request.query_params.get("s") or ""
     messages = body.get("messages", [])
     if not messages:
         return JSONResponse({"error": "No messages"}, status_code=400)
@@ -868,6 +919,10 @@ def _resolve_skill_tag(raw_tag):
                                 "busy, restarting, or out of context. Please try "
                                 "again in a moment."
                             )
+                    # Step 10 Q11 (2026-07): post-reply Project suggestion.
+                    _sugg = _maybe_escalate_suggestion(last_user_text, _session_id)
+                    if _sugg:
+                        yield f"data: {json.dumps({'escalate_project': _sugg})}\n\n"
                     yield "data: [DONE]\n\n"
                 except Exception as e:
                     yield f"data: {json.dumps({'error': str(e)})}\n\n"
diff --git a/skills/.manifest.json b/skills/.manifest.json
index a969b53..3801c13 100644
--- a/skills/.manifest.json
+++ b/skills/.manifest.json
@@ -56,7 +56,7 @@
     "memory_history.py": "a2762c03c325517d10907f8aa9511103a5716a29f9e956d48473b817105fb65c",
     "memory_save.py": "3d801338bfd0818aaf1e65e692e404af0a0fba6886d26150f0fb66e4b4fde424",
     "memory_search.py": "249e8644254e039cbdf7155fd372b85c234e18483e8e7ea7361ce28bf8fb875f",
-    "mouse_control.py": "85398544e83ecfc9d01c6ec9f9d55f50bf9fd9228c95a0d02f82da014f00f725",
+    "mouse_control.py": "d00f8b94d008b2f69a0ee1880039a53b46454a097f599b3cfbce88db06839ae4",
     "music.py": "c42bddc6414b11e8c4734cd826a824a30c4ff34a618d69da11a5d84b737b2f55",
     "network_info.py": "bd776b619cf7c18d67fe03cb0f0456cf9c4f9bf71475740a233a9ca1e6672fcd",
     "notes.py": "7d50d1544ea955f59917a1f0e7902d115e9dbccd3188b641057a55b6a5b2803a",
diff --git a/skills/mouse_control.py b/skills/mouse_control.py
index 32e5fa8..7e35330 100644
--- a/skills/mouse_control.py
+++ b/skills/mouse_control.py
@@ -147,19 +147,29 @@ def _get_screen_size():
         return (1920, 1080)
 
 
-def _take_screenshot():
-    """Capture screen and return base64-encoded PNG."""
+def _take_screenshot(timeout_s=10, attempts=2):
+    """Capture screen and return base64-encoded PNG.
+
+    2026-07 hardening: under load screencapture intermittently exceeds the
+    old 5s cap (stress test: 2 of 6 vision-locate runs failed on screenshot
+    timeout while the model itself was stable). One retry + a 10s cap turn
+    a hard "Could not take screenshot" into a rare slow path."""
     try:
         os.makedirs(os.path.dirname(_SCREENSHOT_PATH), exist_ok=True)
-        subprocess.run(
-            ["screencapture", "-x", "-C", _SCREENSHOT_PATH],
-            capture_output=True, timeout=5
-        )
-        if os.path.exists(_SCREENSHOT_PATH) and os.path.getsize(_SCREENSHOT_PATH) > 1000:
-            with open(_SCREENSHOT_PATH, "rb") as f:
-                return base64.b64encode(f.read()).decode()
     except Exception as e:
-        log.warning(f"Screenshot error: {e}")
+        log.warning(f"Screenshot dir error: {e}")
+        return None
+    for attempt in range(attempts):
+        try:
+            subprocess.run(
+                ["screencapture", "-x", "-C", _SCREENSHOT_PATH],
+                capture_output=True, timeout=timeout_s
+            )
+            if os.path.exists(_SCREENSHOT_PATH) and os.path.getsize(_SCREENSHOT_PATH) > 1000:
+                with open(_SCREENSHOT_PATH, "rb") as f:
+                    return base64.b64encode(f.read()).decode()
+        except Exception as e:
+            log.warning(f"Screenshot error (attempt {attempt + 1}/{attempts}): {e}")
     return None
 
 
diff --git a/tests/test_escalate_wiring.py b/tests/test_escalate_wiring.py
new file mode 100644
index 0000000..ec8d342
--- /dev/null
+++ b/tests/test_escalate_wiring.py
@@ -0,0 +1,50 @@
+"""Step 10 Q11 wiring (2026-07): post-reply Project-promotion suggestion.
+
+The regex prefilter must keep the Qwen classifier off casual messages;
+the silence endpoint must mark the session."""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parent.parent
+if str(REPO) not in sys.path:
+    sys.path.insert(0, str(REPO))
+
+import routes.chat as chat
+
+
+def test_prefilter_skips_short_or_casual(monkeypatch):
+    def boom(*a, **kw):
+        raise AssertionError("classifier must not be called for casual text")
+    monkeypatch.setattr(chat, "_should_escalate_to_project", boom)
+    assert chat._maybe_escalate_suggestion("hi", "s1") is None
+    assert chat._maybe_escalate_suggestion("what's the weather like today?" * 3, "s1") is None
+
+
+def test_prefilter_passes_task_shaped_text(monkeypatch):
+    calls = {}
+    def fake_gate(text, sid):
+        calls["hit"] = (text, sid)
+        return {"escalate": True, "estimated_checkpoints": 4, "reason": "multi-step"}
+    monkeypatch.setattr(chat, "_should_escalate_to_project", fake_gate)
+    monkeypatch.setattr(chat, "log_event", lambda *a, **kw: None)
+    out = chat._maybe_escalate_suggestion(
+        "research the top 5 competitors in my niche, build a comparison and prepare a report",
+        "sess42")
+    assert out == {"estimated_checkpoints": 4, "reason": "multi-step"}
+    assert calls["hit"][1] == "sess42"
+
+
+def test_gate_negative_verdict_returns_none(monkeypatch):
+    monkeypatch.setattr(chat, "_should_escalate_to_project",
+                        lambda t, s: {"escalate": False, "reason": "single-step"})
+    assert chat._maybe_escalate_suggestion(
+        "build me one tiny thing that is actually simple but worded long enough", "s") is None
+
+
+def test_gate_never_raises(monkeypatch):
+    monkeypatch.setattr(chat, "_should_escalate_to_project",
+                        lambda t, s: (_ for _ in ()).throw(RuntimeError("qwen down")))
+    assert chat._maybe_escalate_suggestion(
+        "research and build and prepare a giant multi step plan for my business", "s") is None