diff --git a/harness/phase1-wire-demo.sh b/harness/phase1-wire-demo.sh index 69d216b..b306b37 100755 --- a/harness/phase1-wire-demo.sh +++ b/harness/phase1-wire-demo.sh @@ -1106,19 +1106,29 @@ phase4_surprise() { # guarantee. The human "surprise" at 4.3 is supplementary — an LLM may phrase a # memory-aware reply any number of ways, or even DISOWN the injected context as # a hallucination, so its prose is NOT a reliable success signal. - local hooktest; hooktest="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks test pre_llm_call 2>&1")" + local hooktest inject_ok=false + hooktest="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks test pre_llm_call 2>&1")" echo "$hooktest" | sed 's/^/ /' >&2 if echo "$hooktest" | grep -q 'exit=0' && echo "$hooktest" | grep -qF '"context"'; then ok "4.2 inject (deterministic)" "Hermes' config-wired pre_llm_call hook injected the real memory (no inference)" + inject_ok=true else fail "4.2 inject (deterministic)" "Hermes did NOT inject memory (stdout '{}' → MCP down / scope missing / session bad). THIS is the real failure even if a chat reply 'sounds' memory-aware." fi - # 4.2b structural — all 3 wired hooks exec + emit valid JSON via Hermes' dispatcher. - local doctor; doctor="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks doctor 2>&1")" + # 4.2b structural (ADVISORY) — all 3 wired hooks exec + emit valid JSON via Hermes' + # dispatcher. `hermes hooks doctor` exits non-zero + flags "modified since approval" + # after every `agentkeys wire` rewrite (Hermes pins each script's hash at first-use + # consent; hooks_auto_accept does NOT refresh an already-allowlisted-but-modified + # hook), so this can trip even when the hooks are healthy. 4.2 above is authoritative + # — only HARD-fail when 4.2 ALSO failed; otherwise warn. (`|| true` keeps the output + # despite the non-zero exit.) + local doctor; doctor="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks doctor 2>&1 || true")" if [[ "$(echo "$doctor" | grep -c 'produced valid JSON')" -ge 3 ]]; then ok "4.2b hooks doctor" "all 3 wired hooks exec + valid JSON (config-driven)" + elif [[ "$inject_ok" == true ]]; then + skip "4.2b hooks doctor" "structural check inconclusive (likely 'modified since approval' after re-wire, or MCP timing) — 4.2 (authoritative) passed, so the hooks are healthy. To clear in the sandbox: revoke the 3 hook paths, then run one 'hermes --accept-hooks -z hi' turn." else - fail "4.2b hooks doctor" "hermes hooks doctor: not all hooks healthy — $(echo "$doctor" | tr '\n' ' ' | cut -c1-120)" + fail "4.2b hooks doctor" "hermes hooks doctor: not all hooks healthy AND 4.2 inject failed — $(echo "$doctor" | tr '\n' ' ' | cut -c1-160)" fi # 4.3 the human "surprise" — OPTIONAL live demo. 4.2 above is the real signal.