Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions harness/phase1-wire-demo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1106,19 +1106,29 @@ phase4_surprise() {
# guarantee. The human "surprise" at 4.3 is supplementary — an LLM may phrase a
# memory-aware reply any number of ways, or even DISOWN the injected context as
# a hallucination, so its prose is NOT a reliable success signal.
local hooktest; hooktest="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks test pre_llm_call 2>&1")"
local hooktest inject_ok=false
hooktest="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks test pre_llm_call 2>&1")"
echo "$hooktest" | sed 's/^/ /' >&2
if echo "$hooktest" | grep -q 'exit=0' && echo "$hooktest" | grep -qF '"context"'; then
ok "4.2 inject (deterministic)" "Hermes' config-wired pre_llm_call hook injected the real memory (no inference)"
inject_ok=true
else
fail "4.2 inject (deterministic)" "Hermes did NOT inject memory (stdout '{}' → MCP down / scope missing / session bad). THIS is the real failure even if a chat reply 'sounds' memory-aware."
fi
# 4.2b structural — all 3 wired hooks exec + emit valid JSON via Hermes' dispatcher.
local doctor; doctor="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks doctor 2>&1")"
# 4.2b structural (ADVISORY) — all 3 wired hooks exec + emit valid JSON via Hermes'
# dispatcher. `hermes hooks doctor` exits non-zero + flags "modified since approval"
# after every `agentkeys wire` rewrite (Hermes pins each script's hash at first-use
# consent; hooks_auto_accept does NOT refresh an already-allowlisted-but-modified
# hook), so this can trip even when the hooks are healthy. 4.2 above is authoritative
# — only HARD-fail when 4.2 ALSO failed; otherwise warn. (`|| true` keeps the output
# despite the non-zero exit.)
local doctor; doctor="$(sbx_exec "export PATH=\$HOME/.local/bin:\$PATH; hermes hooks doctor 2>&1 || true")"
if [[ "$(echo "$doctor" | grep -c 'produced valid JSON')" -ge 3 ]]; then
ok "4.2b hooks doctor" "all 3 wired hooks exec + valid JSON (config-driven)"
elif [[ "$inject_ok" == true ]]; then
skip "4.2b hooks doctor" "structural check inconclusive (likely 'modified since approval' after re-wire, or MCP timing) — 4.2 (authoritative) passed, so the hooks are healthy. To clear in the sandbox: revoke the 3 hook paths, then run one 'hermes --accept-hooks -z hi' turn."
else
fail "4.2b hooks doctor" "hermes hooks doctor: not all hooks healthy — $(echo "$doctor" | tr '\n' ' ' | cut -c1-120)"
fail "4.2b hooks doctor" "hermes hooks doctor: not all hooks healthy AND 4.2 inject failed — $(echo "$doctor" | tr '\n' ' ' | cut -c1-160)"
fi

# 4.3 the human "surprise" — OPTIONAL live demo. 4.2 above is the real signal.
Expand Down
Loading