Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
8c1e368
docs(plan §6a): record OpenViking API spike + correct the engine tier…
hanwencheng Jun 2, 2026
5be9216
feat(memory): OpenViking engine adapter — real search/find API, gate-…
hanwencheng Jun 2, 2026
7e33cbf
feat(cli): query-aware memory-inject — OpenViking ranking behind the …
hanwencheng Jun 2, 2026
d98a1bb
docs+feat: OpenViking runbook + wire endpoint baking + harness --open…
hanwencheng Jun 2, 2026
77cbd2a
docs(runbook): fix OpenViking runbook from operator QA — shell-in, no…
hanwencheng Jun 2, 2026
b9d641f
docs(runbook): how to undo 'hermes memory setup' (operator QA) (#147)
hanwencheng Jun 3, 2026
c5ec70d
fix(runbook): correct OpenViking content/write URI + idempotent loade…
hanwencheng Jun 3, 2026
4d45b81
fix(runbook): correct OpenViking search/find response shape (operator…
hanwencheng Jun 3, 2026
f648c1e
docs(plan §6a): cross-link classifier-service as the write-side dual …
hanwencheng Jun 3, 2026
73ba0e1
docs(openviking-runbook): add Step 6a omni-identity recovery from the…
hanwencheng Jun 3, 2026
54e1a29
harness: add idempotent openviking-sandbox-setup.sh (backend-side run…
hanwencheng Jun 3, 2026
5fa532c
harness(openviking-sandbox): surface wire errors + add --openviking-e…
hanwencheng Jun 3, 2026
4b16d2c
fix(scope): grant namespace-qualified memory:<ns>, not bare memory (c…
hanwencheng Jun 3, 2026
abe19e8
fix(heima-bring-up): never deploy duplicate contracts; idempotent ski…
hanwencheng Jun 3, 2026
2ff76bf
fix(chain): retry eth_call on transient RPC 5xx/non-JSON (Heima RPC f…
hanwencheng Jun 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 41 additions & 17 deletions crates/agentkeys-broker-server/src/handlers/cap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -383,23 +383,47 @@ async fn eth_call(
"params": [{"to": to, "data": data}, "latest"],
"id": 1,
});
let resp = http
.post(rpc_url)
.json(&body)
.send()
.await
.map_err(|e| CapError::ChainRpc(format!("eth_call POST failed: {e}")))?;
let v: serde_json::Value = resp
.json()
.await
.map_err(|e| CapError::ChainRpc(format!("eth_call JSON parse: {e}")))?;
if let Some(err) = v.get("error") {
return Err(CapError::ChainRpc(format!("RPC error: {err}")));
}
v.get("result")
.and_then(|r| r.as_str())
.map(|s| s.to_string())
.ok_or_else(|| CapError::ChainRpc("eth_call missing 'result'".into()))
// The Heima public RPC intermittently 500s on eth_call (~12% per call,
// HTML error page → non-JSON). Retry transient failures (transport / HTTP
// 5xx / non-JSON) with backoff so a flaky RPC doesn't randomly fail
// cap-mint; do NOT retry a valid JSON-RPC `error` (a real revert result).
const ATTEMPTS: u32 = 4;
let mut last = String::new();
for attempt in 0..ATTEMPTS {
if attempt > 0 {
let ms = 150u64 * (1u64 << (attempt - 1)); // 150, 300, 600 ms
tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
}
let resp = match http.post(rpc_url).json(&body).send().await {
Ok(r) => r,
Err(e) => {
last = format!("eth_call POST failed: {e}");
continue;
}
};
if resp.status().is_server_error() {
last = format!("eth_call HTTP {}", resp.status());
continue;
}
let v: serde_json::Value = match resp.json().await {
Ok(v) => v,
Err(e) => {
last = format!("eth_call JSON parse: {e}");
continue;
}
};
if let Some(err) = v.get("error") {
return Err(CapError::ChainRpc(format!("RPC error: {err}")));
}
return v
.get("result")
.and_then(|r| r.as_str())
.map(|s| s.to_string())
.ok_or_else(|| CapError::ChainRpc("eth_call missing 'result'".into()));
}
Err(CapError::ChainRpc(format!(
"eth_call failed after {ATTEMPTS} attempts: {last}"
)))
}

pub(crate) async fn call_get_device(
Expand Down
143 changes: 126 additions & 17 deletions crates/agentkeys-cli/src/hook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
//! - `audit` → PostToolUse audit append (never blocks)
//! - `memory-inject` → pre_llm_call context injection (never blocks)

use std::io::Read;
use std::io::{IsTerminal, Read};

use anyhow::{Context, Result};
use serde_json::{json, Value};
Expand Down Expand Up @@ -237,21 +237,38 @@ pub async fn memory_inject(
actor: Option<String>,
operator: Option<String>,
) -> Result<String> {
// NOTE: deliberately does NOT read stdin. memory-inject discards the host
// payload (we inject regardless), and reading stdin would block on
// read_to_string until EOF — which never arrives when the binary is invoked
// directly without a piped payload (e.g. the harness's 1.5 seed probe, or
// any `aiosandbox /v1/shell/exec` call that leaves stdin open). That stall
// silently froze the whole wire demo after step 1.4. Wired hook scripts
// pipe a payload (EOF arrives) so they were unaffected; direct calls were not.
let client = HookClient::resolve(mcp_url, vendor_token, actor, operator);

// Pluggable engine seam (plan §6a): the gate already authorized these bytes;
// the engine — caller-side, deterministic, no LLM — selects which lines to
// the engine — caller-side, no LLM in the gate — selects which lines to
// inject within a budget. Default `passthrough` + unbounded budget injects
// the whole namespace unchanged. Passive injection carries no query (None).
let engine = agentkeys_core::memory_engine::engine_from_env();
// the whole namespace unchanged.
let budget = agentkeys_core::memory_engine::SelectionBudget::from_env();
let engine_name = std::env::var("AGENTKEYS_MEMORY_ENGINE").unwrap_or_default();

// OpenViking (plan §6a, model B) is query-driven, so it only engages when a
// query is present. We read the current turn from the host payload ONLY in
// openviking mode, and ONLY when stdin is piped (the `is_terminal()` guard
// means a direct interactive call can never hang — the historical no-stdin
// rule for the default engines is preserved). When OpenViking is
// unconfigured / has no query / errors, we fall back to a deterministic
// engine, so OpenViking is never load-bearing for availability.
let openviking = if engine_name.trim().eq_ignore_ascii_case("openviking") {
agentkeys_core::openviking::OpenVikingClient::from_env()
} else {
None
};
let query = if openviking.is_some() {
read_turn_query()
} else {
None
};
let fallback_engine: Box<dyn agentkeys_core::memory_engine::MemoryEngine> =
if openviking.is_some() {
Box::new(agentkeys_core::memory_engine::LexicalEngine)
} else {
agentkeys_core::memory_engine::engine_from_env()
};

let mut chunks = Vec::new();
for ns in namespaces
Expand All @@ -265,12 +282,34 @@ pub async fn memory_inject(
{
Ok(result) => {
if let Some(text) = extract_memory_content(&result) {
let selected = agentkeys_core::memory_engine::select_blob(
engine.as_ref(),
None,
&text,
&budget,
);
let selected = match (&openviking, &query) {
(Some(ov), Some(q)) => {
let lines = agentkeys_core::memory_engine::MemoryLine::from_blob(&text);
match agentkeys_core::openviking::rank_gate_bounded(
ov, q, &lines, &budget,
)
.await
{
Some(ranked) => ranked
.into_iter()
.map(|l| l.text)
.collect::<Vec<_>>()
.join("\n"),
None => agentkeys_core::memory_engine::select_blob(
fallback_engine.as_ref(),
query.as_deref(),
&text,
&budget,
),
}
}
_ => agentkeys_core::memory_engine::select_blob(
fallback_engine.as_ref(),
query.as_deref(),
&text,
&budget,
),
};
if !selected.is_empty() {
chunks.push(format!("## Memory: {ns}\n{selected}"));
}
Expand Down Expand Up @@ -329,6 +368,50 @@ pub fn extract_memory_content(result: &Value) -> Option<String> {
.map(|s| s.to_string())
}

/// Read the current user turn from the host hook payload (stdin) for use as the
/// OpenViking search query. Guarded by `is_terminal()` so a direct interactive
/// call can never block on an open stdin — this only runs in openviking mode;
/// the default engines never read stdin. Returns None when stdin is a TTY,
/// empty, or carries no recognizable query field.
fn read_turn_query() -> Option<String> {
if std::io::stdin().is_terminal() {
return None;
}
let mut buf = String::new();
if std::io::stdin().read_to_string(&mut buf).is_err() || buf.trim().is_empty() {
return None;
}
let payload: Value = serde_json::from_str(&buf).ok()?;
extract_query(&payload)
}

/// Pull the user's latest message from a host hook payload. Hermes'
/// `pre_llm_call` payload shape is not pinned, so we try several common field
/// names and a `messages: [{role, content}]` array (last user turn). Pure
/// helper, unit-tested.
pub fn extract_query(payload: &Value) -> Option<String> {
for key in ["query", "prompt", "input", "user_message", "text"] {
if let Some(s) = payload.get(key).and_then(|v| v.as_str()) {
if !s.trim().is_empty() {
return Some(s.trim().to_string());
}
}
}
if let Some(messages) = payload.get("messages").and_then(|v| v.as_array()) {
for message in messages.iter().rev() {
let role = message.get("role").and_then(|v| v.as_str()).unwrap_or("");
if role == "user" || role.is_empty() {
if let Some(content) = message.get("content").and_then(|v| v.as_str()) {
if !content.trim().is_empty() {
return Some(content.trim().to_string());
}
}
}
}
}
None
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -383,4 +466,30 @@ mod tests {
fn extract_memory_content_missing_field_is_none() {
assert_eq!(extract_memory_content(&json!({"ok": true})), None);
}

#[test]
fn extract_query_tries_common_fields_and_messages() {
assert_eq!(
extract_query(&json!({"query": "where did I go"})).as_deref(),
Some("where did I go")
);
assert_eq!(
extract_query(&json!({"prompt": "recall the trip"})).as_deref(),
Some("recall the trip")
);
assert_eq!(
extract_query(&json!({"messages": [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
{"role": "user", "content": "what about Chengdu?"}
]}))
.as_deref(),
Some("what about Chengdu?")
);
// a bare pre_llm_call payload (the demo's default) carries no query
assert_eq!(
extract_query(&json!({"hook_event_name": "pre_llm_call"})),
None
);
}
}
14 changes: 14 additions & 0 deletions crates/agentkeys-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,16 @@ enum Commands {
/// Cap how many memory lines the engine injects (omit = unbounded).
#[arg(long, env = "AGENTKEYS_MEMORY_MAX_LINES")]
memory_max_lines: Option<u32>,

/// OpenViking server URL, baked into the hook as OPENVIKING_ENDPOINT
/// when --memory-engine openviking (plan §6a). e.g. http://127.0.0.1:1933
#[arg(long, env = "OPENVIKING_ENDPOINT")]
openviking_endpoint: Option<String>,

/// Optional OpenViking API key, baked as OPENVIKING_API_KEY when
/// --memory-engine openviking.
#[arg(long, env = "OPENVIKING_API_KEY")]
openviking_api_key: Option<String>,
},

#[command(
Expand Down Expand Up @@ -1099,6 +1109,8 @@ async fn main() {
session_bearer,
memory_engine,
memory_max_lines,
openviking_endpoint,
openviking_api_key,
} => agentkeys_cli::wire::cmd_wire(
runtime,
agentkeys_cli::wire::WireRequest {
Expand All @@ -1111,6 +1123,8 @@ async fn main() {
session_bearer: session_bearer.clone(),
memory_engine: memory_engine.clone(),
memory_max_lines: *memory_max_lines,
memory_engine_endpoint: openviking_endpoint.clone(),
memory_engine_api_key: openviking_api_key.clone(),
check_only: *check_only,
},
),
Expand Down
42 changes: 42 additions & 0 deletions crates/agentkeys-cli/src/wire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ pub struct WireRequest {
pub memory_engine: String,
/// Optional cap on how many memory lines the engine injects (None = all).
pub memory_max_lines: Option<u32>,
/// OpenViking server URL baked as `OPENVIKING_ENDPOINT` into the hook when
/// `memory_engine == "openviking"` (plan §6a). None → not emitted.
pub memory_engine_endpoint: Option<String>,
/// Optional OpenViking API key baked as `OPENVIKING_API_KEY`.
pub memory_engine_api_key: Option<String>,
/// When true, report drift without writing (drift-check / dry-run).
pub check_only: bool,
}
Expand Down Expand Up @@ -141,6 +146,20 @@ impl HermesAdapter {
if let Some(max_lines) = req.memory_max_lines {
exports.push_str(&format!("export AGENTKEYS_MEMORY_MAX_LINES={max_lines}\n"));
}
if req.memory_engine.eq_ignore_ascii_case("openviking") {
if let Some(endpoint) = req.memory_engine_endpoint.as_deref() {
exports.push_str(&format!(
"export OPENVIKING_ENDPOINT={}\n",
shell_quote(endpoint)
));
}
if let Some(api_key) = req.memory_engine_api_key.as_deref() {
exports.push_str(&format!(
"export OPENVIKING_API_KEY={}\n",
shell_quote(api_key)
));
}
}
exports
};
vec![
Expand Down Expand Up @@ -534,6 +553,8 @@ mod tests {
session_bearer: String::new(),
memory_engine: "passthrough".into(),
memory_max_lines: None,
memory_engine_endpoint: None,
memory_engine_api_key: None,
check_only: false,
}
}
Expand Down Expand Up @@ -604,6 +625,27 @@ mod tests {
assert!(engine_at < exec_at);
}

#[test]
fn scripts_bake_openviking_endpoint_only_for_openviking() {
let a = HermesAdapter;
// endpoint set but engine is lexical → OPENVIKING_* must NOT be emitted
let mut lexical = req();
lexical.memory_engine = "lexical".into();
lexical.memory_engine_endpoint = Some("http://127.0.0.1:1933".into());
assert!(!a.scripts("/usr/local/bin/agentkeys", &lexical)[2]
.1
.contains("OPENVIKING_ENDPOINT"));
// engine openviking + endpoint → baked
let mut ov = req();
ov.memory_engine = "openviking".into();
ov.memory_engine_endpoint = Some("http://127.0.0.1:1933".into());
ov.memory_engine_api_key = Some("sk-ov-123".into());
let prellm = &a.scripts("/usr/local/bin/agentkeys", &ov)[2].1;
assert!(prellm.contains("export AGENTKEYS_MEMORY_ENGINE='openviking'"));
assert!(prellm.contains("export OPENVIKING_ENDPOINT='http://127.0.0.1:1933'"));
assert!(prellm.contains("export OPENVIKING_API_KEY='sk-ov-123'"));
}

#[test]
fn write_if_changed_is_idempotent() {
let dir = std::env::temp_dir().join(format!("agentkeys-wire-{}", std::process::id()));
Expand Down
1 change: 1 addition & 0 deletions crates/agentkeys-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod device_crypto;
pub mod init_flow;
pub mod memory_engine;
pub mod mock_client;
pub mod openviking;
pub mod otp;
pub mod payment;
pub mod s3_backend;
Expand Down
Loading
Loading