diff --git a/crates/agentkeys-broker-server/src/handlers/cap.rs b/crates/agentkeys-broker-server/src/handlers/cap.rs index 6209e012..895133f6 100644 --- a/crates/agentkeys-broker-server/src/handlers/cap.rs +++ b/crates/agentkeys-broker-server/src/handlers/cap.rs @@ -383,23 +383,47 @@ async fn eth_call( "params": [{"to": to, "data": data}, "latest"], "id": 1, }); - let resp = http - .post(rpc_url) - .json(&body) - .send() - .await - .map_err(|e| CapError::ChainRpc(format!("eth_call POST failed: {e}")))?; - let v: serde_json::Value = resp - .json() - .await - .map_err(|e| CapError::ChainRpc(format!("eth_call JSON parse: {e}")))?; - if let Some(err) = v.get("error") { - return Err(CapError::ChainRpc(format!("RPC error: {err}"))); - } - v.get("result") - .and_then(|r| r.as_str()) - .map(|s| s.to_string()) - .ok_or_else(|| CapError::ChainRpc("eth_call missing 'result'".into())) + // The Heima public RPC intermittently 500s on eth_call (~12% per call, + // HTML error page → non-JSON). Retry transient failures (transport / HTTP + // 5xx / non-JSON) with backoff so a flaky RPC doesn't randomly fail + // cap-mint; do NOT retry a valid JSON-RPC `error` (a real revert result). 
+ const ATTEMPTS: u32 = 4; + let mut last = String::new(); + for attempt in 0..ATTEMPTS { + if attempt > 0 { + let ms = 150u64 * (1u64 << (attempt - 1)); // 150, 300, 600 ms + tokio::time::sleep(std::time::Duration::from_millis(ms)).await; + } + let resp = match http.post(rpc_url).json(&body).send().await { + Ok(r) => r, + Err(e) => { + last = format!("eth_call POST failed: {e}"); + continue; + } + }; + if resp.status().is_server_error() { + last = format!("eth_call HTTP {}", resp.status()); + continue; + } + let v: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(e) => { + last = format!("eth_call JSON parse: {e}"); + continue; + } + }; + if let Some(err) = v.get("error") { + return Err(CapError::ChainRpc(format!("RPC error: {err}"))); + } + return v + .get("result") + .and_then(|r| r.as_str()) + .map(|s| s.to_string()) + .ok_or_else(|| CapError::ChainRpc("eth_call missing 'result'".into())); + } + Err(CapError::ChainRpc(format!( + "eth_call failed after {ATTEMPTS} attempts: {last}" + ))) } pub(crate) async fn call_get_device( diff --git a/crates/agentkeys-cli/src/hook.rs b/crates/agentkeys-cli/src/hook.rs index 510c36e9..d11d01da 100644 --- a/crates/agentkeys-cli/src/hook.rs +++ b/crates/agentkeys-cli/src/hook.rs @@ -24,7 +24,7 @@ //! - `audit` → PostToolUse audit append (never blocks) //! - `memory-inject` → pre_llm_call context injection (never blocks) -use std::io::Read; +use std::io::{IsTerminal, Read}; use anyhow::{Context, Result}; use serde_json::{json, Value}; @@ -237,21 +237,38 @@ pub async fn memory_inject( actor: Option, operator: Option, ) -> Result { - // NOTE: deliberately does NOT read stdin. memory-inject discards the host - // payload (we inject regardless), and reading stdin would block on - // read_to_string until EOF — which never arrives when the binary is invoked - // directly without a piped payload (e.g. the harness's 1.5 seed probe, or - // any `aiosandbox /v1/shell/exec` call that leaves stdin open). 
That stall - // silently froze the whole wire demo after step 1.4. Wired hook scripts - // pipe a payload (EOF arrives) so they were unaffected; direct calls were not. let client = HookClient::resolve(mcp_url, vendor_token, actor, operator); // Pluggable engine seam (plan §6a): the gate already authorized these bytes; - // the engine — caller-side, deterministic, no LLM — selects which lines to + // the engine — caller-side, no LLM in the gate — selects which lines to // inject within a budget. Default `passthrough` + unbounded budget injects - // the whole namespace unchanged. Passive injection carries no query (None). - let engine = agentkeys_core::memory_engine::engine_from_env(); + // the whole namespace unchanged. let budget = agentkeys_core::memory_engine::SelectionBudget::from_env(); + let engine_name = std::env::var("AGENTKEYS_MEMORY_ENGINE").unwrap_or_default(); + + // OpenViking (plan §6a, model B) is query-driven, so it only engages when a + // query is present. We read the current turn from the host payload ONLY in + // openviking mode, and ONLY when stdin is not a terminal (the `is_terminal()` + // guard keeps an interactive TTY call from hanging; the default engines still + // never read stdin). NOTE(review): a non-TTY stdin that is left open without + // EOF (e.g. a direct `/v1/shell/exec` call) passes the guard and still blocks + // in `read_to_string` — confirm openviking mode is only invoked with a piped + // payload. When OpenViking is + // unconfigured / has no query / errors, we fall back to a deterministic + // engine, so OpenViking is never load-bearing for availability. 
+ let openviking = if engine_name.trim().eq_ignore_ascii_case("openviking") { + agentkeys_core::openviking::OpenVikingClient::from_env() + } else { + None + }; + let query = if openviking.is_some() { + read_turn_query() + } else { + None + }; + let fallback_engine: Box = + if openviking.is_some() { + Box::new(agentkeys_core::memory_engine::LexicalEngine) + } else { + agentkeys_core::memory_engine::engine_from_env() + }; let mut chunks = Vec::new(); for ns in namespaces @@ -265,12 +282,34 @@ pub async fn memory_inject( { Ok(result) => { if let Some(text) = extract_memory_content(&result) { - let selected = agentkeys_core::memory_engine::select_blob( - engine.as_ref(), - None, - &text, - &budget, - ); + let selected = match (&openviking, &query) { + (Some(ov), Some(q)) => { + let lines = agentkeys_core::memory_engine::MemoryLine::from_blob(&text); + match agentkeys_core::openviking::rank_gate_bounded( + ov, q, &lines, &budget, + ) + .await + { + Some(ranked) => ranked + .into_iter() + .map(|l| l.text) + .collect::>() + .join("\n"), + None => agentkeys_core::memory_engine::select_blob( + fallback_engine.as_ref(), + query.as_deref(), + &text, + &budget, + ), + } + } + _ => agentkeys_core::memory_engine::select_blob( + fallback_engine.as_ref(), + query.as_deref(), + &text, + &budget, + ), + }; if !selected.is_empty() { chunks.push(format!("## Memory: {ns}\n{selected}")); } @@ -329,6 +368,50 @@ pub fn extract_memory_content(result: &Value) -> Option { .map(|s| s.to_string()) } +/// Read the current user turn from the host hook payload (stdin) for use as the +/// OpenViking search query. Guarded by `is_terminal()` so an interactive (TTY) +/// call returns immediately; a non-TTY stdin that never reaches EOF still +/// blocks in `read_to_string`. This only runs in openviking mode; +/// the default engines never read stdin. Returns None when stdin is a TTY, +/// unreadable, empty, not valid JSON, or carries no recognizable query field. 
+fn read_turn_query() -> Option { + if std::io::stdin().is_terminal() { + return None; + } + let mut buf = String::new(); + if std::io::stdin().read_to_string(&mut buf).is_err() || buf.trim().is_empty() { + return None; + } + let payload: Value = serde_json::from_str(&buf).ok()?; + extract_query(&payload) +} + +/// Pull the user's latest message from a host hook payload. Hermes' +/// `pre_llm_call` payload shape is not pinned, so we try several common field +/// names and a `messages: [{role, content}]` array (last user turn). Pure +/// helper, unit-tested. +pub fn extract_query(payload: &Value) -> Option { + for key in ["query", "prompt", "input", "user_message", "text"] { + if let Some(s) = payload.get(key).and_then(|v| v.as_str()) { + if !s.trim().is_empty() { + return Some(s.trim().to_string()); + } + } + } + if let Some(messages) = payload.get("messages").and_then(|v| v.as_array()) { + for message in messages.iter().rev() { + let role = message.get("role").and_then(|v| v.as_str()).unwrap_or(""); + if role == "user" || role.is_empty() { + if let Some(content) = message.get("content").and_then(|v| v.as_str()) { + if !content.trim().is_empty() { + return Some(content.trim().to_string()); + } + } + } + } + } + None +} + #[cfg(test)] mod tests { use super::*; @@ -383,4 +466,30 @@ mod tests { fn extract_memory_content_missing_field_is_none() { assert_eq!(extract_memory_content(&json!({"ok": true})), None); } + + #[test] + fn extract_query_tries_common_fields_and_messages() { + assert_eq!( + extract_query(&json!({"query": "where did I go"})).as_deref(), + Some("where did I go") + ); + assert_eq!( + extract_query(&json!({"prompt": "recall the trip"})).as_deref(), + Some("recall the trip") + ); + assert_eq!( + extract_query(&json!({"messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "hello"}, + {"role": "user", "content": "what about Chengdu?"} + ]})) + .as_deref(), + Some("what about Chengdu?") + ); + // a bare pre_llm_call payload 
(the demo's default) carries no query + assert_eq!( + extract_query(&json!({"hook_event_name": "pre_llm_call"})), + None + ); + } } diff --git a/crates/agentkeys-cli/src/main.rs b/crates/agentkeys-cli/src/main.rs index c4af2fdd..de9bf7fc 100644 --- a/crates/agentkeys-cli/src/main.rs +++ b/crates/agentkeys-cli/src/main.rs @@ -358,6 +358,16 @@ enum Commands { /// Cap how many memory lines the engine injects (omit = unbounded). #[arg(long, env = "AGENTKEYS_MEMORY_MAX_LINES")] memory_max_lines: Option, + + /// OpenViking server URL, baked into the hook as OPENVIKING_ENDPOINT + /// when --memory-engine openviking (plan §6a). e.g. http://127.0.0.1:1933 + #[arg(long, env = "OPENVIKING_ENDPOINT")] + openviking_endpoint: Option, + + /// Optional OpenViking API key, baked as OPENVIKING_API_KEY when + /// --memory-engine openviking. + #[arg(long, env = "OPENVIKING_API_KEY")] + openviking_api_key: Option, }, #[command( @@ -1099,6 +1109,8 @@ async fn main() { session_bearer, memory_engine, memory_max_lines, + openviking_endpoint, + openviking_api_key, } => agentkeys_cli::wire::cmd_wire( runtime, agentkeys_cli::wire::WireRequest { @@ -1111,6 +1123,8 @@ async fn main() { session_bearer: session_bearer.clone(), memory_engine: memory_engine.clone(), memory_max_lines: *memory_max_lines, + memory_engine_endpoint: openviking_endpoint.clone(), + memory_engine_api_key: openviking_api_key.clone(), check_only: *check_only, }, ), diff --git a/crates/agentkeys-cli/src/wire.rs b/crates/agentkeys-cli/src/wire.rs index adbca31c..cda139c4 100644 --- a/crates/agentkeys-cli/src/wire.rs +++ b/crates/agentkeys-cli/src/wire.rs @@ -44,6 +44,11 @@ pub struct WireRequest { pub memory_engine: String, /// Optional cap on how many memory lines the engine injects (None = all). pub memory_max_lines: Option, + /// OpenViking server URL baked as `OPENVIKING_ENDPOINT` into the hook when + /// `memory_engine == "openviking"` (plan §6a). None → not emitted. 
+ pub memory_engine_endpoint: Option, + /// Optional OpenViking API key baked as `OPENVIKING_API_KEY`. + pub memory_engine_api_key: Option, /// When true, report drift without writing (drift-check / dry-run). pub check_only: bool, } @@ -141,6 +146,20 @@ impl HermesAdapter { if let Some(max_lines) = req.memory_max_lines { exports.push_str(&format!("export AGENTKEYS_MEMORY_MAX_LINES={max_lines}\n")); } + if req.memory_engine.eq_ignore_ascii_case("openviking") { + if let Some(endpoint) = req.memory_engine_endpoint.as_deref() { + exports.push_str(&format!( + "export OPENVIKING_ENDPOINT={}\n", + shell_quote(endpoint) + )); + } + if let Some(api_key) = req.memory_engine_api_key.as_deref() { + exports.push_str(&format!( + "export OPENVIKING_API_KEY={}\n", + shell_quote(api_key) + )); + } + } exports }; vec![ @@ -534,6 +553,8 @@ mod tests { session_bearer: String::new(), memory_engine: "passthrough".into(), memory_max_lines: None, + memory_engine_endpoint: None, + memory_engine_api_key: None, check_only: false, } } @@ -604,6 +625,27 @@ mod tests { assert!(engine_at < exec_at); } + #[test] + fn scripts_bake_openviking_endpoint_only_for_openviking() { + let a = HermesAdapter; + // endpoint set but engine is lexical → OPENVIKING_* must NOT be emitted + let mut lexical = req(); + lexical.memory_engine = "lexical".into(); + lexical.memory_engine_endpoint = Some("http://127.0.0.1:1933".into()); + assert!(!a.scripts("/usr/local/bin/agentkeys", &lexical)[2] + .1 + .contains("OPENVIKING_ENDPOINT")); + // engine openviking + endpoint → baked + let mut ov = req(); + ov.memory_engine = "openviking".into(); + ov.memory_engine_endpoint = Some("http://127.0.0.1:1933".into()); + ov.memory_engine_api_key = Some("sk-ov-123".into()); + let prellm = &a.scripts("/usr/local/bin/agentkeys", &ov)[2].1; + assert!(prellm.contains("export AGENTKEYS_MEMORY_ENGINE='openviking'")); + assert!(prellm.contains("export OPENVIKING_ENDPOINT='http://127.0.0.1:1933'")); + assert!(prellm.contains("export 
OPENVIKING_API_KEY='sk-ov-123'")); + } + #[test] fn write_if_changed_is_idempotent() { let dir = std::env::temp_dir().join(format!("agentkeys-wire-{}", std::process::id())); diff --git a/crates/agentkeys-core/src/lib.rs b/crates/agentkeys-core/src/lib.rs index 009996fe..0486e4ef 100644 --- a/crates/agentkeys-core/src/lib.rs +++ b/crates/agentkeys-core/src/lib.rs @@ -8,6 +8,7 @@ pub mod device_crypto; pub mod init_flow; pub mod memory_engine; pub mod mock_client; +pub mod openviking; pub mod otp; pub mod payment; pub mod s3_backend; diff --git a/crates/agentkeys-core/src/openviking.rs b/crates/agentkeys-core/src/openviking.rs new file mode 100644 index 00000000..30444369 --- /dev/null +++ b/crates/agentkeys-core/src/openviking.rs @@ -0,0 +1,371 @@ +//! OpenViking engine adapter — plan `docs/plan/agentkeys-memory-design.md` §6a. +//! +//! OpenViking (`volcengine/OpenViking`) is a self-hosted context database. In +//! AgentKeys' Model-B integration it is the pluggable RANKING engine *behind* +//! our gate: AgentKeys still STORES (K3-encrypted S3) + GATES (cap / scope / +//! namespace / audit) + DELIVERS (the `pre_llm_call` hook). OpenViking only +//! reorders. The HTTP contract below is taken verbatim from the Hermes +//! `plugins/memory/openviking` client — not guessed: +//! +//! base http://127.0.0.1:1933 (OPENVIKING_ENDPOINT) +//! headers X-OpenViking-Agent / -Account / -User, plus X-API-Key + +//! `Authorization: Bearer <api-key>` when OPENVIKING_API_KEY is set +//! GET /health -> 200 when up +//! POST /api/v1/search/find {query, top_k} +//! -> {result:{results:[{score, content|text, uri}]}} +//! POST /api/v1/content/write {uri, content, mode:"create"} +//! error envelope: HTTP >= 400, or {status:"error", error:{code,message}} +//! +//! SAFETY — the gate bounds visibility: [`rank_gate_bounded`] only ever returns +//! lines that were in the gate-authorized input set. OpenViking can change the +//! ORDER but can never WIDEN what is injectable; a compromised/over-broad +//! 
OpenViking cannot leak content the gate did not authorize. On any error, +//! empty result, or no match it returns `None`, so the caller falls back to a deterministic +//! built-in engine — OpenViking is never load-bearing for availability. + +use serde::Deserialize; + +use crate::memory_engine::{MemoryLine, SelectionBudget}; + +pub const DEFAULT_ENDPOINT: &str = "http://127.0.0.1:1933"; + +#[derive(Debug, Clone)] +pub struct OpenVikingClient { + endpoint: String, + api_key: String, + account: String, + user: String, + agent: String, + http: reqwest::Client, +} + +#[derive(Debug, thiserror::Error)] +pub enum OpenVikingError { + #[error("openviking transport: {0}")] + Transport(String), + #[error("openviking http {status}: {body}")] + Http { status: u16, body: String }, + #[error("openviking parse: {0}")] + Parse(String), +} + +#[derive(Debug, Deserialize)] +struct FindEnvelope { + #[serde(default)] + result: Option, + #[serde(default)] + status: Option, +} + +#[derive(Debug, Deserialize)] +struct FindResult { + #[serde(default)] + results: Vec, +} + +#[derive(Debug, Deserialize)] +struct FindHit { + #[serde(default)] + score: f64, + #[serde(default)] + content: Option, + #[serde(default)] + text: Option, +} + +impl FindHit { + fn body(&self) -> Option<&str> { + self.content.as_deref().or(self.text.as_deref()) + } +} + +impl OpenVikingClient { + /// Build from the OpenViking env vars; `None` when `OPENVIKING_ENDPOINT` is + /// unset/empty (so the caller cleanly falls back to a built-in engine). 
+ pub fn from_env() -> Option { + let endpoint = std::env::var("OPENVIKING_ENDPOINT") + .ok() + .filter(|s| !s.is_empty())?; + Some(Self::new( + endpoint, + std::env::var("OPENVIKING_API_KEY").unwrap_or_default(), + std::env::var("OPENVIKING_ACCOUNT").unwrap_or_else(|_| "default".to_string()), + std::env::var("OPENVIKING_USER").unwrap_or_else(|_| "default".to_string()), + std::env::var("OPENVIKING_AGENT").unwrap_or_else(|_| "hermes".to_string()), + )) + } + + pub fn new( + endpoint: String, + api_key: String, + account: String, + user: String, + agent: String, + ) -> Self { + Self { + endpoint: endpoint.trim_end_matches('/').to_string(), + api_key, + account, + user, + agent, + http: reqwest::Client::new(), + } + } + + fn with_headers(&self, req: reqwest::RequestBuilder) -> reqwest::RequestBuilder { + let mut req = req.header("X-OpenViking-Agent", &self.agent); + if !self.account.is_empty() { + req = req.header("X-OpenViking-Account", &self.account); + } + if !self.user.is_empty() { + req = req.header("X-OpenViking-User", &self.user); + } + if !self.api_key.is_empty() { + req = req + .header("X-API-Key", &self.api_key) + .header("Authorization", format!("Bearer {}", self.api_key)); + } + req + } + + pub async fn health(&self) -> bool { + let url = format!("{}/health", self.endpoint); + self.with_headers(self.http.get(&url)) + .send() + .await + .map(|r| r.status().is_success()) + .unwrap_or(false) + } + + /// `POST /api/v1/search/find` — semantic ranking. Returns `(score, text)` + /// hits in OpenViking's ranked order. 
+ pub async fn search_find( + &self, + query: &str, + top_k: usize, + ) -> Result, OpenVikingError> { + let url = format!("{}/api/v1/search/find", self.endpoint); + let resp = self + .with_headers( + self.http + .post(&url) + .json(&serde_json::json!({ "query": query, "top_k": top_k })), + ) + .send() + .await + .map_err(|e| OpenVikingError::Transport(e.to_string()))?; + let status = resp.status(); + let body = resp + .text() + .await + .map_err(|e| OpenVikingError::Transport(e.to_string()))?; + if !status.is_success() { + return Err(OpenVikingError::Http { + status: status.as_u16(), + body, + }); + } + let envelope: FindEnvelope = + serde_json::from_str(&body).map_err(|e| OpenVikingError::Parse(e.to_string()))?; + if envelope.status.as_deref() == Some("error") { + return Err(OpenVikingError::Http { + status: status.as_u16(), + body, + }); + } + Ok(envelope + .result + .map(|r| r.results) + .unwrap_or_default() + .into_iter() + .filter_map(|hit| hit.body().map(|b| (hit.score, b.to_string()))) + .collect()) + } + + /// `POST /api/v1/content/write` — mirror one gate-authorized line into + /// OpenViking so `search/find` can rank it. The durable copy stays in + /// AgentKeys' encrypted S3; this is OpenViking's (operator-self-hosted) + /// ranking index only. 
+ pub async fn write_content(&self, uri: &str, content: &str) -> Result<(), OpenVikingError> { + let url = format!("{}/api/v1/content/write", self.endpoint); + let resp = self + .with_headers(self.http.post(&url).json(&serde_json::json!({ + "uri": uri, + "content": content, + "mode": "create", + }))) + .send() + .await + .map_err(|e| OpenVikingError::Transport(e.to_string()))?; + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(OpenVikingError::Http { + status: status.as_u16(), + body, + }); + } + Ok(()) + } +} + +fn normalize(text: &str) -> String { + text.trim().to_lowercase() +} + +/// Rank gate-authorized `lines` via OpenViking, bounded by the gate. +/// +/// Returns `Some(reordered subset of `lines`)` on success, or `None` on any +/// error / empty / no-match so the caller falls back to a deterministic engine. +/// A hit maps to a line when their normalized text is equal or one contains the +/// other (OpenViking may return a tiered abstract rather than the verbatim +/// line). Only `lines` entries are ever returned — never a raw OpenViking hit. 
+pub async fn rank_gate_bounded( + client: &OpenVikingClient, + query: &str, + lines: &[MemoryLine], + budget: &SelectionBudget, +) -> Option> { + if lines.is_empty() { + return None; + } + let top_k = budget.max_lines.unwrap_or(lines.len()).max(1); + let hits = client.search_find(query, top_k).await.ok()?; + if hits.is_empty() { + return None; + } + let mut out: Vec = Vec::new(); + let mut taken = std::collections::HashSet::new(); + for (_score, hit_text) in hits { + let hit_norm = normalize(&hit_text); + if let Some(line) = lines.iter().find(|l| { + let line_norm = normalize(&l.text); + line_norm == hit_norm || hit_norm.contains(&line_norm) || line_norm.contains(&hit_norm) + }) { + if taken.insert(line.seq) { + out.push(line.clone()); + } + } + } + if out.is_empty() { + return None; + } + if let Some(max) = budget.max_lines { + out.truncate(max); + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::{extract::State, routing::post, Json, Router}; + + async fn spawn_stub(response: serde_json::Value) -> String { + let app = Router::new() + .route( + "/api/v1/search/find", + post(|State(body): State| async move { Json(body) }), + ) + .with_state(response); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + format!("http://{addr}") + } + + fn client(endpoint: String) -> OpenVikingClient { + OpenVikingClient::new( + endpoint, + String::new(), + "default".into(), + "default".into(), + "hermes".into(), + ) + } + + fn lines() -> Vec { + vec![ + MemoryLine { + text: "Chengdu trip — Apr 12 to 16.".into(), + seq: 0, + }, + MemoryLine { + text: "Allergic to peanuts.".into(), + seq: 1, + }, + ] + } + + #[tokio::test] + async fn search_find_parses_score_ordered_hits() { + let endpoint = spawn_stub(serde_json::json!({ + "result": {"results": [ + {"score": 0.9, "content": "Allergic to peanuts."}, + {"score": 
0.7, "text": "Chengdu trip — Apr 12 to 16."} + ]} + })) + .await; + let hits = client(endpoint).search_find("peanut", 5).await.unwrap(); + assert_eq!(hits.len(), 2); + assert_eq!(hits[0].1, "Allergic to peanuts."); + } + + #[tokio::test] + async fn rank_is_gate_bounded_and_reordered() { + // OpenViking ranks peanuts top, then chengdu, AND returns an + // unauthorized line that is NOT in the gate set — it must be dropped. + let endpoint = spawn_stub(serde_json::json!({ + "result": {"results": [ + {"score": 0.9, "content": "Allergic to peanuts."}, + {"score": 0.8, "content": "SECRET not in the authorized set"}, + {"score": 0.7, "content": "Chengdu trip — Apr 12 to 16."} + ]} + })) + .await; + let budget = SelectionBudget { + max_lines: Some(5), + max_bytes: None, + }; + let out = rank_gate_bounded(&client(endpoint), "peanut", &lines(), &budget) + .await + .unwrap(); + let texts: Vec<&str> = out.iter().map(|l| l.text.as_str()).collect(); + // gate-bound: only the two authorized lines, in OpenViking's order + assert_eq!( + texts, + vec!["Allergic to peanuts.", "Chengdu trip — Apr 12 to 16."] + ); + } + + #[tokio::test] + async fn empty_results_falls_back_to_none() { + let endpoint = spawn_stub(serde_json::json!({ "result": {"results": []} })).await; + let budget = SelectionBudget::default(); + assert!(rank_gate_bounded(&client(endpoint), "q", &lines(), &budget) + .await + .is_none()); + } + + #[tokio::test] + async fn budget_caps_results() { + let endpoint = spawn_stub(serde_json::json!({ + "result": {"results": [ + {"score": 0.9, "content": "Allergic to peanuts."}, + {"score": 0.7, "content": "Chengdu trip — Apr 12 to 16."} + ]} + })) + .await; + let budget = SelectionBudget { + max_lines: Some(1), + max_bytes: None, + }; + let out = rank_gate_bounded(&client(endpoint), "q", &lines(), &budget) + .await + .unwrap(); + assert_eq!(out.len(), 1); + assert_eq!(out[0].text, "Allergic to peanuts."); + } +} diff --git a/crates/agentkeys-worker-creds/src/verify.rs 
b/crates/agentkeys-worker-creds/src/verify.rs index d1b32a3f..51fbe1a4 100644 --- a/crates/agentkeys-worker-creds/src/verify.rs +++ b/crates/agentkeys-worker-creds/src/verify.rs @@ -259,23 +259,49 @@ async fn eth_call( "params": [{"to": to, "data": data}, "latest"], "id": 1, }); - let resp = http - .post(rpc_url) - .json(&body) - .send() - .await - .map_err(|e| VerifyError::ChainRpc(format!("eth_call POST: {e}")))?; - let v: serde_json::Value = resp - .json() - .await - .map_err(|e| VerifyError::ChainRpc(format!("eth_call json: {e}")))?; - if let Some(err) = v.get("error") { - return Err(VerifyError::ChainRpc(format!("rpc error: {err}"))); + // The Heima public RPC intermittently 500s on eth_call (~12% per call, + // returning an HTML error page → non-JSON body). A single attempt makes + // every chain-verify a coin-flip and the worker returns a 502. Retry + // transient failures (transport error / HTTP 5xx / non-JSON body) with + // backoff; do NOT retry a valid JSON-RPC `error` (a real revert/bad-arg + // result, which is deterministic). 
+ const ATTEMPTS: u32 = 4; + let mut last = String::new(); + for attempt in 0..ATTEMPTS { + if attempt > 0 { + let ms = 150u64 * (1u64 << (attempt - 1)); // 150, 300, 600 ms + tokio::time::sleep(std::time::Duration::from_millis(ms)).await; + } + let resp = match http.post(rpc_url).json(&body).send().await { + Ok(r) => r, + Err(e) => { + last = format!("eth_call POST: {e}"); + continue; + } + }; + if resp.status().is_server_error() { + last = format!("eth_call HTTP {}", resp.status()); + continue; + } + let v: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(e) => { + last = format!("eth_call json: {e}"); + continue; + } + }; + if let Some(err) = v.get("error") { + return Err(VerifyError::ChainRpc(format!("rpc error: {err}"))); + } + return v + .get("result") + .and_then(|r| r.as_str()) + .map(|s| s.to_string()) + .ok_or_else(|| VerifyError::ChainRpc("missing 'result'".into())); } - v.get("result") - .and_then(|r| r.as_str()) - .map(|s| s.to_string()) - .ok_or_else(|| VerifyError::ChainRpc("missing 'result'".into())) + Err(VerifyError::ChainRpc(format!( + "eth_call failed after {ATTEMPTS} attempts: {last}" + ))) } fn parse_device_entry(raw: &str) -> Result { diff --git a/docs/arch.md b/docs/arch.md index 689b2d20..7aa783bd 100644 --- a/docs/arch.md +++ b/docs/arch.md @@ -495,9 +495,9 @@ ON AGENT MACHINE (any VM / container / CI runner / cloud sandbox / no-input devi ON MASTER (already initialized; holds J1_master; master ≠ agent machine): 6. The owner scans/enters the displayed code: - agentkeys agent claim --pairing-code --label agent-A --services memory + agentkeys agent claim --pairing-code --label agent-A --services memory:travel 7. CLI → broker: POST /v1/agent/pairing/claim - { pairing_code, label: "agent-A", requested_scope: "memory" } + { pairing_code, label: "agent-A", requested_scope: "memory:travel" } Authorization: Bearer J1_master 8. 
Broker (J1_master bearer is the gate; K11 is NOT presented here — agents are K10-only per the contract, so there is nothing for the broker to K11-verify): @@ -894,7 +894,7 @@ Each data class gets its own worker — independent IAM, independent deploy life - **Operations:** R/W agent state at high frequency. **STS session policies enable direct S3 access** from the agent process for the duration of the session — the worker is NOT in the LLM-call hot path. The worker mints a TTL-bounded STS session at session start; the agent's localhost SDK uses STS creds for many ops within the TTL. - **OIDC federation (issue #90):** Same `X-Aws-*` header passthrough as creds. Each data-class has its own IAM role (`agentkeys-memory-role`); memory-role STS creds are rejected at the vault bucket and vice versa. See §17.5. - **Namespace = signed service (issue #147):** the memory `service` carries the namespace as **`memory:`** (e.g. `memory:travel`). Because `service` is a signed cap field, the namespace is tamper-proof and is authorized by the existing on-chain `isServiceInScope(operator, actor, keccak("memory:"))` gate. The worker keys storage (`bots//memory/memory:.enc`), the envelope AAD, and the scope check all off that one signed field — so two namespaces are physically segregated with no new mechanism. Minted in `crates/agentkeys-mcp-server/src/tools/memory.rs`; enforced in `crates/agentkeys-worker-memory/src/handlers.rs`. -- **Memory engine — pluggable, not built in v0 (Position C):** the worker is **store + gate only** (deterministic, no ranking, no LLM). Ranking / extraction / consolidation is delegated to an external engine via an adapter trait (`extract` / `rank` / `synthesize`); canonical reference engine **OpenViking**; delivery via the `pre_llm_call` hook (#141), never a runtime `memory.provider`. Full design + Hermes-provider compatibility strategy: [`plan/agentkeys-memory-design.md`](plan/agentkeys-memory-design.md) (§6a engine seam; §22 pluggable-axis row). 
Background: [`research/ai-memory-systems-survey.md`](research/ai-memory-systems-survey.md), decision record [`research/memory-build-vs-gate-decision.md`](research/memory-build-vs-gate-decision.md), [`research/universal-gate-pattern.md`](research/universal-gate-pattern.md). +- **Memory engine — pluggable, not built in v0 (Position C):** the worker is **store + gate only** (deterministic, no ranking, no LLM). Ranking / extraction / consolidation is delegated to an external engine via an adapter trait (`extract` / `rank` / `synthesize`); canonical reference engine **OpenViking**; delivery via the `pre_llm_call` hook (#141), never a runtime `memory.provider`. Full design + Hermes-provider compatibility strategy: [`plan/agentkeys-memory-design.md`](plan/agentkeys-memory-design.md) (§6a engine seam; §22 pluggable-axis row). Background: [`research/ai-memory-systems-survey.md`](research/ai-memory-systems-survey.md), decision record [`research/memory-build-vs-gate-decision.md`](research/memory-build-vs-gate-decision.md), [`research/universal-gate-pattern.md`](research/universal-gate-pattern.md). Operator test guide (OpenViking behind the gate): [`operator-runbook-openviking.md`](operator-runbook-openviking.md). - **Classifier-service — planned write-side dual of the engine (§15.6):** the engine ranks at *read*; the planned **classifier-service** compiles natural-language intent → the structured policy attribute the gate enforces (memory→namespace, creds→service-category, IoT→device-tier), and tags novel requests — *NL-programmable, deterministically-enforced* authorization fleet-wide, with no model on the gate's hot path. Design + three-phase plan + caching/efficiency model: [`plan/classifier-service.md`](plan/classifier-service.md). 
### 15.3 audit-service diff --git a/docs/operator-runbook-openviking.md b/docs/operator-runbook-openviking.md new file mode 100644 index 00000000..304ce079 --- /dev/null +++ b/docs/operator-runbook-openviking.md @@ -0,0 +1,253 @@ +# OpenViking as the AgentKeys memory engine — operator runbook + +> **Model B (plan [§6a](plan/agentkeys-memory-design.md)):** OpenViking **ranks**; AgentKeys keeps **storing** (K3-encrypted per-actor S3), **gating** (cap-token + on-chain scope + namespace + audit), and **delivering** (the `pre_llm_call` hook). OpenViking can re-order what gets injected but can **never widen** it — the gate bounds visibility. This runbook stands OpenViking up next to a wired AgentKeys agent and proves the **gated → OpenViking-ranked → injected** flow. + +## Why this is NOT `memory.provider: openviking` + +| | This runbook (AgentKeys-gated) | Plain Hermes `memory.provider: openviking` | +|---|---|---| +| Where memory lives | AgentKeys' **encrypted S3** (durable, per-actor) + OpenViking holds a ranking index | OpenViking only | +| Gate (cap / scope / namespace / audit) | **yes** — every read is gated | none | +| LLM-facing memory tools (`viking_search`, …) | **no** — the LLM never gets them | **yes** (5 tools) | +| What OpenViking does | **ranks** gate-authorized lines for the current turn | stores + serves + extracts | + +> ⛔ **Do NOT run `hermes memory setup`.** The OpenViking docs tell you to — but that sets `memory.provider: openviking`, which wires OpenViking as a **Hermes provider**: it hands the LLM the 5 `viking_*` tools and makes OpenViking the store, **bypassing our gate** (the ungated "Model A" we rejected). In our gated model the Hermes-side wiring is **`agentkeys wire hermes --memory-engine openviking …`** (Step 6) — that's the replacement for `hermes memory setup`. +> +> **Already ran it? 
Undo (the provider is just a config key — this does NOT touch the AgentKeys `pre_llm_call` hook, which lives in a separate `# >>> agentkeys wire` managed block):** +> ```bash +> # inspect what it set +> hermes config get memory.provider 2>/dev/null # → openviking +> sed -n '/^memory:/,/^[^[:space:]]/p' ~/.hermes/config.yaml # the memory: block +> grep -nE 'OPENVIKING' ~/.hermes/.env 2>/dev/null +> # turn it off — try the hermes way first, else edit the files: +> hermes memory setup # pick "none"/"disable"/"built-in" if offered +> # else: delete the `memory:`/`provider: openviking` block from ~/.hermes/config.yaml, +> # and remove OPENVIKING_ENDPOINT / OPENVIKING_API_KEY from ~/.hermes/.env +> hermes config get memory.provider # verify empty/none +> ``` +> Keep `openviking-server` running and keep (or run) `agentkeys wire … --memory-engine openviking` — our gated hook still uses OpenViking; you've only removed the ungated Hermes provider. + +OpenViking is a pluggable *engine*, swappable for Holographic / mem0 / a deterministic built-in. If you don't need semantic search, `MEMORY_ENGINE=lexical` gives gated, query-aware ranking with **zero models to deploy** — skip this whole runbook. + +## Prerequisites + +1. **A working wired AgentKeys agent.** Run the wire demo first so memory already flows end-to-end: + ```bash + bash harness/phase1-wire-demo.sh --light # self-contained, or --real for the live worker + ``` + See [`operator-runbook-wire.md`](operator-runbook-wire.md). +2. **Python 3.10+ and `pip`** (already present in the aiosandbox). +3. **An embedding model** (small, local) — OpenViking does *semantic* search, which needs embeddings. The `init` wizard (Step 2) downloads a ~24 MB BGE model on CPU. **A VLM is optional** for our use (see Step 2). 
+ +### Run everything below INSIDE the sandbox +`openviking-server init` is an **interactive wizard** and the server is long-running, so shell into the sandbox and run the commands directly (do **not** use the one-shot `/v1/shell/exec` API / the `sbx` helper from the wire runbook): +```bash +docker exec -it <sandbox-container> bash # you are now gem@… inside the sandbox +``` + +## Step 1 — install OpenViking +```bash +pip install --quiet openviking && python -c "import openviking; print('openviking installed')" +``` + +## Step 2 — configure OpenViking (`openviking-server init`) +```bash +openviking-server init +``` +Wizard answers for **our** use: +- **Setup mode** → `2` (local embedding via llama.cpp, CPU, no GPU) — or `1` Cloud API only if you have an OpenAI/VolcEngine key. Avoid Ollama in the sandbox (multi-GB pulls). +- **Embedding model** → the offered BGE small model (~24 MB). *(It may be a `-zh` build; fine for a test. If an English `-en` variant is offered and your memory is English, prefer it.)* +- **VLM** → `3` **Skip VLM (embedding only)**. We only use OpenViking's **semantic search** (`search/find`), which is pure embeddings — the VLM is OpenViking's *extraction/tiering* engine, which we don't need (AgentKeys supplies the gate-authorized lines). If a later write insists on a VLM, re-run `init` → `2` Cloud API and point it at your DeepSeek/OpenRouter (`provider openai`, `base_url https://openrouter.ai/api/v1`). + +## Step 3 — start the server and confirm health +```bash +nohup openviking-server >~/openviking.log 2>&1 & +sleep 3; curl -fsS http://localhost:1933/health && echo " — openviking up" +``` +If `/health` fails, read `~/openviking.log` (usually an embedding/VLM config issue from Step 2). + +## Step 4 — load a REAL corpus and SEE semantic search work (direct eval) + +A 3-line database can't show semantic retrieval.
Load the diverse sample corpus ([`../harness/fixtures/sample-memory.md`](../harness/fixtures/sample-memory.md), ~36 facts across health / travel / family / work / finance) and query it directly. + +**Get the corpus into the sandbox** — from your **laptop** (repo root): +```bash +curl -sS -X POST "${SANDBOX_URL:-http://localhost:8080}/v1/file/upload" \ + -F "file=@harness/fixtures/sample-memory.md" -F "path=/home/gem/sample-memory.md" +``` +**Load it** (inside the sandbox). The write URI **must** be +`viking://user/<user>/memories/<subdir>/<name>.md` — the `<subdir>` segment **and** +the `.md` extension are required, or the server returns **HTTP 400**. First confirm +one write (show the response, don't hide it), then load the corpus idempotently: + +```bash +OV=http://localhost:1933; OVUSER=default # OVUSER = your OPENVIKING_USER (default: "default") + +# sanity: ONE write, SHOW the response (no -f/-s hiding the error) +curl -sS -X POST "$OV/api/v1/content/write" -H 'content-type: application/json' \ + -d "$(jq -n --arg u "viking://user/$OVUSER/memories/sample/mem_000.md" \ + --arg c "Severely allergic to peanuts." '{uri:$u,content:$c,mode:"create"}')" | jq . +# expect: {"result":{"written_bytes":...}} — if you see an error, paste it. + +# load the corpus — counts ACTUAL successes; re-runs are idempotent +ok=0; n=0 +while IFS= read -r line; do + case "$line" in ''|'#'*) continue ;; esac + uri="viking://user/$OVUSER/memories/sample/mem_$(printf '%03d' "$n").md" + resp="$(curl -sS -X POST "$OV/api/v1/content/write" -H 'content-type: application/json' \ + -d "$(jq -n --arg u "$uri" --arg c "$line" '{uri:$u,content:$c,mode:"create"}')")" + if echo "$resp" | jq -e '.result' >/dev/null 2>&1; then ok=$((ok+1)) + elif echo "$resp" | grep -qi exist; then ok=$((ok+1)) # idempotent: already loaded, no dup + else echo " FAIL [$n]: $(echo "$resp" | jq -rc '.error // .'
2>/dev/null | cut -c1-90)"; fi + n=$((n+1)) +done < ~/sample-memory.md +echo "loaded/present $ok of $n facts" +``` +> **Idempotency:** the filename is **deterministic** (`mem_000.md`, `mem_001.md`, …), so a re-run targets the *same* URIs — `mode:"create"` then reports "exists", which the loader counts as already-loaded (no duplicates). To force a clean reload, change the subdir (e.g. `sample2`). +**Query it semantically.** Results live under **`result.memories`** (also `result.resources` / `result.skills`); each item has `score` / `uri` / `abstract`: +```bash +# see the raw shape the first time: +curl -sS -X POST "$OV/api/v1/search/find" -H 'content-type: application/json' \ + -d "$(jq -n '{query:"what are my dietary restrictions?", top_k:5}')" | jq . + +# the ranked memories: +curl -sS -X POST "$OV/api/v1/search/find" -H 'content-type: application/json' \ + -d "$(jq -n '{query:"what are my dietary restrictions?", top_k:5}')" \ + | jq '.result.memories[]? | {score, uri, abstract}' +``` +Expected: the **peanut / lactose / vegetarian** entries rank top — none contain the word "dietary." That's semantic search earning its keep. Try `"where have I travelled?"` (→ Chengdu / Tokyo / Lisbon) and `"important family dates"` (→ birthday / anniversary). This is a **direct** OpenViking eval — it does not go through the AgentKeys gate (next step). + +> **`abstract` blank?** If you picked **Skip VLM** at setup, OpenViking still embeds + ranks (scores + URIs are correct) but has no model to generate the L0 `abstract`, so it can be empty. The ranking is unaffected — read the verbatim line by URI: +> ```bash +> top=$(curl -sS -X POST "$OV/api/v1/search/find" -H 'content-type: application/json' \ +> -d "$(jq -n '{query:"dietary restrictions", top_k:1}')" | jq -r '.result.memories[0].uri') +> curl -sS -X POST "$OV/api/v1/content/read" -H 'content-type: application/json' \ +> -d "$(jq -n --arg u "$top" '{uri:$u}')" | jq . 
+> ``` + +## Step 5 — the gated path: mirror gate-authorized lines + +For the *gated* flow, OpenViking may only rank lines AgentKeys authorized. So the lines in OpenViking must match what `memory.get` returns for the namespace (the gate then bounds the result to exactly that set). Mirror the lines already in the agent's memory namespace: +```bash +OV=http://localhost:1933; OVUSER=default +mirror() { # mirror <namespace> "<line>" <n> + local uri="viking://user/$OVUSER/memories/$1/mem_$3.md" + curl -sS -X POST "$OV/api/v1/content/write" -H 'content-type: application/json' \ + -d "$(jq -n --arg u "$uri" --arg c "$2" '{uri:$u,content:$c,mode:"create"}')" \ + | jq -rc '.result // .error // .' +} +mirror travel "Booked Chengdu flight CA4515 on Apr 12." 0 +mirror travel "Peanut allergy — note for inflight meals." 1 +mirror travel "Hotel in Yulin district near hotpot street." 2 +``` +> The gate matches OpenViking hits back to authorized lines by **text**, not URI — so the subdir/filename here are free; only the `content` must equal the namespace line. +> For production, mirror **on write** (when `agentkeys memory put` runs), not ad hoc — see plan §6a "remaining: write-path mirroring." + +## Step 6 — wire AgentKeys to use OpenViking as the engine + +This is the replacement for `hermes memory setup` (which we do **not** run — see the warning above). It bakes `AGENTKEYS_MEMORY_ENGINE=openviking` **and** `OPENVIKING_ENDPOINT` into the `pre_llm_call` hook — for the **same agent identity** the [Step-0 prerequisite](#prerequisites) (`harness/phase1-wire-demo.sh`) already wired. + +### 6a — inherit the agent's omni identity from the wire demo +`agentkeys wire` reads the actor/operator omni, MCP URL, vendor token, and session bearer from the env (`AGENTKEYS_ACTOR_OMNI`, `AGENTKEYS_OPERATOR_OMNI`, `AGENTKEYS_MCP_URL`, `AGENTKEYS_MCP_VENDOR_TOKEN`, `AGENTKEYS_SESSION_BEARER`) — and the wire demo **baked all five into the hook header**.
Recover them so this re-wire keeps the *same* agent (re-typing the 64-hex omnis is error-prone, and passing `--actor-omni ""` from an unset var silently wires an **empty** actor): +```bash +hook=~/.hermes/agent-hooks/agentkeys-prellm-memory-inject.sh +[ -f "$hook" ] || echo "no wired hook yet — run the Step-0 prerequisite (harness/phase1-wire-demo.sh) first" +eval "$(grep -E '^export AGENTKEYS_(ACTOR_OMNI|OPERATOR_OMNI|MCP_URL|MCP_VENDOR_TOKEN|SESSION_BEARER)=' "$hook" 2>/dev/null)" +: "${AGENTKEYS_ACTOR_OMNI:?unset — set it from the wire demo (see the per-mode note below)}" +echo "actor=$AGENTKEYS_ACTOR_OMNI" +echo "operator=$AGENTKEYS_OPERATOR_OMNI" +echo "mcp=$AGENTKEYS_MCP_URL vendor=$AGENTKEYS_MCP_VENDOR_TOKEN bearer=${AGENTKEYS_SESSION_BEARER:+set}" +``` +> **Where the omnis come from** (6a recovers them automatically — this is for setting them by hand): +> - **`--light`** (this runbook's default): the fixed in-memory demo identity (`AGENTKEYS_ACTOR_OMNI=0xa0c7…`, `AGENTKEYS_OPERATOR_OMNI=0x07e8…`). Canonical source: [`crates/agentkeys-mcp-server/src/backend/in_memory.rs`](../crates/agentkeys-mcp-server/src/backend/in_memory.rs) lines 33-35; `agentkeys wire` falls back to exactly these when the env is unset, so light mode works even if 6a found nothing — but only if you **omit** the flags (don't pass `--actor-omni ""`). +> - **`--real`**: the per-agent omnis the wire demo resolves in **Phase P** and prints in the `heima-agent-create.sh` `==> Inputs` block, e.g. +> ``` +> operator_omni = 0x941cb1c3260518bbf40eac7d02663517fc7cff304d9b03e80d2cc54126c6bef2 +> actor_omni = 0x18e49c6020dfef1bd1c973bb001b5fb95fa735c41c3a23efae2b22b6447c5ed8 +> ``` +> The operator omni derives from your master key; the actor omni is the HDKD child minted at pairing. The wire step bakes both into the hook, so 6a recovers them — no copy-paste. 
(The session bearer is a JWT that **expires**; if a real-mode re-wire later fails auth, re-run `harness/phase1-wire-demo.sh --real` to refresh it.) + +### 6b — wire (adds OpenViking; keeps the 6a identity) +```bash +agentkeys wire hermes \ + --actor-omni "$AGENTKEYS_ACTOR_OMNI" --operator-omni "$AGENTKEYS_OPERATOR_OMNI" \ + --namespaces travel \ + --memory-engine openviking --openviking-endpoint http://localhost:1933 \ + --mcp-url "$AGENTKEYS_MCP_URL" --vendor-token "$AGENTKEYS_MCP_VENDOR_TOKEN" +grep -E 'OPENVIKING_ENDPOINT|AGENTKEYS_MEMORY_ENGINE|AGENTKEYS_ACTOR_OMNI' ~/.hermes/agent-hooks/agentkeys-prellm-memory-inject.sh +``` +Leave `memory.provider` **unset** in `~/.hermes/config.yaml` — the AgentKeys hook stays the sole memory delivery. + +## Step 7 — test: gated → OpenViking-ranked → injected +OpenViking is query-driven, so feed the wired hook a turn (it reads the query from the host payload). The injected block is OpenViking-ranked **and** gate-bounded: +```bash +printf '%s' '{"query":"what about my peanut allergy?"}' \ + | bash ~/.hermes/agent-hooks/agentkeys-prellm-memory-inject.sh +``` +Expected — the peanut line ranked first, only gate-authorized lines: +```json +{"context":"## Memory: travel\nPeanut allergy — note for inflight meals.\n..."} +``` +Then the real chat (Phase 4 of the wire demo): ask the agent about the trip and watch it answer from the OpenViking-ranked, gate-bounded memory — without ever getting an OpenViking tool. 
+ +## Step 8 — verify the safety + privacy properties + +| Property | How to check | Expected | +|---|---|---| +| **Gate bounds visibility** | mirror a line into OpenViking under a namespace the cap does NOT authorize, then query Step 7 | it is **never** injected — `rank_gate_bounded` only returns gate-authorized lines | +| **OpenViking is not load-bearing** | `pkill -f openviking-server`, re-run Step 7 | still injects — falls back to the deterministic lexical engine, never errors | +| **LLM gets no memory tools** | `hermes hooks doctor` + inspect the tool list | only the 3 AgentKeys hooks; **no** `viking_*` | +| **Durable copy stays encrypted** | the S3 object `bots/<actor_omni>/memory/memory:travel.enc` | unchanged; OpenViking holds only its `viking://` index | + +## Step 9 — teardown +Revert to the deterministic engine **for the same agent**. The block re-recovers the identity from the hook (same as [6a](#6a--inherit-the-agents-omni-identity-from-the-wire-demo)) so it works in a fresh shell too — without it the omni env would be unset and the re-wire would pass an empty `--actor-omni ""`, wiring an **empty** actor into the hook: +```bash +pkill -f openviking-server 2>/dev/null; true +hook=~/.hermes/agent-hooks/agentkeys-prellm-memory-inject.sh +eval "$(grep -E '^export AGENTKEYS_(ACTOR_OMNI|OPERATOR_OMNI|MCP_URL|MCP_VENDOR_TOKEN|SESSION_BEARER)=' "$hook" 2>/dev/null)" +agentkeys wire hermes \ + --actor-omni "$AGENTKEYS_ACTOR_OMNI" --operator-omni "$AGENTKEYS_OPERATOR_OMNI" \ + --namespaces travel \ + --mcp-url "$AGENTKEYS_MCP_URL" --vendor-token "$AGENTKEYS_MCP_VENDOR_TOKEN" # no --memory-engine ⇒ deterministic +``` + +## Automated path + +Two scripts automate different slices — pick by where you run them.
+ +**Sandbox-side, all-in-one (recommended): [`harness/openviking-sandbox-setup.sh`](../harness/openviking-sandbox-setup.sh).** The idempotent, scripted form of **Steps 2–7**, run **inside the sandbox**: server init (`--init`, first time), start + health, corpus load, mirror, identity recovery (6a), re-wire (6b), and the gated injection test (7). Self-contained — the sample corpus is embedded, so nothing else to upload. It **assumes** the operator/Mac side is done (the agent is already wired — it recovers the omni identity from the hook) and OpenViking is pip-installed (Step 1). +```bash +# laptop (repo root) — upload the script into the sandbox: +curl -sS -X POST "${SANDBOX_URL:-http://localhost:8080}/v1/file/upload" \ + -F "file=@harness/openviking-sandbox-setup.sh" -F "path=/home/gem/openviking-sandbox-setup.sh" +# sandbox (docker exec -it <sandbox-container> bash): +bash ~/openviking-sandbox-setup.sh # init already done before +bash ~/openviking-sandbox-setup.sh --init # fresh sandbox: run the init wizard too +# --reload force a fresh corpus load · --verify prove the fallback · --no-test stop after wiring +``` +Re-run it any time — every step pre-checks and short-circuits (`ok` / `skip` / `fail`). + +**Laptop-driven harness: `bash harness/phase1-wire-demo.sh --openviking`** runs the AgentKeys-side checks (Steps 6–7) **when `openviking-server` is already reachable** at `OPENVIKING_ENDPOINT`. It does **not** install/configure OpenViking (Steps 1–3) or load a corpus (Steps 4–5). If the server isn't up, the phase skips with a pointer back here.
+ +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| `command not found: sbx` | you are inside the sandbox; `sbx` is a laptop-only helper | drop `sbx`, run the command directly (this runbook already does) | +| `/health` fails | embedding/VLM misconfigured | re-run `openviking-server init`; read `~/openviking.log` | +| `search/find` returns nothing | index empty | run Step 4/5 (load/mirror) — it ranks only what's indexed | +| Step 7 injects the *whole* namespace, unranked | hook fell back (no query, or `OPENVIKING_ENDPOINT` not baked) | confirm Step 6 baked the env; ensure the payload has a `query` field | +| Step 6 bakes an empty `AGENTKEYS_ACTOR_OMNI=''` (or the wrong actor) into the hook | you ran the wire command (6b) before exporting the omni env (6a) — `--actor-omni "$UNSET"` passes an empty string, overriding `wire`'s demo fallback | run **Step 6a** first: it recovers actor/operator omni + MCP URL + vendor token from the hook the [Step-0](#prerequisites) wire demo baked. `--light` falls back to the `in_memory.rs` demo omnis only when you **omit** the flags entirely | +| `memory.get() failed … cap_mint failed … service_not_in_scope` → empty injection | the agent's on-chain scope grants bare `memory`, but the cap requests `memory:` (issue #147; `keccak("memory") ≠ keccak("memory:")`, arch.md §896) | grant the **namespace-qualified** service: re-run `bash harness/phase1-wire-demo.sh --real --webauthn` (now grants `memory:`), or directly `bash scripts/heima-scope-set.sh --webauthn --agent