TYRMars · TYRMars · May 14, 2026 · May 14, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -119,8 +119,10 @@ multi-hunk unified-diff apply, atomic per call, approval-gated),
 toolset, which is otherwise on by default),
 `JARVIS_NO_PROJECT_CONTEXT` (any value disables auto-loading
 `AGENTS.md` / `CLAUDE.md` / `AGENT.md` from the workspace into the
-system prompt; defaults to loading them, capped at 32 KiB),
-`JARVIS_PROJECT_CONTEXT_BYTES` (override the default 32 KiB cap),
+system prompt; defaults to loading them, capped at 8 KiB),
+`JARVIS_PROJECT_CONTEXT_BYTES` (override the default 8 KiB cap;
+truncation logs a startup WARN naming this env var so operators with
+larger instruction files can opt back to a higher cap explicitly),
 `JARVIS_SHELL_TIMEOUT_MS` (default `30000`, per-call default for `shell.exec`),
 `JARVIS_MCP_SERVERS` (comma-separated `prefix=command args...` list of
 external MCP servers to spawn and adapt into Tools),
@@ -968,9 +970,13 @@ binary appends the workspace's `AGENTS.md` / `CLAUDE.md` /
 `harness_tools::workspace::load_instructions(root, max_bytes)`.
 Each file is wrapped in a `=== project context: <name> ===` header
 so the model can tell injected guidance from its own template.
-Combined output is capped at 32 KiB (override:
+Combined output is capped at 8 KiB (override:
 `JARVIS_PROJECT_CONTEXT_BYTES`); overflow is truncated with a
-`[... project context truncated at N bytes ...]` marker. Disable
+`[... project context truncated at N bytes ...]` marker, and
+`apps/jarvis::serve` logs a startup WARN whenever truncation fires so
+operators with larger instruction files notice and can raise the cap
+deliberately. The lower default (was 32 KiB) keeps the system prompt
+focused so prior tool results stay in the model's attention window. Disable
 entirely via `JARVIS_NO_PROJECT_CONTEXT=1` or
 `[agent].include_project_context = false`. `jarvis-cli` honours the
 same env vars plus a `--no-project-context` CLI flag. Deliberately

diff --git a/apps/jarvis/src/serve.rs b/apps/jarvis/src/serve.rs
@@ -376,8 +376,22 @@ pub async fn run(
         if let Some(extra) = load_instructions_bounded(workspace_root.clone(), project_ctx_cap) {
             info!(
                 bytes = extra.len(),
+                cap = project_ctx_cap,
                 "loaded project instructions (AGENTS.md / JARVIS.md / CLAUDE.md / .jarvis)"
             );
+            // The loader truncates at the cap and tags the suffix. When
+            // it fired, surface a WARN so operators with larger
+            // instruction files notice and can opt back to a higher cap
+            // via `JARVIS_PROJECT_CONTEXT_BYTES` rather than silently
+            // shipping truncated guidance to the model.
+            if extra.contains("project context truncated at") {
+                warn!(
+                    cap = project_ctx_cap,
+                    "project instructions exceeded cap — output truncated. \
+                     Set JARVIS_PROJECT_CONTEXT_BYTES=<n> or [agent].project_context_max_bytes \
+                     to raise (default lowered to 8 KiB to keep system prompt focused)."
+                );
+            }
             system_prompt.push_str("\n\n");
             system_prompt.push_str(&extra);
             project_context_loaded = true;
@@ -2018,7 +2032,18 @@ use ask.text instead of guessing.";
 /// checks, and end with a change report.
 const CODING_SYSTEM_PROMPT: &str =
     "You are Jarvis, a coding agent working in the user's repository. \
-Before editing, call workspace.context to orient yourself, then inspect git status. \
+\
+Reuse what you already gathered. Before calling any tool, scan earlier turns of THIS \
+conversation: if a recent tool result already answers the user's current question \
+(workspace.context, todo.list, requirement.list, triage.scan_candidates, fs.read of \
+the same path, code.grep of the same pattern, etc.), refer back to it instead of \
+re-running the tool. Re-running orientation tools on every user turn wastes the \
+user's tokens and dilutes your own attention. Only re-run when you have a specific \
+reason to believe the underlying state changed (a write happened, the user said the \
+file changed, enough turns have passed that staleness matters). \
+\
+If you don't already have the workspace layout in this conversation, call workspace.context \
+once to orient yourself, and inspect git status before editing. \
 Do not overwrite user changes you did not make. \
 Prefer code.grep, fs.read, fs.list, git.status, and git.diff before reaching for shell.exec. \
 When you need a human decision, missing information, or a choice among acceptable options, \
@@ -2027,7 +2052,7 @@ Use fs.edit (uniqueness-checked single replace) or fs.patch (unified-diff multi-
 reviewable edits; reach for fs.write only to create new files. \
 When you run checks (tests, lints, builds), keep them focused on the change rather than the \
 whole repo. \
-At the start of a fresh session, call todo.list to see persistent project follow-ups; \
+Once per conversation (not per turn), call todo.list to see persistent project follow-ups; \
 record new follow-ups via todo.add (not plan.update — that's for the current turn only) \
 and mark them completed/blocked as you go. \
 If your previous turn asked a yes/no question about a specific entity (\"标记为 Done?\", \
@@ -2049,7 +2074,8 @@ the final status report. \
 Spec → Project workflow: when the user describes new work — either inline (\"add a user-avatar \
 upload\") or by pointing at a doc (\"read docs/feature-x.md and lay out the work\") — drive the \
 following sequence: \
-(1) call workspace.context, plus fs.read on any doc the user named; \
+(1) make sure you have the workspace context (call workspace.context only if you don't already have \
+it from earlier in this conversation), and fs.read any doc the user named; \
 (2) call plan.update with the proposed breakdown (titles only) and let the user confirm or edit; \
 (3) once confirmed, resolve the project (project.list / project.get; project.create only if it \
 genuinely does not exist), then call requirement.create per item — populate `verification_plan.commands` \
@@ -2077,14 +2103,20 @@ fn include_project_context(cfg: &Config) -> bool {
 }
 
 /// Cap on the total bytes of project context appended to the system
-/// prompt. Defaults to 32 KiB — enough for realistic agent
-/// instruction files, far short of blowing a small-context model.
+/// prompt. Defaults to 8 KiB — large enough for realistic AGENTS.md /
+/// CLAUDE.md / .jarvis/rules files, but tight enough that the system
+/// prompt doesn't drown out mid-conversation tool results in the
+/// model's attention window. Override with `JARVIS_PROJECT_CONTEXT_BYTES`
+/// or `[agent].project_context_max_bytes` when you have larger
+/// instruction files you genuinely want to inline. The previous default
+/// (32 KiB) regularly produced 30-KiB system blocks that pushed prior
+/// tool output too deep for the model to reuse.
 fn project_context_max_bytes(cfg: &Config) -> usize {
     std::env::var("JARVIS_PROJECT_CONTEXT_BYTES")
         .ok()
         .and_then(|s| s.parse().ok())
         .or(cfg.agent.project_context_max_bytes)
-        .unwrap_or(32 * 1024)
+        .unwrap_or(8 * 1024)
 }
 
 /// Resolve the file-based project memory switch. `Some(true)` means

diff --git a/crates/harness-core/src/agent.rs b/crates/harness-core/src/agent.rs
@@ -92,6 +92,15 @@ pub struct AgentConfig {
     /// dispatch is harmless because the LLM only emits one call at
     /// a time.
     pub parallel_tool_calls: bool,
+    /// When `true` (default), `ensure_system_prompt` will *replace* the
+    /// first `System` message of a loaded conversation when its content
+    /// no longer matches the configured `system_prompt`. The historical
+    /// behaviour was insert-if-missing only, which silently locked old
+    /// conversations into whatever prompt was active at creation —
+    /// updates to the binary's prompt template never reached resumed
+    /// sessions. Set to `false` for workflows that deliberately persist
+    /// per-conversation custom prompts.
+    pub refresh_system_prompt_on_resume: bool,
 }
 
 impl AgentConfig {
@@ -108,6 +117,7 @@ impl AgentConfig {
             tool_filter: None,
             session_workspace: None,
             parallel_tool_calls: false,
+            refresh_system_prompt_on_resume: true,
         }
     }
 
@@ -170,6 +180,13 @@ impl AgentConfig {
         self.parallel_tool_calls = enabled;
         self
     }
+
+    /// Toggle resume-time refresh of the system prompt. See
+    /// [`AgentConfig::refresh_system_prompt_on_resume`].
+    pub fn with_refresh_system_prompt_on_resume(mut self, enabled: bool) -> Self {
+        self.refresh_system_prompt_on_resume = enabled;
+        self
+    }
 }
 
 /// What ended a `run` invocation.
@@ -375,7 +392,11 @@ impl Agent {
     }
 
     async fn run_inner(&self, conversation: &mut Conversation) -> Result<(RunOutcome, Usage)> {
-        Self::ensure_system_prompt(conversation, self.config.system_prompt.as_deref());
+        Self::ensure_system_prompt(
+            conversation,
+            self.config.system_prompt.as_deref(),
+            self.config.refresh_system_prompt_on_resume,
+        );
 
         let mut total_usage = Usage::default();
         let run_span = Span::current();
@@ -519,7 +540,11 @@ impl Agent {
             jarvis.transport = "stream",
         );
         let inner = stream! {
-            Self::ensure_system_prompt(&mut conversation, agent.config.system_prompt.as_deref());
+            Self::ensure_system_prompt(
+                &mut conversation,
+                agent.config.system_prompt.as_deref(),
+                agent.config.refresh_system_prompt_on_resume,
+            );
 
             for iter in 1..=agent.config.max_iterations {
                 let req = match agent.build_request(&conversation).await {
@@ -1145,15 +1170,28 @@ impl Agent {
         })
     }
 
-    fn ensure_system_prompt(conv: &mut Conversation, prompt: Option<&str>) {
+    fn ensure_system_prompt(conv: &mut Conversation, prompt: Option<&str>, refresh: bool) {
         let Some(prompt) = prompt else { return };
-        let has_system = conv
-            .messages
-            .first()
-            .map(|m| matches!(m, Message::System { .. }))
-            .unwrap_or(false);
-        if !has_system {
-            conv.messages.insert(0, Message::system(prompt));
+        match conv.messages.first() {
+            Some(Message::System { content, .. }) if content == prompt => {
+                // Already in sync — nothing to do.
+            }
+            Some(Message::System { .. }) if refresh => {
+                // First message is a stale System (different content from
+                // the configured prompt) and the refresh policy is on:
+                // replace it. This is what unblocks resumed conversations
+                // from staying stuck on whatever prompt was active when
+                // they were originally created.
+                conv.messages[0] = Message::system(prompt);
+            }
+            Some(Message::System { .. }) => {
+                // Stale but refresh is off — historical behaviour, leave
+                // the persisted custom prompt alone.
+            }
+            _ => {
+                // No leading System at all — insert one.
+                conv.messages.insert(0, Message::system(prompt));
+            }
         }
     }
 
@@ -1883,4 +1921,53 @@ mod tests {
         let req = futures::executor::block_on(agent.build_request(&conv)).unwrap();
         assert_eq!(req.parallel_tool_calls, None);
     }
+
+    #[test]
+    fn ensure_system_prompt_inserts_when_missing() {
+        let mut conv = Conversation::new();
+        conv.push(Message::user("hi"));
+        Agent::ensure_system_prompt(&mut conv, Some("PROMPT"), true);
+        assert!(matches!(conv.messages[0], Message::System { ref content, .. } if content == "PROMPT"));
+        assert_eq!(conv.messages.len(), 2);
+    }
+
+    #[test]
+    fn ensure_system_prompt_skips_when_already_in_sync() {
+        let mut conv = Conversation::new();
+        conv.push(Message::system("PROMPT"));
+        conv.push(Message::user("hi"));
+        let before_len = conv.messages.len();
+        Agent::ensure_system_prompt(&mut conv, Some("PROMPT"), true);
+        assert_eq!(conv.messages.len(), before_len);
+        assert!(matches!(conv.messages[0], Message::System { ref content, .. } if content == "PROMPT"));
+    }
+
+    #[test]
+    fn ensure_system_prompt_replaces_stale_when_refresh_on() {
+        let mut conv = Conversation::new();
+        conv.push(Message::system("STALE"));
+        conv.push(Message::user("hi"));
+        Agent::ensure_system_prompt(&mut conv, Some("FRESH"), true);
+        assert_eq!(conv.messages.len(), 2);
+        assert!(matches!(conv.messages[0], Message::System { ref content, .. } if content == "FRESH"));
+    }
+
+    #[test]
+    fn ensure_system_prompt_keeps_stale_when_refresh_off() {
+        let mut conv = Conversation::new();
+        conv.push(Message::system("STALE"));
+        conv.push(Message::user("hi"));
+        Agent::ensure_system_prompt(&mut conv, Some("FRESH"), false);
+        assert_eq!(conv.messages.len(), 2);
+        assert!(matches!(conv.messages[0], Message::System { ref content, .. } if content == "STALE"));
+    }
+
+    #[test]
+    fn ensure_system_prompt_no_op_when_no_prompt_configured() {
+        let mut conv = Conversation::new();
+        conv.push(Message::system("KEEP"));
+        conv.push(Message::user("hi"));
+        Agent::ensure_system_prompt(&mut conv, None, true);
+        assert!(matches!(conv.messages[0], Message::System { ref content, .. } if content == "KEEP"));
+    }
 }
diff --git a/crates/harness-core/src/lib.rs b/crates/harness-core/src/lib.rs
@@ -66,7 +66,7 @@ pub use label::{
 pub use llm::{ChatRequest, ChatResponse, FinishReason, LlmChunk, LlmProvider, LlmStream, Usage};
 pub use memory::{
     cache_breakpoint_indices, default_estimator, estimate_tokens, estimate_total_tokens,
-    CharRatioEstimator, Memory, TokenEstimator,
+    CharRatioEstimator, JsonAwareEstimator, Memory, TokenEstimator,
 };
 pub use message::{CacheHint, Message, ToolCall};
 pub use observability::{

diff --git a/crates/harness-core/src/memory.rs b/crates/harness-core/src/memory.rs
@@ -76,6 +76,53 @@ impl TokenEstimator for CharRatioEstimator {
     }
 }
 
+/// Variant estimator that keeps `CharRatioEstimator`'s numbers for
+/// natural-language messages but uses a tighter ratio (`chars * 2 / 7`,
+/// i.e. roughly `chars / 3.5`) plus a larger per-message overhead for
+/// `Message::Tool` payloads, which are typically dense JSON output from
+/// agent tools (escapes, brackets, quoted keys) and which the
+/// `chars / 4 + 4` heuristic systematically underestimates by ~15-25 %.
+///
+/// Use this when a memory backend is wired without a provider-supplied
+/// tokeniser (e.g. tests, jarvis-cli pipe-mode runs) and the
+/// conversation involves substantial tool output. Production paths
+/// already plug in `TiktokenEstimator` via `LlmProvider::estimator`,
+/// which is more accurate still — `JsonAwareEstimator` only matters
+/// where the provider doesn't override.
+#[derive(Debug, Default, Clone, Copy)]
+pub struct JsonAwareEstimator;
+
+impl JsonAwareEstimator {
+    pub const fn new() -> Self {
+        Self
+    }
+}
+
+impl TokenEstimator for JsonAwareEstimator {
+    fn estimate_message(&self, message: &Message) -> usize {
+        match message {
+            Message::Tool {
+                tool_call_id,
+                content,
+                ..
+            } => {
+                let chars = tool_call_id.chars().count() + content.chars().count();
+                // chars / 3.5 == chars * 2 / 7. Rounded up to be safe.
+                (chars * 2).div_ceil(7) + 8
+            }
+            other => estimate_tokens(other),
+        }
+    }
+
+    fn estimate_text(&self, text: &str) -> usize {
+        // Plain text doesn't carry the JSON-overhead penalty — fall back
+        // to the cheap ratio. This stays consistent with how
+        // `SummarizingMemory` measures the synthetic summary string,
+        // which is prose, not JSON.
+        text.chars().count().div_ceil(4)
+    }
+}
+
 /// Cheap, provider-agnostic token estimate. Roughly `chars / 4` plus a
 /// fixed per-message overhead for role/separator tokens. This is *not* a
 /// precise tiktoken count — it's a heuristic that lets memory backends
@@ -221,4 +268,46 @@ mod tests {
         let msgs = vec![Message::system("sys"), Message::user("hi")];
         assert!(cache_breakpoint_indices(&msgs).is_empty());
     }
+
+    #[test]
+    fn json_aware_estimates_tool_higher_than_char_ratio() {
+        let json_payload = r#"{"branch":"main","head":"abc1234","dirty":false,"instructions":[],"manifests":[{"path":"Cargo.toml","kind":"rust"}]}"#;
+        let tool_msg = Message::Tool {
+            tool_call_id: "call_workspace_ctx".into(),
+            content: json_payload.to_string(),
+            cache: None,
+        };
+        let char_ratio = CharRatioEstimator;
+        let json_aware = JsonAwareEstimator;
+        let cr = char_ratio.estimate_message(&tool_msg);
+        let ja = json_aware.estimate_message(&tool_msg);
+        assert!(
+            ja > cr,
+            "json-aware estimate ({ja}) should exceed char-ratio ({cr}) for dense JSON"
+        );
+        // Sanity: not absurdly higher (ballpark 15-30 % over).
+        assert!(ja < cr * 2, "json-aware ({ja}) blew past 2x char-ratio ({cr})");
+    }
+
+    #[test]
+    fn json_aware_matches_char_ratio_for_non_tool_messages() {
+        let json_aware = JsonAwareEstimator;
+        let char_ratio = CharRatioEstimator;
+        for m in [
+            Message::system("you are jarvis"),
+            Message::user("hello"),
+            Message::assistant_text("hi there"),
+        ] {
+            assert_eq!(json_aware.estimate_message(&m), char_ratio.estimate_message(&m));
+        }
+    }
+
+    #[test]
+    fn json_aware_text_matches_char_ratio() {
+        let json_aware = JsonAwareEstimator;
+        let char_ratio = CharRatioEstimator;
+        for s in ["", "hi", "the quick brown fox", "0123456789abcdef"] {
+            assert_eq!(json_aware.estimate_text(s), char_ratio.estimate_text(s));
+        }
+    }
 }