humancto · humancto · Mar 26, 2026 · chatgpt-codex-connector · Mar 26, 2026
diff --git a/crates/punch-runtime/src/context_budget.rs b/crates/punch-runtime/src/context_budget.rs
@@ -31,6 +31,16 @@ const SINGLE_RESULT_MAX_FRACTION: f64 = 0.50;
 /// Total fraction of window available for all tool results combined.
 const TOTAL_TOOL_HEADROOM_FRACTION: f64 = 0.75;
 
+/// Minimum number of messages in the history before sliding window
+/// summarization is considered; shorter histories are never summarized.
+const SUMMARIZE_THRESHOLD: usize = 12;
+
+/// Number of most recent messages to preserve verbatim (not summarized).
+const SUMMARIZE_KEEP_RECENT: usize = 6;
+
+/// Maximum size of the text fed into the summarization prompt (from old messages).
+/// The limit is checked against `String::len()`, so it is measured in UTF-8
+/// bytes rather than Unicode characters.
+/// Keeps the summarization LLM call cheap.
+const SUMMARIZE_INPUT_MAX_CHARS: usize = 8_000;
+
 /// Context budget configuration and enforcement.
 #[derive(Debug, Clone)]
 pub struct ContextBudget {
@@ -297,6 +307,120 @@ pub enum TrimAction {
     Aggressive,
 }
 
+/// Decide whether the history should go through sliding window summarization.
+///
+/// Returns `true` only when both conditions hold:
+/// - the history contains at least `SUMMARIZE_THRESHOLD` messages, and
+/// - no `System` message starts with the `"[Earlier in this conversation"`
+///   marker, i.e. the history has not been summarized before.
+///
+/// Note that once a marker is present this returns `false` regardless of how
+/// many messages follow it.
+pub fn needs_summarization(messages: &[Message]) -> bool {
+    // A previously inserted summary is recognised by its marker prefix.
+    let already_summarized = || {
+        messages.iter().any(|msg| {
+            msg.role == Role::System && msg.content.starts_with("[Earlier in this conversation")
+        })
+    };
+
+    // Short-circuits: the scan only runs once the length threshold is met.
+    messages.len() >= SUMMARIZE_THRESHOLD && !already_summarized()
+}
+
+/// Build a compact text representation of old messages for the summarization prompt.
+///
+/// The "old" messages are everything except the first message and the last
+/// `SUMMARIZE_KEEP_RECENT` messages. Each one is rendered as `Role: content`
+/// on its own line, followed by one `[called: name]` line per tool call and one
+/// `[result: ...]` line per tool result. Message bodies are cut to 500 bytes and
+/// tool results to 200 bytes (on a UTF-8 char boundary), with `...` appended.
+///
+/// Returns `(text, old_count)`:
+/// - `text` is capped at `SUMMARIZE_INPUT_MAX_CHARS` bytes; rendering stops at
+///   the first message that reaches the cap, so later old messages are omitted.
+/// - `old_count` is the number of old messages in the window, including any
+///   that did not fit into `text`.
+///
+/// An empty or single-message history yields `(String::new(), 0)`-like output
+/// (empty text, zero count) instead of panicking.
+pub fn build_summarization_input(messages: &[Message]) -> (String, usize) {
+    let keep_recent = SUMMARIZE_KEEP_RECENT.min(messages.len().saturating_sub(1));
+    let end = messages.len().saturating_sub(keep_recent);
+
+    // Skip first message (initial user prompt, preserved separately).
+    // `get` is used instead of indexing because an empty history produces the
+    // range `1..0`, which panics when sliced directly.
+    let old_messages = messages.get(1..end).unwrap_or(&[]);
+    let old_count = old_messages.len();
+
+    let mut text = String::with_capacity(SUMMARIZE_INPUT_MAX_CHARS);
+    for msg in old_messages {
+        let role_label = match msg.role {
+            Role::User => "User",
+            Role::Assistant => "Assistant",
+            Role::System => "System",
+            Role::Tool => "Tool",
+        };
+
+        // Append message content compactly.
+        text.push_str(role_label);
+        text.push_str(": ");
+        push_truncated(&mut text, &msg.content, 500);
+        text.push('\n');
+
+        // Include tool call names (but not full args).
+        for tc in &msg.tool_calls {
+            text.push_str("  [called: ");
+            text.push_str(&tc.name);
+            text.push_str("]\n");
+        }
+
+        // Include short tool results.
+        for tr in &msg.tool_results {
+            text.push_str("  [result: ");
+            push_truncated(&mut text, &tr.content, 200);
+            text.push_str("]\n");
+        }
+
+        // The cap is checked after a whole message has been rendered, then the
+        // overshoot is trimmed back to a valid char boundary.
+        if text.len() >= SUMMARIZE_INPUT_MAX_CHARS {
+            text.truncate(text.floor_char_boundary(SUMMARIZE_INPUT_MAX_CHARS));
+            break;
+        }
+    }
+
+    (text, old_count)
+}
+
+/// Append `s` to `out`; if `s` is longer than `max_bytes`, append only its
+/// prefix up to the nearest char boundary at or below `max_bytes`, then `...`.
+fn push_truncated(out: &mut String, s: &str, max_bytes: usize) {
+    if s.len() > max_bytes {
+        out.push_str(&s[..s.floor_char_boundary(max_bytes)]);
+        out.push_str("...");
+    } else {
+        out.push_str(s);
+    }
+}
+
+/// Apply a summary to the message history: replace old messages with the summary.
+///
+/// Preserves: first message + summary marker + last `SUMMARIZE_KEEP_RECENT` messages.
+/// Everything in between is dropped and represented by a single `System`
+/// message of the form
+/// `[Earlier in this conversation (N messages summarized):\n<summary>]`,
+/// where `N` is the number of dropped messages.
+///
+/// The first message is never counted as part of the recent tail, so a history
+/// shorter than `SUMMARIZE_KEEP_RECENT + 1` keeps all of its messages and still
+/// gains a marker. An empty history is left untouched.
+pub fn apply_summary(messages: &mut Vec<Message>, summary: &str) {
+    // There is no first message to anchor the summary to; indexing would panic.
+    if messages.is_empty() {
+        return;
+    }
+
+    let original_len = messages.len();
+    let keep_recent = SUMMARIZE_KEEP_RECENT.min(original_len - 1);
+    let summarized = original_len - 1 - keep_recent;
+
+    // Move the recent tail out, drop everything after the first message, then
+    // rebuild in place. No message is cloned.
+    let mut tail = messages.split_off(original_len - keep_recent);
+    messages.truncate(1);
+    messages.push(Message::new(
+        Role::System,
+        format!(
+            "[Earlier in this conversation ({} messages summarized):\n{}]",
+            summarized, summary
+        ),
+    ));
+    messages.append(&mut tail);
+
+    info!(
+        original = original_len,
+        summarized_to = messages.len(),
+        "sliding window summarization applied"
+    );
+}
+
 /// Find a valid UTF-8 char boundary at or before `pos`.
 fn find_char_boundary(s: &str, pos: usize) -> usize {
     if pos >= s.len() {
@@ -684,4 +808,103 @@ mod tests {
         assert_eq!(TrimAction::Aggressive, TrimAction::Aggressive);
         assert_ne!(TrimAction::Moderate, TrimAction::Aggressive);
     }
+
+    // -----------------------------------------------------------------------
+    // Sliding window summarization tests
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_needs_summarization_below_threshold() {
+        // Ten messages: fewer than the summarization threshold.
+        let mut msgs: Vec<Message> = Vec::with_capacity(10);
+        for i in 0..10 {
+            msgs.push(make_message(Role::User, &format!("msg {}", i)));
+        }
+
+        assert!(!needs_summarization(&msgs));
+    }
+
+    #[test]
+    fn test_needs_summarization_above_threshold() {
+        // Fifteen plain user messages and no summary marker anywhere.
+        let user_msg = |i: i32| make_message(Role::User, &format!("msg {}", i));
+        let msgs = (0..15).map(user_msg).collect::<Vec<Message>>();
+
+        let eligible = needs_summarization(&msgs);
+        assert!(eligible);
+    }
+
+    #[test]
+    fn test_needs_summarization_skips_if_already_summarized() {
+        // Layout: first message, summary marker, then fourteen more messages.
+        let marker = make_message(Role::System, "[Earlier in this conversation: some summary]");
+        let mut msgs: Vec<Message> = vec![make_message(Role::User, "msg 0"), marker];
+        for i in 1..15 {
+            msgs.push(make_message(Role::User, &format!("msg {}", i)));
+        }
+
+        // Long enough to qualify, but the marker suppresses a second pass.
+        assert!(!needs_summarization(&msgs));
+    }
+
+    #[test]
+    fn test_build_summarization_input_basic() {
+        let mut msgs: Vec<Message> = vec![make_message(Role::User, "initial question")];
+        for i in 1..=10 {
+            msgs.push(make_message(Role::Assistant, &format!("response {}", i)));
+            msgs.push(make_message(Role::User, &format!("follow-up {}", i)));
+        }
+        // 21 messages total: first + 10 pairs
+
+        let (text, old_count) = build_summarization_input(&msgs);
+
+        // The old window is everything between the first message and the last 6:
+        // indices 1..=14, i.e. responses and follow-ups 1 through 7.
+        assert_eq!(old_count, 14);
+        assert!(text.contains("Assistant: response 1\n"));
+        assert!(text.contains("User: follow-up 7\n"));
+
+        // Neither the first message nor the preserved recent tail may leak in.
+        assert!(!text.contains("initial question"));
+        assert!(!text.contains("response 8"));
+        assert!(!text.contains("follow-up 10"));
+    }
+
+    #[test]
+    fn test_build_summarization_input_truncates_long_content() {
+        let mut msgs: Vec<Message> = vec![make_message(Role::User, "start")];
+        for _ in 0..15 {
+            msgs.push(make_message(Role::Assistant, &"x".repeat(1000)));
+        }
+        msgs.push(make_message(Role::User, "end"));
+
+        let (text, old_count) = build_summarization_input(&msgs);
+
+        // 17 messages: first + 10 old + last 6 kept.
+        assert_eq!(old_count, 10);
+
+        // Each 1000-char body is cut to 500 chars plus "...", so no run of more
+        // than 500 x's can survive in the output.
+        assert!(text.contains("..."));
+        assert!(!text.contains("x".repeat(501).as_str()));
+
+        // The overall cap must hold as well.
+        assert!(text.len() <= super::SUMMARIZE_INPUT_MAX_CHARS);
+    }
+
+    #[test]
+    fn test_apply_summary_preserves_structure() {
+        // Alternating turns: even indices are user messages, odd are assistant.
+        let mut msgs: Vec<Message> = Vec::with_capacity(15);
+        for i in 0..15 {
+            let msg = match i % 2 {
+                0 => make_message(Role::User, &format!("user msg {}", i)),
+                _ => make_message(Role::Assistant, &format!("assistant msg {}", i)),
+            };
+            msgs.push(msg);
+        }
+
+        apply_summary(&mut msgs, "- Key point 1\n- Key point 2");
+
+        // Expected layout: first message, summary marker, then the last 6 messages.
+        assert_eq!(msgs.len(), 8);
+
+        let first = &msgs[0];
+        assert!(first.content.contains("user msg 0")); // first preserved
+
+        let marker = &msgs[1];
+        assert_eq!(marker.role, Role::System);
+        assert!(marker.content.contains("Key point 1"));
+        assert!(marker.content.contains("Earlier in this conversation"));
+
+        let newest = msgs.last().unwrap();
+        assert!(newest.content.contains("msg 14")); // last preserved
+    }
+
+    #[test]
+    fn test_apply_summary_small_history_no_panic() {
+        let mut msgs = vec![
+            make_message(Role::User, "hello"),
+            make_message(Role::Assistant, "hi"),
+        ];
+        // Should not panic even with very small history.
+        apply_summary(&mut msgs, "summary");
+
+        // With only 2 messages, keep_recent=min(6,1)=1, so: first + summary + last 1 = 3
+        assert_eq!(msgs.len(), 3);
+        assert!(msgs[0].content.contains("hello"));
+        assert_eq!(msgs[1].role, Role::System);
+        assert!(msgs[1].content.contains("summary"));
+        assert!(msgs[2].content.contains("hi"));
+    }
 }
diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
@@ -306,6 +306,57 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                 .clone()
         };
 
+        // --- Sliding window summarization ---
+        // When conversation grows long, summarize older messages to save tokens.
+        // This runs before the hard trim, acting as a proactive compactor.
+        if crate::context_budget::needs_summarization(&messages) {
+            let (input_text, old_count) =
+                crate::context_budget::build_summarization_input(&messages);
+            debug!(
+                old_messages = old_count,
+                input_chars = input_text.len(),
+                "summarizing older messages"
+            );
+
+            let summary_prompt = format!(
+                "Summarize the following conversation excerpt in 2-4 concise bullet points. \
+                 Focus on: key decisions made, important facts established, tasks completed or \
+                 in progress, and any commitments or preferences expressed. Be specific — include \
+                 names, file paths, and concrete details. Respond with ONLY the bullet points, \
+                 no preamble.\n\n{}",
+                input_text
+            );
+
+            // A separate, tool-free request with a small output budget and low
+            // temperature; it reuses the fighter's configured model.
+            let summary_request = CompletionRequest {
+                model: params.manifest.model.model.clone(),
+                messages: vec![Message::new(Role::User, &summary_prompt)],
+                tools: vec![],
+                max_tokens: 300,
+                temperature: Some(0.2),
+                system_prompt: Some(
+                    "You are a conversation summarizer. Be concise and specific.".to_string(),
+                ),
+            };
+
+            match active_driver.complete(summary_request).await {
+                Ok(resp) => {
+                    // The tokens were spent whether or not the reply is usable, so
+                    // account for them before inspecting the content.
+                    total_usage.accumulate(&resp.usage);
+
+                    let summary = resp.message.content.trim();
+                    if summary.is_empty() {
+                        // Leave the history untouched; the hard trim below still applies.
+                        warn!("sliding window summarization returned an empty summary; skipping");
+                    } else {
+                        crate::context_budget::apply_summary(&mut messages, summary);
+                        info!(
+                            summary_tokens = resp.usage.total(),
+                            "sliding window summarization complete"
+                        );
+                    }
+                }
+                Err(e) => {
+                    // Non-fatal: if summarization fails, we still have the hard trim as fallback.
+                    warn!(error = %e, "sliding window summarization failed (non-critical)");
+                }
+            }
+        }
+
         // --- Context Budget: check and trim before LLM call ---
         if let Some(trim_action) = budget.check_trim_needed(&messages, &turn_tools) {
             budget.apply_trim(&mut messages, trim_action);