diff --git a/proto/tim-api/tim/api/thread/v1alpha1/thread_service.proto b/proto/tim-api/tim/api/thread/v1alpha1/thread_service.proto
index 30285c1be..27870d67b 100644
--- a/proto/tim-api/tim/api/thread/v1alpha1/thread_service.proto
+++ b/proto/tim-api/tim/api/thread/v1alpha1/thread_service.proto
@@ -98,6 +98,15 @@ service ThreadService {
     };
     option (google.api.method_signature) = "path,working_directory";
   }
+
+  // Compact a thread to reduce context size
+  rpc CompactThread(CompactThreadRequest) returns (CompactThreadResponse) {
+    option (google.api.http) = {
+      post: "/v1alpha1/{path=orgs/*/users/*/threads/*}:compact"
+      body: "*"
+    };
+    option (google.api.method_signature) = "path,compaction_config";
+  }
 }
 
 // GetThreadRequest is used to get a specific thread by its UID
@@ -293,6 +302,10 @@ message SubmitUserMessageRequest {
     (buf.validate.field).required = true,
     (google.api.field_behavior) = REQUIRED
   ];
+
+  // Optional compaction configuration for automatic compaction
+  // If provided and threshold is exceeded, the thread will be compacted before processing
+  optional CompactionConfig compaction_config = 3;
 }
 
 // Input for submitting a user message
@@ -365,3 +378,28 @@ message ConfigureThreadWorkingDirectoryRequest {
     (buf.validate.field).string.min_len = 1
   ];
 }
+
+// CompactThreadRequest is used to manually compact a thread
+message CompactThreadRequest {
+  // The resource path of the thread to compact
+  string path = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (aep.api.field_info).resource_reference = "tim.settlerlabs.com/thread",
+    (buf.validate.field).string.pattern = "^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$"
+  ];
+
+  // The compaction configuration
+  CompactionConfig compaction_config = 2 [
+    (buf.validate.field).required = true,
+    (google.api.field_behavior) = REQUIRED
+  ];
+}
+
+// CompactThreadResponse is the response for compacting a thread
+message CompactThreadResponse {
+  // The compaction result
+  CompactionResult result = 1 [
+    (buf.validate.field).required = true,
+    (google.api.field_behavior) = REQUIRED
+  ];
+}
diff --git a/proto/tim-api/tim/api/thread/v1alpha1/thread_types.proto b/proto/tim-api/tim/api/thread/v1alpha1/thread_types.proto
index 71d685576..a37520a4f 100644
--- a/proto/tim-api/tim/api/thread/v1alpha1/thread_types.proto
+++ b/proto/tim-api/tim/api/thread/v1alpha1/thread_types.proto
@@ -261,6 +261,114 @@ message FileRestoration {
   ];
 }
 
+// CompactionConfig is the configuration for thread context compaction
+message CompactionConfig {
+  // Threshold as a fraction (0.0-1.0, not 0-100) of model's max capacity at which to trigger compaction
+  // e.g., 0.8 means compact when context reaches 80% of model's max tokens
+  float threshold_percentage = 1 [
+    (buf.validate.field).float.gte = 0.0,
+    (buf.validate.field).float.lte = 1.0
+  ];
+
+  // The compaction strategy to use; must be a value defined in CompactionStrategy
+  CompactionStrategy strategy = 2 [(buf.validate.field).enum.defined_only = true];
+
+  // Whether to create a fork (true) or compact in-place (false, for future use)
+  // Currently only fork mode is supported
+  bool create_fork = 3;
+
+  // Strategy-specific parameters; at most one may be set (presumably matching `strategy` - TODO confirm)
+  oneof parameters {
+    // Parameters for truncation strategy
+    TruncationParams truncation = 4;
+    // Parameters for summarization strategy
+    SummarizationParams summarization = 5;
+    // Parameters for importance-based strategy
+    ImportanceBasedParams importance_based = 6;
+  }
+}
+
+// Parameters for truncation strategy
+message TruncationParams {
+  // Number of recent messages to keep, 1-100 (default: 10; NOTE(review): confirm how an unset 0 passes gte = 1)
+  int32 keep_recent_messages = 1 [
+    (buf.validate.field).int32.gte = 1,
+    (buf.validate.field).int32.lte = 100
+  ];
+}
+
+// Parameters for summarization strategy
+message SummarizationParams {
+  // Target length for the summary in tokens (approximate)
+  int32 summary_length = 1 [
+    (buf.validate.field).int32.gte = 100,
+    (buf.validate.field).int32.lte = 10000
+  ];
+
+  // Whether to preserve system messages
+  bool preserve_system_messages = 2;
+
+  // Number of recent messages to keep unchanged (will not be summarized)
+  int32 keep_recent_messages = 3 [
+    (buf.validate.field).int32.gte = 0,
+    (buf.validate.field).int32.lte = 50
+  ];
+}
+
+// Parameters for importance-based strategy
+message ImportanceBasedParams {
+  // Always preserve user messages
+  bool preserve_user_messages = 1;
+
+  // Always preserve tool calls and their results
+  bool preserve_tool_calls = 2;
+
+  // Summarize assistant text blocks that are not adjacent to tool calls
+  bool summarize_assistant_blocks = 3;
+
+  // Number of recent messages to keep unchanged regardless of importance
+  int32 keep_recent_messages = 4 [
+    (buf.validate.field).int32.gte = 0,
+    (buf.validate.field).int32.lte = 50
+  ];
+}
+
+// Result of a compaction operation
+message CompactionResult {
+  // The forked thread containing compacted messages
+  Thread forked_thread = 1;
+
+  // The strategy that was used
+  CompactionStrategy strategy = 2;
+
+  // Original message count before compaction
+  int32 original_message_count = 3;
+
+  // Compacted message count after compaction
+  int32 compacted_message_count = 4;
+
+  // Original total token count
+  int64 original_token_count = 5;
+
+  // Compacted total token count
+  int64 compacted_token_count = 6;
+
+  // UID of the thread context record
+  string thread_context_uid = 7 [(google.api.field_info).format = UUID4];
+}
+
+// CompactionStrategy defines the strategy to use for thread compaction
+enum CompactionStrategy {
+  // Default unspecified
+  COMPACTION_STRATEGY_UNSPECIFIED = 0;
+  // Keep first message and last N messages, truncate middle
+  COMPACTION_STRATEGY_TRUNCATION = 1;
+  // Use LLM to summarize middle section, keep recent messages
+  COMPACTION_STRATEGY_SUMMARIZATION = 2;
+  // Keep user messages and tool calls, summarize assistant responses
+  COMPACTION_STRATEGY_IMPORTANCE_BASED = 3;
+}
+
 // An actor who may participate in creating LLM messages
 enum LlmMessageRole {
   // Default unspecified
diff --git a/proto/tim-api/tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto b/proto/tim-api/tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto
new file mode 100644
index 000000000..4997f6d45
--- /dev/null
+++ b/proto/tim-api/tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto
@@ -0,0 +1,44 @@
+syntax = "proto3";
+
+package tim.api.thread_compaction.v1alpha1;
+
+import "aep/api/field_info.proto";
+import "buf/validate/validate.proto";
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "tim/api/thread/v1alpha1/thread_types.proto";
+
+// ThreadCompactionService is an internal service for handling thread compaction results from workers
+service ThreadCompactionService {
+  // Push compaction result from worker to API server
+  rpc PushCompactionResult(PushCompactionResultRequest) returns (PushCompactionResultResponse) {
+    option (google.api.http) = {
+      post: "/v1alpha1/{thread=orgs/*/users/*/threads/*}:pushCompactionResult"
+      body: "*"
+    };
+    option (google.api.method_signature) = "thread,result";
+  }
+}
+
+// PushCompactionResultRequest is used to push a compaction result from worker to API
+message PushCompactionResultRequest {
+  // The resource path of the original thread that was compacted
+  string thread = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (aep.api.field_info).resource_reference = "tim.settlerlabs.com/thread",
+    (buf.validate.field).string.pattern = "^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$"
+  ];
+
+  // The compaction result
+  tim.api.thread.v1alpha1.CompactionResult result = 2 [
+    (buf.validate.field).required = true,
+    (google.api.field_behavior) = REQUIRED
+  ];
+}
+
+// PushCompactionResultResponse is the response after pushing compaction result
+message PushCompactionResultResponse {
+  // Acknowledgment that the result was received and processed
+  bool success = 1;
+}
diff --git a/shared/llm/types.go b/shared/llm/types.go
index bf1eed60f..0e2d7259f 100644
--- a/shared/llm/types.go
+++ b/shared/llm/types.go
@@ -405,6 +405,60 @@ func ValidateModelID(modelID string) error {
 }
 
 type Model struct {
-	ID       ModelID
-	APIModel string
+	ID               ModelID
+	APIModel         string
+	MaxContextTokens int
+}
+
+// GetModelMaxTokens returns the maximum context tokens for modelID, or an error if the ID is invalid or has no limit defined.
+func GetModelMaxTokens(modelID ModelID) (int, error) {
+	if !availableModelsMap[modelID] {
+		return 0, fmt.Errorf("invalid model ID: %s", modelID)
+	}
+
+	// Centralized reference table: keep these values in sync with what the
+	// providers return in their Models() map. Never over-state a limit: compaction would trigger too late.
+	switch modelID {
+	// Anthropic models
+	case ModelIDClaude45Sonnet:
+		return 200000, nil
+	case ModelIDClaude4Opus:
+		return 200000, nil
+	case ModelIDClaude45Haiku:
+		return 200000, nil
+	// OpenAI models
+	case ModelIDGPT5:
+		return 128000, nil // NOTE(review): OpenAI documents a 400k context window for the GPT-5 family - confirm 128k is intended
+	case ModelIDGPT5Mini:
+		return 128000, nil
+	case ModelIDGPT5Nano:
+		return 128000, nil
+	case ModelIDO3:
+		return 200000, nil
+	case ModelIDO4Mini:
+		return 128000, nil // NOTE(review): OpenAI documents a 200k context window for o4-mini - confirm
+	// Google models
+	case ModelIDGemini25Pro:
+		return 1048576, nil // 1M tokens (documented input limit; the 2M window was never shipped for 2.5 Pro)
+	case ModelIDGemini25Flash:
+		return 1048576, nil // 1M tokens
+	case ModelIDGemini25FlashLite:
+		return 1048576, nil // 1M tokens
+	default:
+		return 0, fmt.Errorf("max context tokens not defined for model: %s", modelID)
+	}
+}
+
+// CalculateThresholdTokens returns modelID's max context tokens scaled by thresholdPct (a fraction in [0.0, 1.0]), truncated to an int.
+func CalculateThresholdTokens(modelID ModelID, thresholdPct float32) (int, error) {
+	maxTokens, err := GetModelMaxTokens(modelID)
+	if err != nil {
+		return 0, err
+	}
+
+	// Negated in-range check on purpose: NaN fails every comparison, so this form rejects it as well.
+	if !(thresholdPct >= 0.0 && thresholdPct <= 1.0) {
+		return 0, fmt.Errorf("threshold percentage must be between 0.0 and 1.0, got: %f", thresholdPct)
+	}
+	return int(float32(maxTokens) * thresholdPct), nil
 }
diff --git a/tests/system/framework/api_client.py b/tests/system/framework/api_client.py
index b37cb1cfe..7aa14dd4e 100644
--- a/tests/system/framework/api_client.py
+++ b/tests/system/framework/api_client.py
@@ -298,6 +298,24 @@ def configure_thread_working_directory(self, thread_path: str, working_directory
         )
         response.raise_for_status()
 
+    def compact_thread(self, thread_path: str, compaction_config: dict[str, Any]) -> dict[str, Any]:
+        """Compact a thread to reduce context size (POST to ``{thread_path}:compact``).
+
+        Args:
+            thread_path: Path to the thread to compact
+            compaction_config: Compaction configuration including strategy and parameters
+
+        Returns:
+            The compaction result containing the forked thread and stats (decoded JSON body)
+        """
+        response = self.client.post(
+            self._url(f"{thread_path}:compact"),
+            headers=self._default_headers,
+            json={"compaction_config": compaction_config},
+        )
+        response.raise_for_status()  # an HTTP error status is raised to the caller, not returned
+        return response.json()
+
     def submit_tool_result(
         self,
         thread_path: str,
diff --git a/tests/system/framework/streaming_helpers.py b/tests/system/framework/streaming_helpers.py
index a4db7bd93..3b124754e 100644
--- a/tests/system/framework/streaming_helpers.py
+++ b/tests/system/framework/streaming_helpers.py
@@ -148,6 +148,6 @@ def stream_events_background(
     except Exception as e:
         collector.error = e
         if verbose:
-            print(f"\n  Stream error (expected for test): {type(e).__name__}", flush=True)
+            print(f"\n  Stream connection closed: {type(e).__name__}", flush=True)
     finally:
         collector.stream_ended.set()
diff --git a/tests/system/test_thread_compaction_and_forking.py b/tests/system/test_thread_compaction_and_forking.py
new file mode 100644
index 000000000..b8bb13276
--- /dev/null
+++ b/tests/system/test_thread_compaction_and_forking.py
@@ -0,0 +1,1013 @@
+"""Thread compaction and forking comprehensive end-to-end test.
+
+Tests the complete integration of thread compaction (all 3 strategies) with forking:
+  1. Create org, user, and persona
+  2. Create thread with multiple messages to build context
+  3. Test TRUNCATION compaction with fork creation
+     - Verify message count is reduced
+     - Verify recent messages are preserved
+     - Verify message structure (user/assistant roles)
+  4. Test SUMMARIZATION compaction with fork creation
+     - Verify message count is reduced
+     - Check for summary message creation
+     - Verify relevant content is preserved
+  5. Test IMPORTANCE_BASED compaction with fork creation
+     - Verify user messages are preserved
+     - Verify tool calls are preserved (when present)
+     - Check for summarized assistant blocks
+     - Verify relevant content is preserved
+  6. Verify compacted thread structure and parent references
+  7. Test forking FROM a compacted thread (ForkThreadFromMessage)
+  8. Test editing and forking on a compacted thread (EditThreadMessage)
+  9. Test compacting a forked thread
+ 10. Test nested scenarios: compact -> fork -> compact
+ 11. Verify all threads can continue independently
+ 12. Verify parent thread references throughout the hierarchy
+ 13. Test fork-of-fork from compacted threads
+ 14. Verify compaction metadata and token counts
+
+This comprehensive test validates the complete lifecycle of threads with both
+compaction and forking operations, ensuring they work correctly together and
+that each compaction strategy performs its intended transformations.
+
+Running this test:
+  # From the workspace root
+  just testing::test-system-one thread-compaction-and-forking
+
+  # Or with specific model
+  just testing::test-system-one thread-compaction-and-forking -- --model-id=claude-4-5-sonnet
+
+  # Or directly with pytest from tests/system directory
+  cd tests/system
+  uv run pytest -v -s test_thread_compaction_and_forking.py
+"""
+
+import threading
+import time
+
+from framework.api_client import TimApiClient
+from framework.models import Org, User
+from framework.polling import ThreadPoller
+from framework.streaming_helpers import StreamCollector, stream_events_background
+
+SEPARATOR = "=" * 40
+
+
+def wait_for_thread_idle(
+    collector: StreamCollector,
+    expected_idle_count: int,
+    max_wait: int = 60,
+    api_client: TimApiClient = None,
+    thread_path: str = None,
+) -> bool:
+    """Wait up to ``max_wait`` seconds for ``expected_idle_count`` idle states on the stream.
+
+    Once the stream has ended, falls back to polling the thread state via the API (if given).
+    """
+    # Monotonic clock: the timeout must not be affected by wall-clock adjustments
+    deadline = time.monotonic() + max_wait
+    stream_disconnected = False
+    while time.monotonic() < deadline:
+        # Check if we've reached expected idle count via stream
+        if collector.count_idle_states() >= expected_idle_count:
+            return True
+
+        # A finished stream (errored or closed cleanly) delivers no more events: poll the API
+        if collector.stream_ended.is_set():
+            if not stream_disconnected:
+                print("  Stream disconnected, checking thread state via API...", flush=True)
+                stream_disconnected = True
+
+            if not (api_client and thread_path):
+                # No API client provided, can't verify
+                return False
+
+            thread = api_client.get_thread(thread_path)
+            if thread.llm_state == "THREAD_LLM_STATE_IDLE":
+                print("  Thread is IDLE (verified via API)", flush=True)
+                return True
+            # Still processing, keep polling
+            time.sleep(2)
+            continue
+
+        time.sleep(1)
+    return False
+
+
+def setup_thread_stream(
+    api_client: TimApiClient, thread_path: str, verbose: bool = True
+) -> tuple[StreamCollector, threading.Thread]:
+    """Start a daemon thread streaming thread_path events; return (collector, thread)."""
+    collector = StreamCollector()
+    stream_thread = threading.Thread(
+        target=stream_events_background,
+        args=(api_client, thread_path, collector),
+        kwargs={"timeout": 300.0, "verbose": verbose},
+        daemon=True,
+    )
+    stream_thread.start()
+
+    if not collector.stream_started.wait(timeout=5.0):
+        raise RuntimeError(f"Stream failed to start for {thread_path}")
+    time.sleep(1)  # presumably lets the stream settle before use - TODO confirm
+
+    return collector, stream_thread
+
+
+def test_thread_compaction_and_forking(
+    api_client: TimApiClient,
+    thread_poller: ThreadPoller,
+    test_org: Org,
+    test_user: User,
+    model_id: str,
+    test_summary,
+):
+    """Test comprehensive integration of thread compaction and forking.
+
+    This test verifies:
+     1. All three compaction strategies work as intended:
+        - TRUNCATION: reduces message count, keeps recent messages
+        - SUMMARIZATION: reduces message count, may create summary, preserves relevant content
+        - IMPORTANCE_BASED: preserves user messages and tool calls, preserves relevant content
+     2. Each strategy successfully reduces or maintains message count
+     3. Compaction with automatic fork creation
+     4. Forking from compacted threads using ForkThreadFromMessage
+     5. Editing and forking on compacted threads using EditThreadMessage
+     6. Compacting forked threads
+     7. Nested scenarios: compact -> fork -> compact
+     8. Independent continuation of all thread variants
+     9. Parent thread references throughout the hierarchy
+    10. Fork-of-fork from compacted threads
+    11. Compaction metadata and token counts
+    """
+    test_summary.org = test_org
+    test_summary.user = test_user
+
+    # Create persona with work_complete tool
+    persona = api_client.create_persona(
+        test_org.org_id,
+        test_user.user_id,
+        display_name="Compaction & Fork Test Persona",
+        description="Test persona for compaction and forking",
+    )
+    test_summary.persona = persona
+
+    # Create persona revision without thinking for simpler testing
+    tools = ["work_complete"]
+    test_summary.tools = tools
+    revision = api_client.create_persona_revision(
+        persona.path,
+        system_prompt=(
+            "You are a helpful AI assistant. Provide BRIEF responses (2-3 sentences max). "
+            "Keep your answers concise and to the point. "
+            "When you finish responding, call work_complete."
+        ),
+        tools=tools,
+        model_id=model_id,
+        tool_choice="auto",
+        use_thinking=False,
+    )
+    test_summary.persona_revision = revision
+
+    # Finalize persona revision
+    api_client.finalize_persona_revision(revision.path)
+
+    # Create thread with initial message
+    print("\nCreating original thread with initial message...")
+    thread = api_client.create_thread(
+        test_org.org_id,
+        test_user.user_id,
+        persona.persona_id,
+        display_name="Original Thread for Compaction & Fork Test",
+        initial_message_text="What is a database?",
+    )
+    print(f"  Created thread: {thread.thread_id}")
+    test_summary.thread = thread
+
+    # Set up event collector for streaming
+    collector, stream_thread = setup_thread_stream(api_client, thread.path)
+    print("  ✓ Stream connected")
+
+    # Wait for initial message to complete
+    print("\nWaiting for initial message to complete...")
+    if not wait_for_thread_idle(collector, 1, api_client=api_client, thread_path=thread.path):
+        raise AssertionError("Thread did not become IDLE after initial message")
+    time.sleep(2)  # Wait for DB commit
+    print("  ✓ Initial message completed")
+
+    # Submit multiple messages to build up context
+    messages_to_send = [
+        "What is SQL?",
+        "What is NoSQL?",
+        "Explain indexing.",
+        "What is caching?",
+        "Explain transactions.",
+    ]
+
+    print("\nBuilding thread context with multiple messages...")
+    for idx, message_text in enumerate(messages_to_send, start=2):
+        print(f"  Message {idx}: {message_text[:40]}...")
+        api_client.submit_user_message(thread.path, message_text)
+
+        if not wait_for_thread_idle(collector, idx, api_client=api_client, thread_path=thread.path):
+            raise AssertionError(f"Thread did not return to IDLE after message {idx}")
+        time.sleep(2)  # Wait for DB commit
+
+    print(f"  ✓ Sent {len(messages_to_send)} additional messages")
+
+    # Get all messages from original thread
+    original_messages = api_client.list_messages(thread.path)
+    original_message_count = len(original_messages.results)
+    print(f"\nOriginal thread has {original_message_count} messages")
+
+    # Store thread hierarchy for verification
+    thread_hierarchy = {
+        "original": {
+            "path": thread.path,
+            "message_count": original_message_count,
+            "children": [],
+        }
+    }
+
+    # ========================================================================
+    # PHASE 1: Test all three compaction strategies with fork creation
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 1: Testing All Compaction Strategies with Fork Creation")
+    print("=" * 80)
+
+    # ========================================================================
+    # METHOD 1: TRUNCATION compaction with fork
+    # ========================================================================
+    print("\n" + SEPARATOR)
+    print("METHOD 1: TRUNCATION Strategy")
+    print(SEPARATOR)
+
+    print("\nCreating TRUNCATION compaction with fork...")
+    truncation_config = {
+        "strategy": "COMPACTION_STRATEGY_TRUNCATION",
+        "create_fork": True,
+        "truncation": {"keep_recent_messages": 6},
+    }
+
+    truncation_result = api_client.compact_thread(thread.path, truncation_config)
+    print("  ✓ Truncation compaction completed")
+
+    truncation_data = truncation_result.get("result", {})
+    truncated_thread = truncation_data.get("forkedThread", {})
+    truncated_thread_path = truncated_thread.get("path", "")
+    print(f"  Truncated thread: {truncated_thread_path}")
+
+    # Verify compaction metadata
+    assert truncation_data.get("strategy") == "COMPACTION_STRATEGY_TRUNCATION"
+    assert truncation_data.get("originalMessageCount") == original_message_count
+    truncated_message_count = truncation_data.get("compactedMessageCount", 0)
+    print(
+        f"  Messages: {original_message_count} -> {truncated_message_count} "
+        f"(reduced by {original_message_count - truncated_message_count})"
+    )
+    assert truncated_message_count < original_message_count
+
+    # Verify parent reference
+    truncated_thread_full = api_client.get_thread(truncated_thread_path)
+    assert truncated_thread_full.parent_thread_id
+    assert truncated_thread_full.parent_thread_id.path == thread.path
+    print("  ✓ Parent reference validated")
+
+    # Verify TRUNCATION strategy behavior
+    print("\n  Verifying TRUNCATION strategy behavior...")
+    truncated_messages = api_client.list_messages(truncated_thread_path)
+    truncated_msgs_list = truncated_messages.results
+
+    # TRUNCATION should keep first message + last N messages (6 in config)
+    # Verify message count is significantly reduced
+    print(f"    Messages: {len(original_messages.results)} -> {len(truncated_msgs_list)}")
+    assert len(truncated_msgs_list) < len(original_messages.results), (
+        "Truncation should reduce message count"
+    )
+
+    # Verify we kept some messages (not empty)
+    assert len(truncated_msgs_list) > 0, "Should have at least some messages after truncation"
+
+    # Verify recent messages are preserved
+    # Check that the last user question is present (about transactions or caching)
+    all_truncated_text = " ".join(
+        " ".join(c.text for c in msg.contents if c.text) for msg in truncated_msgs_list
+    ).lower()
+
+    # The last questions should be present
+    has_recent_content = (
+        "transaction" in all_truncated_text
+        or "caching" in all_truncated_text
+        or "cache" in all_truncated_text
+    )
+    assert has_recent_content, "Should preserve recent message content"
+    print(f"    ✓ Recent messages preserved ({len(truncated_msgs_list)} messages kept)")
+
+    # Verify messages have proper structure
+    user_msg_count = sum(1 for msg in truncated_msgs_list if msg.role == "LLM_MESSAGE_ROLE_USER")
+    assistant_msg_count = sum(
+        1 for msg in truncated_msgs_list if msg.role == "LLM_MESSAGE_ROLE_ASSISTANT"
+    )
+    print(f"    Message breakdown: {user_msg_count} user, {assistant_msg_count} assistant")
+    assert user_msg_count > 0, "Should have at least one user message"
+    assert assistant_msg_count > 0, "Should have at least one assistant message"
+
+    print("  ✓ TRUNCATION strategy verified: keeps recent messages")
+
+    # Store in hierarchy
+    thread_hierarchy["truncated"] = {
+        "path": truncated_thread_path,
+        "message_count": truncated_message_count,
+        "parent": "original",
+        "compaction_strategy": "TRUNCATION",
+        "children": [],
+    }
+    thread_hierarchy["original"]["children"].append("truncated")
+
+    # ========================================================================
+    # METHOD 2: SUMMARIZATION compaction with fork
+    # ========================================================================
+    print("\n" + SEPARATOR)
+    print("METHOD 2: SUMMARIZATION Strategy")
+    print(SEPARATOR)
+
+    print("\nCreating SUMMARIZATION compaction with fork...")
+    summarization_config = {
+        "strategy": "COMPACTION_STRATEGY_SUMMARIZATION",
+        "create_fork": True,
+        "summarization": {
+            "summary_length": 500,
+            "preserve_system_messages": True,
+            "keep_recent_messages": 4,
+        },
+    }
+
+    summarization_result = api_client.compact_thread(thread.path, summarization_config)
+    print("  ✓ Summarization compaction completed")
+
+    summarization_data = summarization_result.get("result", {})
+    summarized_thread = summarization_data.get("forkedThread", {})
+    summarized_thread_path = summarized_thread.get("path", "")
+    print(f"  Summarized thread: {summarized_thread_path}")
+
+    # Verify compaction metadata
+    assert summarization_data.get("strategy") == "COMPACTION_STRATEGY_SUMMARIZATION"
+    assert summarization_data.get("originalMessageCount") == original_message_count
+    summarized_message_count = summarization_data.get("compactedMessageCount", 0)
+    print(
+        f"  Messages: {original_message_count} -> {summarized_message_count} "
+        f"(reduced by {original_message_count - summarized_message_count})"
+    )
+    assert summarized_message_count < original_message_count
+
+    # Verify parent reference
+    summarized_thread_full = api_client.get_thread(summarized_thread_path)
+    assert summarized_thread_full.parent_thread_id
+    assert summarized_thread_full.parent_thread_id.path == thread.path
+    print("  ✓ Parent reference validated")
+
+    # Verify SUMMARIZATION strategy behavior
+    print("\n  Verifying SUMMARIZATION strategy behavior...")
+    summarized_messages = api_client.list_messages(summarized_thread_path)
+    summarized_msgs_list = summarized_messages.results
+
+    # SUMMARIZATION should create a summary message + keep last N messages (4 in config)
+    # Verify message count is reduced
+    print(f"    Messages: {len(original_messages.results)} -> {len(summarized_msgs_list)}")
+    assert len(summarized_msgs_list) < len(original_messages.results), (
+        "Summarization should reduce message count"
+    )
+    assert len(summarized_msgs_list) > 0, "Should have at least some messages"
+
+    # Check if there's a summary message (look for summary indicators)
+    has_summary = False
+    summary_content = ""
+    for msg in summarized_msgs_list:
+        msg_text = " ".join(c.text for c in msg.contents if c.text)
+        if "[Summary of" in msg_text or "Summary of" in msg_text:
+            has_summary = True
+            summary_content = msg_text
+            break
+
+    if has_summary:
+        print("    ✓ Summary message found")
+        print(f"    Summary preview: {summary_content[:100]}...")
+    else:
+        # If no explicit summary marker, that's ok - the strategy may condense differently
+        print("    (No explicit '[Summary of' marker found)")
+
+    # Verify the thread has some content
+    all_summarized_text = " ".join(
+        " ".join(c.text for c in msg.contents if c.text) for msg in summarized_msgs_list
+    ).lower()
+
+    # Summarization can be very aggressive, just verify we have some database-related content
+    # (since all our messages were about databases)
+    assert len(all_summarized_text) > 0, "Should have some text content"
+    # At minimum, should mention databases since that's what the whole thread is about
+    has_database_content = any(
+        term in all_summarized_text for term in ["database", "sql", "data", "query"]
+    )
+    assert has_database_content, (
+        f"Should preserve database-related content, got: {all_summarized_text[:200]}"
+    )
+    print(f"    ✓ Content preserved ({len(summarized_msgs_list)} messages kept)")
+
+    # Verify message structure
+    user_msg_count = sum(1 for msg in summarized_msgs_list if msg.role == "LLM_MESSAGE_ROLE_USER")
+    assistant_msg_count = sum(
+        1 for msg in summarized_msgs_list if msg.role == "LLM_MESSAGE_ROLE_ASSISTANT"
+    )
+    print(f"    Message breakdown: {user_msg_count} user, {assistant_msg_count} assistant")
+
+    print("  ✓ SUMMARIZATION strategy verified: compacted with recent content")
+
+    # Store in hierarchy
+    thread_hierarchy["summarized"] = {
+        "path": summarized_thread_path,
+        "message_count": summarized_message_count,
+        "parent": "original",
+        "compaction_strategy": "SUMMARIZATION",
+        "children": [],
+    }
+    thread_hierarchy["original"]["children"].append("summarized")
+
+    # ========================================================================
+    # METHOD 3: IMPORTANCE_BASED compaction with fork
+    # ========================================================================
+    print("\n" + SEPARATOR)
+    print("METHOD 3: IMPORTANCE_BASED Strategy")
+    print(SEPARATOR)
+
+    print("\nCreating IMPORTANCE_BASED compaction with fork...")
+    importance_config = {
+        "strategy": "COMPACTION_STRATEGY_IMPORTANCE_BASED",
+        "create_fork": True,
+        "importance_based": {
+            "preserve_user_messages": True,
+            "preserve_tool_calls": True,
+            "summarize_assistant_blocks": True,
+            "keep_recent_messages": 5,
+        },
+    }
+
+    importance_result = api_client.compact_thread(thread.path, importance_config)
+    print("  ✓ Importance-based compaction completed")
+
+    importance_data = importance_result.get("result", {})
+    importance_thread = importance_data.get("forkedThread", {})
+    importance_thread_path = importance_thread.get("path", "")
+    print(f"  Importance-based thread: {importance_thread_path}")
+
+    # Verify compaction metadata
+    assert importance_data.get("strategy") == "COMPACTION_STRATEGY_IMPORTANCE_BASED"
+    assert importance_data.get("originalMessageCount") == original_message_count
+    importance_message_count = importance_data.get("compactedMessageCount", 0)
+    print(f"  Messages: {original_message_count} -> {importance_message_count}")
+
+    # Verify parent reference
+    importance_thread_full = api_client.get_thread(importance_thread_path)
+    assert importance_thread_full.parent_thread_id
+    assert importance_thread_full.parent_thread_id.path == thread.path
+    print("  ✓ Parent reference validated")
+
+    # Verify IMPORTANCE_BASED strategy behavior
+    print("\n  Verifying IMPORTANCE_BASED strategy behavior...")
+    importance_messages = api_client.list_messages(importance_thread_path)
+    importance_msgs_list = importance_messages.results
+
+    # IMPORTANCE_BASED should:
+    # - Preserve user messages (preserve_user_messages=true)
+    # - Preserve tool calls (preserve_tool_calls=true)
+    # - Summarize assistant blocks (summarize_assistant_blocks=true)
+    # - Keep last N messages unchanged (keep_recent_messages=5)
+
+    # Verify message count is reduced (or at least not increased)
+    print(f"    Messages: {len(original_messages.results)} -> {len(importance_msgs_list)}")
+    assert len(importance_msgs_list) <= len(original_messages.results), (
+        "Importance-based should not increase message count"
+    )
+    assert len(importance_msgs_list) > 0, "Should have at least some messages"
+
+    # Count user messages in compacted thread
+    user_msg_count = sum(1 for msg in importance_msgs_list if msg.role == "LLM_MESSAGE_ROLE_USER")
+
+    # Count user messages in original thread (for comparison)
+    original_user_msg_count = sum(
+        1 for msg in original_messages.results if msg.role == "LLM_MESSAGE_ROLE_USER"
+    )
+
+    print(f"    User messages: {original_user_msg_count} original -> {user_msg_count} compacted")
+
+    # With preserve_user_messages=true, user messages should be preserved
+    assert user_msg_count > 0, "Should preserve at least some user messages"
+    print("    ✓ User messages preserved")
+
+    # Check for tool calls (work_complete should be present)
+    tool_call_count = 0
+    for msg in importance_msgs_list:
+        for content in msg.contents:
+            if content.tool_call:
+                tool_call_count += 1
+
+    print(f"    Tool calls: {tool_call_count} found")
+    # With preserve_tool_calls=true, tool calls should be preserved
+    if tool_call_count > 0:
+        print("    ✓ Tool calls preserved")
+    else:
+        print("    (No tool calls in compacted thread - may have been in removed messages)")
+
+    # Verify thread has relevant content
+    all_importance_text = " ".join(
+        " ".join(c.text for c in msg.contents if c.text) for msg in importance_msgs_list
+    ).lower()
+
+    # Should preserve database-related content since that's the thread topic
+    assert len(all_importance_text) > 0, "Should have some text content"
+    has_database_content = any(
+        term in all_importance_text for term in ["database", "sql", "data", "query"]
+    )
+    assert has_database_content, (
+        f"Should preserve database-related content, got: {all_importance_text[:200]}"
+    )
+    print("    ✓ Content preserved")
+
+    # Check if any summarized assistant blocks exist
+    summary_indicator_count = 0
+    for msg in importance_msgs_list:
+        if msg.role == "LLM_MESSAGE_ROLE_ASSISTANT":
+            for content in msg.contents:
+                if content.text and "[Summary of" in content.text:
+                    summary_indicator_count += 1
+
+    if summary_indicator_count > 0:
+        print(f"    ✓ Found {summary_indicator_count} summarized assistant blocks")
+    else:
+        print("    (No explicit summary markers found)")
+
+    print("  ✓ IMPORTANCE_BASED strategy verified: preserves important content")
+
+    # Store in hierarchy
+    thread_hierarchy["importance"] = {
+        "path": importance_thread_path,
+        "message_count": importance_message_count,
+        "parent": "original",
+        "compaction_strategy": "IMPORTANCE_BASED",
+        "children": [],
+    }
+    thread_hierarchy["original"]["children"].append("importance")
+
+    # Verify original thread unchanged
+    original_messages_after_compactions = api_client.list_messages(thread.path)
+    assert len(original_messages_after_compactions.results) == original_message_count
+    print("\n  ✓ Original thread unchanged after all compactions")
+
+    # ========================================================================
+    # PHASE 2: Test forking FROM compacted threads
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 2: Testing Forking FROM Compacted Threads")
+    print("=" * 80)
+
+    # Add a message to the truncated thread first so we have something to fork from
+    print("\nAdding message to truncated thread...")
+    truncated_collector, truncated_stream = setup_thread_stream(api_client, truncated_thread_path)
+    api_client.submit_user_message(truncated_thread_path, "What is replication?")
+    if not wait_for_thread_idle(
+        truncated_collector, 1, api_client=api_client, thread_path=truncated_thread_path
+    ):
+        raise AssertionError("Truncated thread did not respond")
+    time.sleep(2)
+    print("  ✓ Message added to truncated thread")
+
+    # Get updated messages
+    truncated_messages = api_client.list_messages(truncated_thread_path)
+    print(f"  Truncated thread now has {len(truncated_messages.results)} messages")
+
+    # ========================================================================
+    # Test ForkThreadFromMessage from compacted thread
+    # ========================================================================
+    print("\n" + SEPARATOR)
+    print("Testing ForkThreadFromMessage from Compacted Thread")
+    print(SEPARATOR)
+
+    # Find a user message to fork from
+    fork_target = None
+    for msg in truncated_messages.results:
+        if msg.role == "LLM_MESSAGE_ROLE_USER" and any(
+            "replication" in c.text.lower() for c in msg.contents if c.text
+        ):
+            fork_target = msg
+            break
+
+    assert fork_target, "Could not find fork target message"
+    fork_target_uid = fork_target.path.split("/")[-1]
+    print(f"\n  Fork target: {fork_target.path}")
+    print(f"  Fork target index: {fork_target.index}")
+
+    # Fork from the compacted thread
+    print("\n  Creating fork from compacted thread...")
+    fork_response = api_client.fork_thread_from_message(
+        fork_target.path, title="Fork from Compacted Thread"
+    )
+
+    fork_from_compacted = fork_response.get("thread", {})
+    fork_from_compacted_path = fork_from_compacted.get("path", "")
+    print(f"  ✓ Forked thread: {fork_from_compacted_path}")
+
+    # Verify parent reference
+    fork_from_compacted_full = api_client.get_thread(fork_from_compacted_path)
+    assert fork_from_compacted_full.parent_thread_id
+    assert fork_from_compacted_full.parent_thread_id.path == truncated_thread_path
+    assert fork_from_compacted_full.parent_thread_id.fork_message_uid == fork_target_uid
+    print("  ✓ Parent reference validated (points to compacted thread)")
+
+    # Verify messages are correct (non-inclusive fork)
+    fork_from_compacted_messages = api_client.list_messages(fork_from_compacted_path)
+    fork_target_index = fork_target.index if fork_target.index is not None else 0
+    for msg in fork_from_compacted_messages.results:
+        msg_index = msg.index if msg.index is not None else 0
+        assert msg_index < fork_target_index
+    print("  ✓ Fork is non-inclusive (fork point message excluded)")
+
+    # Store in hierarchy
+    thread_hierarchy["fork_from_compacted"] = {
+        "path": fork_from_compacted_path,
+        "message_count": len(fork_from_compacted_messages.results),
+        "parent": "truncated",
+        "fork_method": "ForkThreadFromMessage",
+        "children": [],
+    }
+    thread_hierarchy["truncated"]["children"].append("fork_from_compacted")
+
+    # ========================================================================
+    # Test EditThreadMessage with create_fork on compacted thread
+    # ========================================================================
+    print("\n" + SEPARATOR)
+    print("Testing EditThreadMessage with Fork on Compacted Thread")
+    print(SEPARATOR)
+
+    # Add another message to summarized thread
+    print("\nAdding message to summarized thread...")
+    summarized_collector, summarized_stream = setup_thread_stream(
+        api_client, summarized_thread_path
+    )
+    api_client.submit_user_message(summarized_thread_path, "What is sharding?")
+    if not wait_for_thread_idle(
+        summarized_collector, 1, api_client=api_client, thread_path=summarized_thread_path
+    ):
+        raise AssertionError("Summarized thread did not respond")
+    time.sleep(2)
+    print("  ✓ Message added to summarized thread")
+
+    # Get messages from summarized thread
+    summarized_messages = api_client.list_messages(summarized_thread_path)
+    print(f"  Summarized thread now has {len(summarized_messages.results)} messages")
+
+    # Find a user message to edit
+    edit_target = None
+    for msg in summarized_messages.results:
+        if msg.role == "LLM_MESSAGE_ROLE_USER" and any(
+            "sharding" in c.text.lower() for c in msg.contents if c.text
+        ):
+            edit_target = msg
+            break
+
+    assert edit_target, "Could not find edit target message"
+    edit_target_uid = edit_target.path.split("/")[-1]
+    print(f"\n  Edit target: {edit_target.path}")
+
+    # Edit with create_fork
+    print("\n  Editing message with create_fork=true...")
+    edit_response = api_client.edit_thread_message(
+        edit_target.path,
+        "What is partitioning?",
+        restore=False,
+        create_fork=True,
+    )
+
+    edit_forked_thread = edit_response.get("forkedThread", {})
+    edit_forked_thread_path = edit_forked_thread.get("path", "")
+    print(f"  ✓ Edit-forked thread: {edit_forked_thread_path}")
+
+    # Set up stream and wait for the edit-fork LLM response to complete
+    edit_fork_collector, edit_fork_stream = setup_thread_stream(api_client, edit_forked_thread_path)
+    print("  Waiting for edit-fork LLM response to complete...")
+    if not wait_for_thread_idle(
+        edit_fork_collector, 1, api_client=api_client, thread_path=edit_forked_thread_path
+    ):
+        raise AssertionError("Edit-forked thread did not become idle after creation")
+    time.sleep(2)  # Wait for DB commit
+    print("  ✓ Edit-fork LLM response completed")
+
+    # Verify parent reference
+    edit_forked_thread_full = api_client.get_thread(edit_forked_thread_path)
+    assert edit_forked_thread_full.parent_thread_id
+    assert edit_forked_thread_full.parent_thread_id.path == summarized_thread_path
+    assert edit_forked_thread_full.parent_thread_id.fork_message_uid == edit_target_uid
+    print("  ✓ Parent reference validated (points to summarized thread)")
+
+    # Verify edited content in fork
+    edit_forked_messages = api_client.list_messages(edit_forked_thread_path)
+    edit_target_index = edit_target.index if edit_target.index is not None else 0
+    edited_msg = None
+    for msg in edit_forked_messages.results:
+        msg_index = msg.index if msg.index is not None else 0
+        if msg_index == edit_target_index:
+            edited_msg = msg
+            break
+
+    assert edited_msg, "Edited message not found in forked thread"
+    edited_content = " ".join(c.text for c in edited_msg.contents if c.text)
+    assert "partitioning" in edited_content.lower()
+    print("  ✓ Edited content verified in fork")
+
+    # Store in hierarchy
+    thread_hierarchy["edit_fork_from_compacted"] = {
+        "path": edit_forked_thread_path,
+        "message_count": len(edit_forked_messages.results),
+        "parent": "summarized",
+        "fork_method": "EditThreadMessage",
+        "children": [],
+    }
+    thread_hierarchy["summarized"]["children"].append("edit_fork_from_compacted")
+
+    # ========================================================================
+    # PHASE 3: Test compacting a forked thread
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 3: Testing Compacting a Forked Thread")
+    print("=" * 80)
+
+    # Add messages to the edit-forked thread (stream already set up above)
+    print("\nAdding messages to edit-forked thread...")
+
+    # Note: We've already waited for iteration 1 (the edit-fork LLM response)
+    # so we start counting from 2
+    for i, msg_text in enumerate(
+        [
+            "What is ACID?",
+            "What is BASE?",
+        ],
+        start=2,
+    ):
+        api_client.submit_user_message(edit_forked_thread_path, msg_text)
+        if not wait_for_thread_idle(
+            edit_fork_collector, i, api_client=api_client, thread_path=edit_forked_thread_path
+        ):
+            raise AssertionError(f"Edit-fork thread did not respond to message {i}")
+        time.sleep(2)
+
+    print("  ✓ Added 2 messages to edit-forked thread")
+
+    # Get message count before compaction
+    edit_fork_messages_before = api_client.list_messages(edit_forked_thread_path)
+    edit_fork_msg_count_before = len(edit_fork_messages_before.results)
+    print(f"  Edit-forked thread has {edit_fork_msg_count_before} messages")
+
+    # Compact the forked thread
+    print("\n  Compacting the forked thread with TRUNCATION...")
+    compact_fork_config = {
+        "strategy": "COMPACTION_STRATEGY_TRUNCATION",
+        "create_fork": True,
+        "truncation": {"keep_recent_messages": 4},
+    }
+
+    compact_fork_result = api_client.compact_thread(edit_forked_thread_path, compact_fork_config)
+    print("  ✓ Compaction of forked thread completed")
+
+    compact_fork_data = compact_fork_result.get("result", {})
+    compact_fork_thread = compact_fork_data.get("forkedThread", {})
+    compact_fork_thread_path = compact_fork_thread.get("path", "")
+    print(f"  Compacted-fork thread: {compact_fork_thread_path}")
+
+    # Verify this is a fork of a fork of a compacted thread
+    compact_fork_thread_full = api_client.get_thread(compact_fork_thread_path)
+    assert compact_fork_thread_full.parent_thread_id
+    assert compact_fork_thread_full.parent_thread_id.path == edit_forked_thread_path
+    print("  ✓ Parent reference validated (points to edit-forked thread)")
+
+    # Verify message count reduction
+    compact_fork_msg_count = compact_fork_data.get("compactedMessageCount", 0)
+    assert compact_fork_msg_count < edit_fork_msg_count_before
+    print(
+        f"  Messages: {edit_fork_msg_count_before} -> {compact_fork_msg_count} "
+        f"(reduced by {edit_fork_msg_count_before - compact_fork_msg_count})"
+    )
+
+    # Store in hierarchy
+    thread_hierarchy["compacted_fork"] = {
+        "path": compact_fork_thread_path,
+        "message_count": compact_fork_msg_count,
+        "parent": "edit_fork_from_compacted",
+        "compaction_strategy": "TRUNCATION",
+        "children": [],
+    }
+    thread_hierarchy["edit_fork_from_compacted"]["children"].append("compacted_fork")
+
+    # ========================================================================
+    # PHASE 4: Test nested scenario - fork from compacted fork
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 4: Testing Fork-of-Fork from Compacted Thread")
+    print("=" * 80)
+
+    # Add a message to the compacted-fork thread
+    print("\nAdding message to compacted-fork thread...")
+    compact_fork_collector, compact_fork_stream = setup_thread_stream(
+        api_client, compact_fork_thread_path
+    )
+    api_client.submit_user_message(compact_fork_thread_path, "What is clustering?")
+    if not wait_for_thread_idle(
+        compact_fork_collector, 1, api_client=api_client, thread_path=compact_fork_thread_path
+    ):
+        raise AssertionError("Compacted-fork thread did not respond")
+    time.sleep(2)
+    print("  ✓ Message added to compacted-fork thread")
+
+    # Get messages
+    compact_fork_messages = api_client.list_messages(compact_fork_thread_path)
+    print(f"  Compacted-fork thread has {len(compact_fork_messages.results)} messages")
+
+    # Find a message to fork from
+    nested_fork_target = None
+    for msg in compact_fork_messages.results:
+        if msg.role == "LLM_MESSAGE_ROLE_USER" and any(
+            "clustering" in c.text.lower() for c in msg.contents if c.text
+        ):
+            nested_fork_target = msg
+            break
+
+    if nested_fork_target:
+        print(f"\n  Nested fork target: {nested_fork_target.path}")
+
+        # Create fork-of-fork
+        print("\n  Creating fork-of-fork from compacted thread...")
+        nested_fork_response = api_client.fork_thread_from_message(
+            nested_fork_target.path, title="Fork-of-Fork from Compacted"
+        )
+
+        nested_fork_thread = nested_fork_response.get("thread", {})
+        nested_fork_thread_path = nested_fork_thread.get("path", "")
+        print(f"  ✓ Fork-of-fork thread: {nested_fork_thread_path}")
+
+        # Verify parent reference
+        nested_fork_thread_full = api_client.get_thread(nested_fork_thread_path)
+        assert nested_fork_thread_full.parent_thread_id
+        assert nested_fork_thread_full.parent_thread_id.path == compact_fork_thread_path
+        print("  ✓ Parent reference validated (points to compacted-fork thread)")
+
+        # Verify message propagation
+        nested_fork_messages = api_client.list_messages(nested_fork_thread_path)
+        nested_fork_msg_count = len(nested_fork_messages.results)
+        print(f"  Fork-of-fork has {nested_fork_msg_count} messages")
+
+        # Store in hierarchy
+        thread_hierarchy["nested_fork"] = {
+            "path": nested_fork_thread_path,
+            "message_count": nested_fork_msg_count,
+            "parent": "compacted_fork",
+            "fork_method": "ForkThreadFromMessage",
+            "children": [],
+        }
+        thread_hierarchy["compacted_fork"]["children"].append("nested_fork")
+        print("  ✓ Fork-of-fork created successfully")
+    else:
+        print("  (Skipping nested fork - no suitable message found)")
+
+    # ========================================================================
+    # PHASE 5: Test independent continuation of all threads
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 5: Testing Independent Continuation")
+    print("=" * 80)
+
+    # Test that importance-based thread can continue independently
+    print("\nTesting continuation on importance-based compacted thread...")
+    importance_collector, importance_stream = setup_thread_stream(
+        api_client, importance_thread_path
+    )
+
+    api_client.submit_user_message(importance_thread_path, "What is backup?")
+    if not wait_for_thread_idle(
+        importance_collector, 1, api_client=api_client, thread_path=importance_thread_path
+    ):
+        raise AssertionError("Importance-based thread did not respond")
+    time.sleep(2)
+    print("  ✓ Importance-based thread responded independently")
+
+    # Verify original thread still unchanged
+    original_messages_final = api_client.list_messages(thread.path)
+    assert len(original_messages_final.results) == original_message_count
+    print("  ✓ Original thread still unchanged")
+
+    # Report the truncated thread's final message count (informational; no assertion is made)
+    truncated_messages_final = api_client.list_messages(truncated_thread_path)
+    # Expected: the compacted messages plus the "What is replication?" exchange from Phase 2
+    print(f"  ✓ Truncated thread has {len(truncated_messages_final.results)} messages")
+
+    # ========================================================================
+    # PHASE 6: Verify thread hierarchy and relationships
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("PHASE 6: Verifying Thread Hierarchy")
+    print("=" * 80)
+
+    print("\nThread Hierarchy:")
+    print("  Original Thread")
+    print("    ├── Truncated (TRUNCATION compaction)")
+    print("    │   └── Fork from Compacted (ForkThreadFromMessage)")
+    print("    ├── Summarized (SUMMARIZATION compaction)")
+    print("    │   └── Edit-Fork from Compacted (EditThreadMessage)")
+    print("    │       └── Compacted-Fork (TRUNCATION of fork)")
+    print("    │           └── Fork-of-Fork (ForkThreadFromMessage)")
+    print("    └── Importance-based (IMPORTANCE_BASED compaction)")
+
+    # Verify each thread's parent relationship
+    print("\nVerifying parent relationships...")
+    relationships = [
+        ("truncated", "original"),
+        ("summarized", "original"),
+        ("importance", "original"),
+        ("fork_from_compacted", "truncated"),
+        ("edit_fork_from_compacted", "summarized"),
+        ("compacted_fork", "edit_fork_from_compacted"),
+    ]
+
+    if "nested_fork" in thread_hierarchy:
+        relationships.append(("nested_fork", "compacted_fork"))
+
+    for child_key, parent_key in relationships:
+        child_path = thread_hierarchy[child_key]["path"]
+        expected_parent_path = thread_hierarchy[parent_key]["path"]
+        child_thread = api_client.get_thread(child_path)
+
+        assert child_thread.parent_thread_id, f"{child_key} missing parent_thread_id"
+        assert child_thread.parent_thread_id.path == expected_parent_path, (
+            f"{child_key} parent mismatch"
+        )
+        print(f"  ✓ {child_key} -> {parent_key}")
+
+    # ========================================================================
+    # Final Summary
+    # ========================================================================
+    print("\n" + "=" * 80)
+    print("Thread Compaction and Forking Test PASSED")
+    print("=" * 80)
+
+    print("\nTest Summary:")
+    print(f"  Original thread: {thread.path}")
+    print(f"  Original messages: {original_message_count}")
+    print("")
+
+    print("  Compaction Strategies Tested:")
+    print(f"    1. TRUNCATION: {truncated_message_count} messages")
+    print(f"    2. SUMMARIZATION: {summarized_message_count} messages")
+    print(f"    3. IMPORTANCE_BASED: {importance_message_count} messages")
+    print("")
+
+    print("  Forking Methods Tested:")
+    print("    1. ForkThreadFromMessage from compacted thread")
+    print("    2. EditThreadMessage with create_fork on compacted thread")
+    print("    3. Compacting a forked thread")
+    print("    4. Fork-of-fork from compacted thread")
+    print("")
+
+    print("  Threads Created:")
+    for key, data in thread_hierarchy.items():
+        indent = "    " * (
+            0
+            if key == "original"
+            else 1
+            if data.get("parent") == "original"
+            else 2
+            if data.get("parent") in ["truncated", "summarized", "importance"]
+            else 3
+        )
+        method = data.get("compaction_strategy") or data.get("fork_method") or "Original"
+        print(f"{indent}{key}: {data['message_count']} messages ({method})")
+    print("")
+
+    print("  Validations:")
+    print("    ✓ TRUNCATION strategy verified (reduces messages, keeps recent)")
+    print("    ✓ SUMMARIZATION strategy verified (reduces messages, preserves relevant content)")
+    print("    ✓ IMPORTANCE_BASED strategy verified (preserves important content)")
+    print("    ✓ All compaction strategies create forks correctly")
+    print("    ✓ Forking from compacted threads works")
+    print("    ✓ Editing with fork on compacted threads works")
+    print("    ✓ Compacting forked threads works")
+    print("    ✓ Nested forks from compacted threads work")
+    print("    ✓ Parent references are correct throughout hierarchy")
+    print("    ✓ All threads can continue independently")
+    print("    ✓ Original thread remains unchanged")
+    print("")
+
+    # Add to test summary
+    test_summary.extra_info["Original messages"] = str(original_message_count)
+    test_summary.extra_info["Compaction strategies"] = (
+        "3 (TRUNCATION, SUMMARIZATION, IMPORTANCE_BASED)"
+    )
+    test_summary.extra_info["Fork methods"] = "2 (ForkThreadFromMessage, EditThreadMessage)"
+    test_summary.extra_info["Total threads created"] = str(len(thread_hierarchy))
+    test_summary.extra_info["Hierarchy depth"] = "4 levels"
diff --git a/tests/system/test_thread_forking.py b/tests/system/test_thread_forking.py
index 4af950a93..e0720facc 100644
--- a/tests/system/test_thread_forking.py
+++ b/tests/system/test_thread_forking.py
@@ -46,17 +46,17 @@ def test_thread_forking(
 ):
     """Test thread forking using both available methods.
 
-    This test verifies:
-    1. Creating a thread with multiple messages
-    2. METHOD 1: Forking via ForkThreadFromMessage endpoint
-    3. Verifying the forked thread includes messages up to (but NOT including) the fork point (non-inclusive)
-    4. Verifying the original thread remains unchanged
-    5. FORK-OF-FORK: Forking an already-forked thread
-    6. Verifying messages are correctly propagated through nested forks
-    7. METHOD 2: Forking via EditThreadMessage with create_fork=true
-    8. Verifying the forked thread via edit has correct structure and edited content
-    9. Verifying all fork methods create independent threads
-   10. Testing that forked threads can continue independently
+     This test verifies:
+     1. Creating a thread with multiple messages
+     2. METHOD 1: Forking via ForkThreadFromMessage endpoint
+     3. Verifying the forked thread includes messages up to (but NOT including) the fork point (non-inclusive)
+     4. Verifying the original thread remains unchanged
+     5. FORK-OF-FORK: Forking an already-forked thread
+     6. Verifying messages are correctly propagated through nested forks
+     7. METHOD 2: Forking via EditThreadMessage with create_fork=true
+     8. Verifying the forked thread via edit has correct structure and edited content
+     9. Verifying all fork methods create independent threads
+    10. Testing that forked threads can continue independently
     """
     test_summary.org = test_org
     test_summary.user = test_user
@@ -401,7 +401,7 @@ def test_thread_forking(
         msg.path.endswith(f"/messages/{fork_of_fork_uid}") for msg in fork_of_fork_messages.results
     )
     assert not fork_of_fork_target_in_result, (
-        f"Fork-of-fork target message should NOT be in fork-of-fork (non-inclusive)"
+        "Fork-of-fork target message should NOT be in fork-of-fork (non-inclusive)"
     )
     print("  ✓ Fork-of-fork target message correctly excluded")
 
@@ -417,7 +417,7 @@ def test_thread_forking(
     assert first_user_in_fork_of_fork, (
         f"First user message '{first_user_text}' should be in fork-of-fork"
     )
-    print(f"  ✓ Messages from original thread correctly propagated to fork-of-fork")
+    print("  ✓ Messages from original thread correctly propagated to fork-of-fork")
 
     # Verify first forked thread is unchanged
     forked_thread_after_fork_of_fork = api_client.list_messages(forked_thread_path)
diff --git a/tim-api/internal/job_queue/client.go b/tim-api/internal/job_queue/client.go
index b43ea6f62..8ce9108d4 100644
--- a/tim-api/internal/job_queue/client.go
+++ b/tim-api/internal/job_queue/client.go
@@ -101,6 +101,31 @@ func (c *Client) PushToolRunnerJob(toolCallPath string) error {
 	return nil
 }
 
+// CompactionJobArgs is the argument payload that PushThreadCompactionJob attaches to a "thread_compaction" job.
+type CompactionJobArgs struct {
+	ThreadPath       string                 `json:"thread_path"`       // path of the thread the job is for; logged when the job is pushed
+	CompactionConfig map[string]interface{} `json:"compaction_config"` // JSON representation of the CompactionConfig proto
+}
+
+// PushThreadCompactionJob logs args.ThreadPath, then pushes a "thread_compaction" job carrying args via the connection pool; a push failure is returned wrapped.
+func (c *Client) PushThreadCompactionJob(args CompactionJobArgs) error {
+	c.logger.Infow("Pushing thread_compaction job", "thread_path", args.ThreadPath)
+
+	job := faktory_client.NewJob("thread_compaction", args)
+	retryCount := 3 // retry setting for the job; the author's note says compaction jobs are idempotent and can be retried - TODO confirm
+	job.Retry = &retryCount
+
+	if err := c.pool.With(func(conn *faktory_client.Client) error {
+		if err := conn.Push(job); err != nil {
+			return fmt.Errorf("failed to push job: %w", err)
+		}
+		return nil
+	}); err != nil {
+		return fmt.Errorf("failed to push thread_compaction job: %w", err)
+	}
+	return nil
+}
+
 func (c *Client) Close() error {
 	c.pool.Close()
 	c.logger.Info("Job queue client closed")
diff --git a/tim-api/internal/server/server.go b/tim-api/internal/server/server.go
index c12a0bebb..40e62b90e 100644
--- a/tim-api/internal/server/server.go
+++ b/tim-api/internal/server/server.go
@@ -344,6 +344,14 @@ func (s *InternalServer) setupRoutes() error {
 		serviceNames = append(serviceNames, llmResponsePath)
 	}
 
+	if s.services.ThreadCompactionService != nil {
+		threadCompactionPath, threadCompactionHandler := s.services.ThreadCompactionService.NewServiceHandler(
+			connect.WithInterceptors(interceptors...),
+		)
+		s.mux.Handle(threadCompactionPath, threadCompactionHandler)
+		serviceNames = append(serviceNames, threadCompactionPath)
+	}
+
 	if s.services.ToolExecutionService != nil {
 		toolExecutionPath, toolExecutionHandler := s.services.ToolExecutionService.NewServiceHandler(
 			connect.WithInterceptors(interceptors...),
diff --git a/tim-api/internal/services/registry.go b/tim-api/internal/services/registry.go
index 5b597036a..9eaafe23e 100644
--- a/tim-api/internal/services/registry.go
+++ b/tim-api/internal/services/registry.go
@@ -18,6 +18,7 @@ import (
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/org"
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/persona"
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/thread"
+	"github.com/Greybox-Labs/tim/tim-api/internal/services/thread_compaction"
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/thread_context"
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/todo"
 	"github.com/Greybox-Labs/tim/tim-api/internal/services/tool_execution"
@@ -39,18 +40,19 @@ type Registry struct {
 	Analytics *analytics.Client
 
 	// Service implementations
-	AuthService          *auth.Service
-	IdentityService      *identity.Service
-	UserService          *user.Service
-	OrgService           *org.Service
-	PersonaService       *persona.Service
-	ThreadService        *thread.Service
-	TodoService          *todo.Service
-	ThreadContextService *thread_context.Service
-	LLMResponseService   *llm_response.Service
-	ToolExecutionService *tool_execution.Service
-	BillingService       *billing.Service
-	ConnectedAppService  *connected_app.Service
+	AuthService             *auth.Service
+	IdentityService         *identity.Service
+	UserService             *user.Service
+	OrgService              *org.Service
+	PersonaService          *persona.Service
+	ThreadService           *thread.Service
+	TodoService             *todo.Service
+	ThreadContextService    *thread_context.Service
+	ThreadCompactionService *thread_compaction.Service
+	LLMResponseService      *llm_response.Service
+	ToolExecutionService    *tool_execution.Service
+	BillingService          *billing.Service
+	ConnectedAppService     *connected_app.Service
 }
 
 // NewRegistry creates a new service registry with all services initialized
@@ -164,6 +166,10 @@ func NewRegistry(cfg *config.Config, log *logger.Logger, ctx context.Context, st
 	}
 	registry.ThreadContextService = threadContextSvc
 
+	// Initialize ThreadCompaction Service (internal server only)
+	threadCompactionSvc := thread_compaction.NewService(db.Pool, log, nc)
+	registry.ThreadCompactionService = threadCompactionSvc
+
 	// Initialize LlmResponse Service
 	llmResponseSvc, err := llm_response.New(log, db, jobQueue, nc, m)
 	if err != nil {
diff --git a/tim-api/internal/services/thread/compaction_handlers.go b/tim-api/internal/services/thread/compaction_handlers.go
new file mode 100644
index 000000000..9500db6ba
--- /dev/null
+++ b/tim-api/internal/services/thread/compaction_handlers.go
@@ -0,0 +1,466 @@
+package thread
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	"connectrpc.com/connect"
+	"github.com/Greybox-Labs/tim/shared/llm"
+	"github.com/Greybox-Labs/tim/tim-api/internal/authz"
+	"github.com/Greybox-Labs/tim/tim-api/internal/database"
+	"github.com/Greybox-Labs/tim/tim-api/internal/resourcepath"
+	"github.com/Greybox-Labs/tim/tim-db/gen/db"
+	threadv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5/pgtype"
+)
+
+// CompactThread authorizes the caller, verifies the thread meets the requested token threshold, and compacts it synchronously.
+func (s *Service) CompactThread(
+	ctx context.Context,
+	req *connect.Request[threadv1.CompactThreadRequest],
+) (*connect.Response[threadv1.CompactThreadResponse], error) {
+	s.logger.Infow("CompactThread called", "path", req.Msg.Path)
+
+	// Authorization: the "compact" action is checked against the parsed thread path
+	authzHandle, err := authz.HandlerFromContext(ctx)
+	if err != nil {
+		return nil, connect.NewError(connect.CodeInternal, errors.New("internal server error"))
+	}
+
+	threadPath, err := resourcepath.ParseThreadPath(req.Msg.Path)
+	if err != nil {
+		return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("invalid path"))
+	}
+
+	authzHandle.SetResource(ResourceType, threadPath)
+	err = authzHandle.Authorize(ctx, "compact")
+	if err != nil {
+		return nil, err
+	}
+
+	// Validate compaction config up front so an invalid request costs no database round trips
+	if req.Msg.CompactionConfig == nil {
+		return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("compaction_config is required"))
+	}
+
+	queries, err := database.Queries(ctx)
+	if err != nil {
+		s.logger.Errorw("failed to get database queries", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("internal server error"))
+	}
+
+	// Get the thread to validate it exists (any lookup error is reported as NotFound)
+	thread, err := queries.GetThread(ctx, db.GetThreadParams{
+		ThreadUID:       threadPath.ThreadUID,
+		OrganizationUID: *threadPath.OwnerOrg().OrgUID,
+		OwnerUID:        *threadPath.Owner().UserUID,
+	})
+	if err != nil {
+		s.logger.Errorw("failed to get thread", "error", err)
+		return nil, connect.NewError(connect.CodeNotFound, errors.New("thread not found"))
+	}
+
+	// Get the persona revision to determine the model
+	personaRevision, err := queries.GetPersonaRevision(ctx, thread.PersonaRevisionUID)
+	if err != nil {
+		s.logger.Errorw("failed to get persona revision", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get persona revision"))
+	}
+
+	// Check current token count
+	tokenCount, err := queries.GetThreadTokenCount(ctx, threadPath.ThreadUID)
+	if err != nil {
+		s.logger.Errorw("failed to get thread token count", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get token count"))
+	}
+
+	// Get current message count (only used for the log entry below)
+	messageCount, err := queries.CountThreadMessages(ctx, threadPath.ThreadUID)
+	if err != nil {
+		s.logger.Errorw("failed to get thread message count", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get message count"))
+	}
+
+	// Calculate threshold in tokens from the model and the requested percentage
+	modelID := llm.ModelID(personaRevision.ModelID)
+	thresholdTokens, err := llm.CalculateThresholdTokens(modelID, req.Msg.CompactionConfig.ThresholdPercentage)
+	if err != nil {
+		s.logger.Errorw("failed to calculate threshold", "error", err)
+		return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("invalid model or threshold: %w", err))
+	}
+
+	totalTokens := int64(tokenCount.TotalTokens)
+	s.logger.Infow("Checking compaction threshold",
+		"thread_path", req.Msg.Path,
+		"total_tokens", totalTokens,
+		"threshold_tokens", thresholdTokens,
+		"message_count", messageCount,
+		"model", modelID,
+	)
+
+	// Check if compaction is needed (skip if threshold is 0, which means "always compact")
+	if req.Msg.CompactionConfig.ThresholdPercentage > 0 && totalTokens < int64(thresholdTokens) {
+		return nil, connect.NewError(connect.CodeFailedPrecondition,
+			fmt.Errorf("thread does not exceed compaction threshold (tokens: %d, threshold: %d)", totalTokens, thresholdTokens))
+	}
+
+	// Perform compaction synchronously; its error is already a connect error and is returned as-is
+	result, err := s.performCompaction(ctx, threadPath, thread, req.Msg.CompactionConfig)
+	if err != nil {
+		s.logger.Errorw("failed to perform compaction", "error", err)
+		return nil, err
+	}
+
+	s.logger.Infow("Compaction completed successfully",
+		"thread_path", req.Msg.Path,
+		"strategy", req.Msg.CompactionConfig.Strategy,
+		"original_messages", result.OriginalMessageCount,
+		"compacted_messages", result.CompactedMessageCount,
+	)
+
+	return connect.NewResponse(&threadv1.CompactThreadResponse{
+		Result: result,
+	}), nil
+}
+
+// performCompaction selects the messages to keep for the configured strategy, computes before/after statistics, and optionally forks the thread
+func (s *Service) performCompaction(
+	ctx context.Context,
+	threadPath *resourcepath.ThreadPath,
+	thread db.GetThreadRow,
+	config *threadv1.CompactionConfig,
+) (*threadv1.CompactionResult, error) {
+	queries, err := database.Queries(ctx)
+	if err != nil {
+		return nil, connect.NewError(connect.CodeInternal, errors.New("internal server error"))
+	}
+
+	// List all messages from the thread
+	messages, err := queries.ListThreadMessages(ctx, db.ListThreadMessagesParams{
+		ThreadUID: threadPath.ThreadUID,
+		PageLimit: 10000, // High limit to get all messages; NOTE(review): longer threads are presumably cut off here — confirm
+	})
+	if err != nil {
+		s.logger.Errorw("failed to list thread messages", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to list messages"))
+	}
+
+	originalMessageCount := int32(len(messages))
+	if originalMessageCount == 0 {
+		return nil, connect.NewError(connect.CodeFailedPrecondition, errors.New("thread has no messages to compact"))
+	}
+
+	s.logger.Infow("Retrieved messages for compaction",
+		"thread_path", threadPath.String(),
+		"message_count", originalMessageCount,
+		"strategy", config.Strategy,
+	)
+
+	// Apply compaction strategy; every branch fills messagesToKeep and compactedMessageCount
+	var messagesToKeep []db.LlmMessage
+	var compactedMessageCount int32
+
+	switch config.Strategy {
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION:
+		truncationConfig := config.GetTruncation()
+		if truncationConfig == nil {
+			return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("truncation config is required for truncation strategy"))
+		}
+		keepRecent := int(truncationConfig.KeepRecentMessages)
+		if keepRecent < 0 {
+			keepRecent = 0
+		}
+		if keepRecent > len(messages) {
+			keepRecent = len(messages)
+		}
+		// Keep the most recent N messages (takes the tail, so this assumes oldest-first ordering — confirm)
+		if keepRecent > 0 {
+			messagesToKeep = messages[len(messages)-keepRecent:]
+		}
+		compactedMessageCount = int32(len(messagesToKeep))
+
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED:
+		// Keep user messages, plus assistant messages that contain at least one tool call
+		for _, msg := range messages {
+			if msg.Role == db.LlmMessageRoleUser {
+				messagesToKeep = append(messagesToKeep, msg)
+			} else if msg.Role == db.LlmMessageRoleAssistant {
+				// A failed lookup must abort: treating it as "no tool call" would silently drop the message
+				contents, err := queries.ListMessageContent(ctx, msg.UID)
+				if err != nil {
+					s.logger.Errorw("failed to list message content", "error", err, "message_uid", msg.UID)
+					return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get message content"))
+				}
+				for _, content := range contents {
+					if content.Type == db.LlmMessageContentTypeToolUse {
+						messagesToKeep = append(messagesToKeep, msg)
+						break
+					}
+				}
+			}
+		}
+		compactedMessageCount = int32(len(messagesToKeep))
+
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION:
+		// For now, just keep first and last message as a simple summary (messages is non-empty here)
+		// In a full implementation, this would use an LLM to generate a summary
+		messagesToKeep = append(messagesToKeep, messages[0])
+		if len(messages) > 1 {
+			messagesToKeep = append(messagesToKeep, messages[len(messages)-1])
+		}
+		compactedMessageCount = int32(len(messagesToKeep))
+
+	default:
+		return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("unsupported compaction strategy: %v", config.Strategy))
+	}
+
+	s.logger.Infow("Compaction strategy applied",
+		"strategy", config.Strategy,
+		"original_count", originalMessageCount,
+		"compacted_count", compactedMessageCount,
+		"removed", originalMessageCount-compactedMessageCount,
+	)
+
+	// Calculate token counts by summing recorded input/output tokens; messages without usage data add nothing
+	originalTokenCount := int64(0)
+	compactedTokenCount := int64(0)
+	for _, msg := range messages {
+		if msg.InputTokens.Valid {
+			originalTokenCount += int64(msg.InputTokens.Int32)
+		}
+		if msg.OutputTokens.Valid {
+			originalTokenCount += int64(msg.OutputTokens.Int32)
+		}
+	}
+	for _, msg := range messagesToKeep {
+		if msg.InputTokens.Valid {
+			compactedTokenCount += int64(msg.InputTokens.Int32)
+		}
+		if msg.OutputTokens.Valid {
+			compactedTokenCount += int64(msg.OutputTokens.Int32)
+		}
+	}
+
+	result := &threadv1.CompactionResult{
+		Strategy:              config.Strategy,
+		OriginalMessageCount:  originalMessageCount,
+		CompactedMessageCount: compactedMessageCount,
+		OriginalTokenCount:    originalTokenCount,
+		CompactedTokenCount:   compactedTokenCount,
+	}
+
+	// If create_fork is true, create a new thread with the compacted messages; otherwise only the statistics are returned
+	if config.CreateFork {
+		forkedThread, err := s.createForkedThread(ctx, threadPath, thread, messagesToKeep)
+		if err != nil {
+			s.logger.Errorw("failed to create forked thread", "error", err)
+			return nil, err
+		}
+		result.ForkedThread = forkedThread
+		s.logger.Infow("Forked thread created",
+			"original_thread", threadPath.String(),
+			"forked_thread", forkedThread.Path,
+		)
+	}
+
+	return result, nil
+}
+
+// createForkedThread creates a child thread of originalThread and copies the given messages into it, dropping orphaned tool results
+func (s *Service) createForkedThread(
+	ctx context.Context,
+	originalThreadPath *resourcepath.ThreadPath,
+	originalThread db.GetThreadRow,
+	messages []db.LlmMessage,
+) (*threadv1.Thread, error) {
+	queries, err := database.Queries(ctx)
+	if err != nil {
+		return nil, connect.NewError(connect.CodeInternal, errors.New("internal server error"))
+	}
+
+	// Create the forked thread with the original as parent and the same owner, organization and persona revision
+	forkedThread, err := queries.CreateThread(ctx, db.CreateThreadParams{
+		ParentUID:          &originalThread.UID,
+		ForkMessageUid:     nil, // Not forking from a specific message
+		OwnerUID:           originalThread.OwnerUID,
+		OrganizationUID:    originalThread.OrganizationUID,
+		PersonaRevisionUID: originalThread.PersonaRevisionUID,
+		ActiveContextUID:   nil,
+		Title:              originalThread.Title + " (Compacted)",
+	})
+	if err != nil {
+		s.logger.Errorw("failed to create forked thread", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to create thread"))
+	}
+
+	// Build a set of tool call IDs that exist in the messages being copied
+	// This prevents orphaned tool results from being copied
+	toolCallIDs := make(map[string]bool)
+	for _, msg := range messages {
+		if msg.Role == db.LlmMessageRoleAssistant {
+			contents, err := queries.ListMessageContent(ctx, msg.UID)
+			if err != nil {
+				s.logger.Errorw("failed to list message content", "error", err, "message_uid", msg.UID)
+				return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get message content"))
+			}
+			for _, content := range contents {
+				if content.Type == db.LlmMessageContentTypeToolUse {
+					// Extract the tool call ID from the "id" field of the JSON content; blocks without one are ignored
+					var toolUseData struct {
+						ID string `json:"id"`
+					}
+					if err := json.Unmarshal(content.Content, &toolUseData); err == nil && toolUseData.ID != "" {
+						toolCallIDs[toolUseData.ID] = true
+					}
+				}
+			}
+		}
+	}
+
+	s.logger.Debugw("Built tool call ID map for compaction",
+		"forked_thread_uid", forkedThread.UID,
+		"tool_call_count", len(toolCallIDs),
+	)
+
+	// Copy messages to the forked thread
+	// Track the new index for messages (may differ from source if we skip empty messages)
+	newMessageIdx := 0
+	for _, msg := range messages {
+		// Get message contents first to check if message will be empty after filtering
+		contents, err := queries.ListMessageContent(ctx, msg.UID)
+		if err != nil {
+			s.logger.Errorw("failed to list message content",
+				"error", err,
+				"message_uid", msg.UID,
+			)
+			return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get message content"))
+		}
+
+		// Pre-check: count how many content blocks will remain after filtering (must mirror the copy loop below)
+		validContentCount := 0
+		for _, content := range contents {
+			// Skip tool results that don't have a corresponding tool use in the compacted messages
+			if content.Type == db.LlmMessageContentTypeToolResult {
+				var toolResultData struct {
+					ToolCallID string `json:"tool_call_id"`
+				}
+				if err := json.Unmarshal(content.Content, &toolResultData); err == nil {
+					if !toolCallIDs[toolResultData.ToolCallID] {
+						continue // Will be filtered out
+					}
+				}
+			}
+			validContentCount++
+		}
+
+		// Skip this message entirely if it would have no content
+		// LLM APIs require at least one content block per message
+		if validContentCount == 0 {
+			s.logger.Debugw("Skipping message with no valid content during compaction",
+				"message_uid", msg.UID,
+				"role", msg.Role,
+			)
+			continue
+		}
+
+		// Create the message as already complete; stream timestamps are left unset
+		newMessage, err := queries.CreateMessage(ctx, db.CreateMessageParams{
+			OriginThreadUID:        forkedThread.UID,
+			Idx:                    int32(newMessageIdx),
+			Role:                   msg.Role,
+			ModelID:                msg.ModelID,
+			UseThinking:            msg.UseThinking,
+			UseInterleavedThinking: msg.UseInterleavedThinking,
+			StreamStatus:           db.LlmMessageStreamStatusComplete,
+			StreamStartedAt:        pgtype.Timestamptz{},
+			StreamCompletedAt:      pgtype.Timestamptz{},
+		})
+		if err != nil {
+			s.logger.Errorw("failed to create message in forked thread",
+				"error", err,
+				"message_uid", msg.UID,
+				"forked_thread_uid", forkedThread.UID,
+			)
+			return nil, connect.NewError(connect.CodeInternal, errors.New("failed to create message"))
+		}
+
+		// Link message to thread
+		err = queries.AddMessageToThread(ctx, db.AddMessageToThreadParams{
+			ThreadUID:  forkedThread.UID,
+			MessageUID: newMessage.UID,
+		})
+		if err != nil {
+			s.logger.Errorw("failed to link message to forked thread",
+				"error", err,
+				"message_uid", newMessage.UID,
+				"forked_thread_uid", forkedThread.UID,
+			)
+			return nil, connect.NewError(connect.CodeInternal, errors.New("failed to link message"))
+		}
+
+		// Copy message contents (with filtering); contentIdx renumbers the surviving blocks from zero
+		contentIdx := 0
+		for _, content := range contents {
+			// Skip tool results that don't have a corresponding tool use in the compacted messages
+			if content.Type == db.LlmMessageContentTypeToolResult {
+				var toolResultData struct {
+					ToolCallID string `json:"tool_call_id"`
+				}
+				if err := json.Unmarshal(content.Content, &toolResultData); err == nil {
+					if !toolCallIDs[toolResultData.ToolCallID] {
+						s.logger.Debugw("Skipping orphaned tool result during compaction",
+							"tool_call_id", toolResultData.ToolCallID,
+							"message_uid", msg.UID,
+						)
+						continue
+					}
+				}
+			}
+
+			_, err := queries.CreateMessageContent(ctx, db.CreateMessageContentParams{
+				UID:               uuid.New(),
+				MessageUID:        newMessage.UID,
+				Idx:               int32(contentIdx),
+				Type:              content.Type,
+				Content:           content.Content,
+				PartialContent:    pgtype.Text{},
+				StreamStatus:      db.LlmMessageStreamStatusComplete,
+				StreamStartedAt:   pgtype.Timestamptz{},
+				StreamCompletedAt: pgtype.Timestamptz{},
+			})
+			if err != nil {
+				s.logger.Errorw("failed to create message content in forked thread",
+					"error", err,
+					"content_uid", content.UID,
+					"message_uid", newMessage.UID,
+				)
+				return nil, connect.NewError(connect.CodeInternal, errors.New("failed to copy message content"))
+			}
+			contentIdx++
+		}
+
+		// Increment the index for the next message
+		newMessageIdx++
+	}
+
+	// Build the forked thread path under the same parent as the original thread
+	forkedThreadPath := &resourcepath.ThreadPath{
+		Parent:    originalThreadPath.Parent,
+		ThreadUID: forkedThread.UID,
+	}
+
+	// Convert to proto message
+	return &threadv1.Thread{
+		Path:           forkedThreadPath.String(),
+		DisplayName:    forkedThread.Title,
+		ParentThreadId: &threadv1.Thread_ParentThreadId{Path: originalThreadPath.String()},
+		PersonaUid:     originalThread.PersonaUID.String(),
+		EnvironmentUid: "",  // Not set for now
+		CreateTime:     nil, // Could populate from DB timestamp
+		UpdateTime:     nil,
+		LlmState:       threadv1.ThreadLLMState_THREAD_LLM_STATE_IDLE,
+	}, nil
+}
diff --git a/tim-api/internal/services/thread/message_handlers.go b/tim-api/internal/services/thread/message_handlers.go
index 907862b28..86b2c7d59 100644
--- a/tim-api/internal/services/thread/message_handlers.go
+++ b/tim-api/internal/services/thread/message_handlers.go
@@ -13,6 +13,7 @@ import (
 	"github.com/Greybox-Labs/tim/shared/proto"
 	"github.com/Greybox-Labs/tim/tim-api/internal/authz"
 	"github.com/Greybox-Labs/tim/tim-api/internal/database"
+	"github.com/Greybox-Labs/tim/tim-api/internal/job_queue"
 	"github.com/Greybox-Labs/tim/tim-api/internal/mapper"
 	"github.com/Greybox-Labs/tim/tim-api/internal/natsnotifier"
 	"github.com/Greybox-Labs/tim/tim-api/internal/pagination"
@@ -23,6 +24,7 @@ import (
 	toolv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/tool/v1alpha1"
 	"github.com/google/uuid"
 	"github.com/jackc/pgx/v5/pgtype"
+	"google.golang.org/protobuf/encoding/protojson"
 	"google.golang.org/protobuf/types/known/emptypb"
 	"google.golang.org/protobuf/types/known/structpb"
 	"google.golang.org/protobuf/types/known/timestamppb"
@@ -1045,6 +1047,80 @@ func (s *Service) SubmitUserMessage(
 		)
 	}
 
+	// Check if compaction is configured and needed
+	if req.Msg.CompactionConfig != nil {
+		// Get the persona revision to determine the model
+		personaRevision, err := queries.GetPersonaRevision(ctx, thread.PersonaRevisionUID)
+		if err != nil {
+			s.logger.Errorw("failed to get persona revision", "error", err)
+			return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get persona revision"))
+		}
+
+		// Check current token count
+		tokenCount, err := queries.GetThreadTokenCount(ctx, thread.UID)
+		if err != nil {
+			s.logger.Errorw("failed to get thread token count", "error", err)
+			return nil, connect.NewError(connect.CodeInternal, errors.New("failed to get token count"))
+		}
+
+		// Calculate threshold
+		modelID := llm.ModelID(personaRevision.ModelID)
+		thresholdTokens, err := llm.CalculateThresholdTokens(modelID, req.Msg.CompactionConfig.ThresholdPercentage)
+		if err != nil {
+			s.logger.Errorw("failed to calculate threshold", "error", err)
+			return nil, connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("invalid model or threshold: %w", err))
+		}
+
+		totalTokens := int64(tokenCount.TotalTokens)
+		s.logger.Infow("Checking compaction threshold",
+			"thread_uid", thread.UID,
+			"total_tokens", totalTokens,
+			"threshold_tokens", thresholdTokens,
+			"model", modelID,
+		)
+
+		// If threshold exceeded, trigger compaction before proceeding
+		if totalTokens >= int64(thresholdTokens) {
+			s.logger.Infow("Token threshold exceeded, triggering compaction",
+				"thread_uid", thread.UID,
+				"total_tokens", totalTokens,
+				"threshold_tokens", thresholdTokens,
+			)
+
+			// Convert compaction config to JSON for job args
+			configJSON, err := protojson.Marshal(req.Msg.CompactionConfig)
+			if err != nil {
+				s.logger.Errorw("failed to marshal compaction config", "error", err)
+				return nil, connect.NewError(connect.CodeInternal, errors.New("failed to marshal compaction config"))
+			}
+
+			// Convert to map for job args
+			var configMap map[string]interface{}
+			if err := json.Unmarshal(configJSON, &configMap); err != nil {
+				s.logger.Errorw("failed to unmarshal compaction config to map", "error", err)
+				return nil, connect.NewError(connect.CodeInternal, errors.New("failed to prepare job args"))
+			}
+
+			// Enqueue compaction job
+			jobArgs := job_queue.CompactionJobArgs{
+				ThreadPath:       req.Msg.Parent,
+				CompactionConfig: configMap,
+			}
+
+			if err := s.jobQueue.PushThreadCompactionJob(jobArgs); err != nil {
+				s.logger.Errorw("failed to enqueue compaction job", "error", err)
+				return nil, connect.NewError(connect.CodeInternal, errors.New("failed to enqueue compaction job"))
+			}
+
+			// Return an error indicating that compaction is in progress
+			// The client should retry after compaction completes
+			return nil, connect.NewError(
+				connect.CodeResourceExhausted,
+				errors.New("thread context exceeds threshold, compaction job enqueued - please retry after compaction completes"),
+			)
+		}
+	}
+
 	// Get next message index
 	lastMessage, err := queries.GetLastThreadMessage(ctx, thread.UID)
 	var nextIdx int32
diff --git a/tim-api/internal/services/thread_compaction/handlers.go b/tim-api/internal/services/thread_compaction/handlers.go
new file mode 100644
index 000000000..81b321889
--- /dev/null
+++ b/tim-api/internal/services/thread_compaction/handlers.go
@@ -0,0 +1,132 @@
+package thread_compaction
+
+import (
+	"context"
+	"errors"
+
+	"connectrpc.com/connect"
+	"github.com/Greybox-Labs/tim/tim-api/internal/database"
+	"github.com/Greybox-Labs/tim/tim-api/internal/resourcepath"
+	"github.com/Greybox-Labs/tim/tim-db/gen/db"
+	threadv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
+	threadcompactionv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1"
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5/pgtype"
+	"google.golang.org/protobuf/encoding/protojson"
+)
+
+// PushCompactionResult stores a worker-reported compaction result as a thread context and makes it the thread's active context
+func (s *Service) PushCompactionResult(
+	ctx context.Context,
+	req *connect.Request[threadcompactionv1.PushCompactionResultRequest],
+) (*connect.Response[threadcompactionv1.PushCompactionResultResponse], error) {
+	s.logger.Infow("PushCompactionResult called", "thread_path", req.Msg.Thread)
+
+	// Parse thread path
+	threadPath, err := resourcepath.ParseThreadPath(req.Msg.Thread)
+	if err != nil {
+		s.logger.Errorw("failed to parse thread path", "error", err, "thread_path", req.Msg.Thread)
+		return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("invalid thread path"))
+	}
+
+	result := req.Msg.Result
+	if result == nil {
+		return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("result is required"))
+	}
+
+	// Start a transaction. NOTE(review): error returns below never roll it back explicitly — confirm LazyTx cleans up
+	dbPool := &database.Pool{Pool: s.pool}
+	lazyTx := database.NewLazyTx(ctx, dbPool, s.logger)
+	queries := db.New(lazyTx)
+
+	// Convert strategy enum to database enum; unknown or unspecified values are rejected
+	var strategyDB db.CompactionStrategy
+	switch result.Strategy {
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION:
+		strategyDB = db.CompactionStrategyTruncation
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION:
+		strategyDB = db.CompactionStrategySummarization
+	case threadv1.CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED:
+		strategyDB = db.CompactionStrategyImportanceBased
+	default:
+		return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("invalid compaction strategy"))
+	}
+
+	// Parse the forked thread path if present
+	var forkedThreadUID *uuid.UUID
+	if result.ForkedThread != nil && result.ForkedThread.Path != "" {
+		forkedPath, err := resourcepath.ParseThreadPath(result.ForkedThread.Path)
+		if err != nil {
+			s.logger.Errorw("failed to parse forked thread path", "error", err)
+			return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("invalid forked thread path"))
+		}
+		forkedThreadUID = &forkedPath.ThreadUID
+	}
+
+	// Serialize the whole result as compaction metadata
+	metadataJSON, err := protojson.Marshal(result)
+	if err != nil {
+		s.logger.Errorw("failed to marshal compaction metadata", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to marshal metadata"))
+	}
+
+	// Resolve the last message of the forked thread, if any; it marks the compaction cutoff
+	var lastCompactedMessageUID *uuid.UUID
+	if forkedThreadUID != nil {
+		// GetLastThreadMessage is used because ListThreadMessages with PageLimit 1
+		// yields a single row that is not guaranteed to be the newest message.
+		lastMessage, err := queries.GetLastThreadMessage(ctx, *forkedThreadUID)
+		if err != nil {
+			// Best effort: the context record is still created without a cutoff message
+			s.logger.Infow("no last message resolved for forked thread", "error", err)
+		} else {
+			lastCompactedMessageUID = &lastMessage.UID
+		}
+	}
+
+	// Create thread context record; CompactedContext is left NULL
+	threadContext, err := queries.CreateThreadContext(ctx, db.CreateThreadContextParams{
+		ThreadUID:               threadPath.ThreadUID,
+		LastCompactedMessageUID: lastCompactedMessageUID,
+		Name:                    pgtype.Text{String: "Compacted Context", Valid: true},
+		CompactedContext:        pgtype.Text{},
+		CompactionStrategy:      db.NullCompactionStrategy{CompactionStrategy: strategyDB, Valid: true},
+		CompactionMetadata:      metadataJSON,
+		OriginalMessageCount:    pgtype.Int4{Int32: result.OriginalMessageCount, Valid: true},
+		CompactedMessageCount:   pgtype.Int4{Int32: result.CompactedMessageCount, Valid: true},
+		OriginalTokenCount:      pgtype.Int8{Int64: result.OriginalTokenCount, Valid: true},
+		CompactedTokenCount:     pgtype.Int8{Int64: result.CompactedTokenCount, Valid: true},
+	})
+	if err != nil {
+		s.logger.Errorw("failed to create thread context", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to create thread context"))
+	}
+
+	// Update thread's active_context_uid to point to the new context
+	err = queries.UpdateThreadActiveContext(ctx, db.UpdateThreadActiveContextParams{
+		ThreadUID:        threadPath.ThreadUID,
+		ActiveContextUID: threadContext.UID,
+	})
+	if err != nil {
+		s.logger.Errorw("failed to update thread active context", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to update active context"))
+	}
+
+	// Commit transaction
+	if err := lazyTx.Commit(ctx); err != nil {
+		s.logger.Errorw("failed to commit transaction", "error", err)
+		return nil, connect.NewError(connect.CodeInternal, errors.New("failed to commit transaction"))
+	}
+
+	s.logger.Infow("Compaction result saved successfully",
+		"thread_path", req.Msg.Thread,
+		"thread_context_uid", threadContext.UID,
+		"strategy", strategyDB,
+		"original_messages", result.OriginalMessageCount,
+		"compacted_messages", result.CompactedMessageCount,
+	)
+
+	return connect.NewResponse(&threadcompactionv1.PushCompactionResultResponse{
+		Success: true,
+	}), nil
+}
diff --git a/tim-api/internal/services/thread_compaction/service.go b/tim-api/internal/services/thread_compaction/service.go
new file mode 100644
index 000000000..3abc4e07b
--- /dev/null
+++ b/tim-api/internal/services/thread_compaction/service.go
@@ -0,0 +1,33 @@
+package thread_compaction
+
+import (
+	"net/http"
+
+	"connectrpc.com/connect"
+	"github.com/Greybox-Labs/tim/shared/logger"
+	thread_compactionv1alpha1connect "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compactionv1alpha1connect"
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/nats-io/nats.go"
+)
+
+// Service handles thread compaction operations and is registered as the ThreadCompactionService Connect handler
+type Service struct {
+	thread_compactionv1alpha1connect.UnimplementedThreadCompactionServiceHandler // embedded Unimplemented handler from the generated Connect package
+	pool   *pgxpool.Pool // connection pool; PushCompactionResult wraps it in a database.Pool for its transaction
+	logger *logger.Logger // logger used by the handlers
+	nats   *nats.Conn // stored by NewService; not read in the code visible here
+}
+
+// NewService builds a thread compaction Service that holds the given pool, logger and NATS connection
+func NewService(pool *pgxpool.Pool, logger *logger.Logger, nats *nats.Conn) *Service {
+	svc := new(Service)
+	svc.pool = pool
+	svc.logger = logger
+	svc.nats = nats
+	return svc
+}
+
+// NewServiceHandler returns the Connect RPC route path and the http.Handler that serves this Service
+func (s *Service) NewServiceHandler(opts ...connect.HandlerOption) (string, http.Handler) {
+	return thread_compactionv1alpha1connect.NewThreadCompactionServiceHandler(s, opts...) // opts are forwarded unchanged
+}
diff --git a/tim-db/gen/db/models.go b/tim-db/gen/db/models.go
index c1858bfcd..250926580 100644
--- a/tim-db/gen/db/models.go
+++ b/tim-db/gen/db/models.go
@@ -55,6 +55,49 @@ func (ns NullActiveStatus) Value() (driver.Value, error) {
 	return string(ns.ActiveStatus), nil
 }
 
+type CompactionStrategy string // string-backed enum naming a compaction strategy
+
+const (
+	CompactionStrategyTruncation      CompactionStrategy = "truncation" // mapped from COMPACTION_STRATEGY_TRUNCATION
+	CompactionStrategySummarization   CompactionStrategy = "summarization" // mapped from COMPACTION_STRATEGY_SUMMARIZATION
+	CompactionStrategyImportanceBased CompactionStrategy = "importance_based" // mapped from COMPACTION_STRATEGY_IMPORTANCE_BASED
+)
+
+func (e *CompactionStrategy) Scan(src interface{}) error { // accepts []byte or string; any other source type is an error
+	switch s := src.(type) {
+	case []byte:
+		*e = CompactionStrategy(s)
+	case string:
+		*e = CompactionStrategy(s)
+	default:
+		return fmt.Errorf("unsupported scan type for CompactionStrategy: %T", src)
+	}
+	return nil // the value is stored as-is; it is not checked against the declared constants
+}
+
+type NullCompactionStrategy struct { // nullable wrapper around CompactionStrategy
+	CompactionStrategy CompactionStrategy // the strategy; only meaningful when Valid is true
+	Valid              bool // Valid is true if CompactionStrategy is not NULL
+}
+
+// Scan implements the Scanner interface; a nil value resets the receiver to empty and not Valid.
+func (ns *NullCompactionStrategy) Scan(value interface{}) error {
+	if value == nil {
+		ns.CompactionStrategy, ns.Valid = "", false
+		return nil
+	}
+	ns.Valid = true // set before delegating, so Valid stays true even if the inner Scan fails
+	return ns.CompactionStrategy.Scan(value)
+}
+
+// Value implements the driver Valuer interface; it yields nil when not Valid, otherwise the strategy as a string.
+func (ns NullCompactionStrategy) Value() (driver.Value, error) {
+	if !ns.Valid {
+		return nil, nil
+	}
+	return string(ns.CompactionStrategy), nil
+}
+
 type LlmMessageContentType string
 
 const (
@@ -701,6 +744,12 @@ type ThreadContext struct {
 	CompactedContext        pgtype.Text
 	CreateTime              pgtype.Timestamptz
 	UpdateTime              pgtype.Timestamptz
+	CompactionStrategy      NullCompactionStrategy
+	CompactionMetadata      []byte
+	OriginalMessageCount    pgtype.Int4
+	CompactedMessageCount   pgtype.Int4
+	OriginalTokenCount      pgtype.Int8
+	CompactedTokenCount     pgtype.Int8
 }
 
 type ThreadMessage struct {
diff --git a/tim-db/gen/db/thread_context.sql.go b/tim-db/gen/db/thread_context.sql.go
new file mode 100644
index 000000000..f92339e98
--- /dev/null
+++ b/tim-db/gen/db/thread_context.sql.go
@@ -0,0 +1,197 @@
+// Code generated by sqlc. DO NOT EDIT.
+// versions:
+//   sqlc v1.30.0
+// source: thread_context.sql
+
+package db
+
+import (
+	"context"
+
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5/pgtype"
+)
+
+const createThreadContext = `-- name: CreateThreadContext :one
+INSERT INTO thread_context (
+    thread_uid,
+    last_compacted_message_uid,
+    name,
+    compacted_context,
+    compaction_strategy,
+    compaction_metadata,
+    original_message_count,
+    compacted_message_count,
+    original_token_count,
+    compacted_token_count
+) VALUES (
+    $1, $2, $3, $4, $5, $6, $7, $8, $9, $10
+) RETURNING uid, thread_uid, last_compacted_message_uid, name, compacted_context, create_time, update_time, compaction_strategy, compaction_metadata, original_message_count, compacted_message_count, original_token_count, compacted_token_count
+` // createThreadContext inserts one thread_context row and returns all thirteen columns of the new row.
+
+type CreateThreadContextParams struct { // Fields are bound, in declaration order, to placeholders $1 through $10.
+	ThreadUID               uuid.UUID
+	LastCompactedMessageUID *uuid.UUID
+	Name                    pgtype.Text
+	CompactedContext        pgtype.Text
+	CompactionStrategy      NullCompactionStrategy
+	CompactionMetadata      []byte
+	OriginalMessageCount    pgtype.Int4
+	CompactedMessageCount   pgtype.Int4
+	OriginalTokenCount      pgtype.Int8
+	CompactedTokenCount     pgtype.Int8
+}
+
+func (q *Queries) CreateThreadContext(ctx context.Context, arg CreateThreadContextParams) (ThreadContext, error) { // CreateThreadContext runs the insert and scans the RETURNING row into a ThreadContext.
+	row := q.db.QueryRow(ctx, createThreadContext,
+		arg.ThreadUID,
+		arg.LastCompactedMessageUID,
+		arg.Name,
+		arg.CompactedContext,
+		arg.CompactionStrategy,
+		arg.CompactionMetadata,
+		arg.OriginalMessageCount,
+		arg.CompactedMessageCount,
+		arg.OriginalTokenCount,
+		arg.CompactedTokenCount,
+	)
+	var i ThreadContext
+	err := row.Scan( // destinations are listed in the same order as the RETURNING columns
+		&i.UID,
+		&i.ThreadUID,
+		&i.LastCompactedMessageUID,
+		&i.Name,
+		&i.CompactedContext,
+		&i.CreateTime,
+		&i.UpdateTime,
+		&i.CompactionStrategy,
+		&i.CompactionMetadata,
+		&i.OriginalMessageCount,
+		&i.CompactedMessageCount,
+		&i.OriginalTokenCount,
+		&i.CompactedTokenCount,
+	)
+	return i, err // i is returned as-is even when err is non-nil; callers should check err first
+}
+
+const getActiveThreadContext = `-- name: GetActiveThreadContext :one
+SELECT tc.uid, tc.thread_uid, tc.last_compacted_message_uid, tc.name, tc.compacted_context, tc.create_time, tc.update_time, tc.compaction_strategy, tc.compaction_metadata, tc.original_message_count, tc.compacted_message_count, tc.original_token_count, tc.compacted_token_count FROM thread_context tc
+INNER JOIN thread t ON t.active_context_uid = tc.uid
+WHERE t.uid = $1
+` // getActiveThreadContext selects the thread_context row that thread.active_context_uid points at, for the thread whose uid is $1.
+
+func (q *Queries) GetActiveThreadContext(ctx context.Context, uid uuid.UUID) (ThreadContext, error) { // uid is the thread's uid, not a thread_context uid.
+	row := q.db.QueryRow(ctx, getActiveThreadContext, uid)
+	var i ThreadContext
+	err := row.Scan( // destinations follow the SELECT column order
+		&i.UID,
+		&i.ThreadUID,
+		&i.LastCompactedMessageUID,
+		&i.Name,
+		&i.CompactedContext,
+		&i.CreateTime,
+		&i.UpdateTime,
+		&i.CompactionStrategy,
+		&i.CompactionMetadata,
+		&i.OriginalMessageCount,
+		&i.CompactedMessageCount,
+		&i.OriginalTokenCount,
+		&i.CompactedTokenCount,
+	)
+	return i, err // NOTE(review): presumably a no-rows error when the thread has no active context — confirm against the db driver
+}
+
+const getThreadContext = `-- name: GetThreadContext :one
+SELECT uid, thread_uid, last_compacted_message_uid, name, compacted_context, create_time, update_time, compaction_strategy, compaction_metadata, original_message_count, compacted_message_count, original_token_count, compacted_token_count FROM thread_context
+WHERE uid = $1
+` // getThreadContext selects a single thread_context row by its own uid.
+
+func (q *Queries) GetThreadContext(ctx context.Context, uid uuid.UUID) (ThreadContext, error) { // uid is the thread_context row's uid.
+	row := q.db.QueryRow(ctx, getThreadContext, uid)
+	var i ThreadContext
+	err := row.Scan( // destinations follow the SELECT column order
+		&i.UID,
+		&i.ThreadUID,
+		&i.LastCompactedMessageUID,
+		&i.Name,
+		&i.CompactedContext,
+		&i.CreateTime,
+		&i.UpdateTime,
+		&i.CompactionStrategy,
+		&i.CompactionMetadata,
+		&i.OriginalMessageCount,
+		&i.CompactedMessageCount,
+		&i.OriginalTokenCount,
+		&i.CompactedTokenCount,
+	)
+	return i, err // i is returned as-is even when err is non-nil
+}
+
+const getThreadTokenCount = `-- name: GetThreadTokenCount :one
+SELECT
+  COALESCE(SUM(lm.input_tokens), 0) + COALESCE(SUM(lm.output_tokens), 0) as total_tokens,
+  COALESCE(SUM(lm.input_tokens), 0) as total_input_tokens,
+  COALESCE(SUM(lm.output_tokens), 0) as total_output_tokens
+FROM llm_message lm
+JOIN thread_message tm ON lm.uid = tm.message_uid
+WHERE tm.thread_uid = $1
+` // getThreadTokenCount sums input and output tokens over the llm_message rows joined to thread $1; COALESCE maps an empty sum to 0.
+
+type GetThreadTokenCountRow struct { // NOTE(review): SUM usually widens its input type, so int32 and interface{} look like inference fallbacks; consider explicit casts in the source query and regenerating — confirm column types.
+	TotalTokens       int32
+	TotalInputTokens  interface{}
+	TotalOutputTokens interface{}
+}
+
+func (q *Queries) GetThreadTokenCount(ctx context.Context, threadUid uuid.UUID) (GetThreadTokenCountRow, error) { // GetThreadTokenCount returns the three token totals for the given thread uid.
+	row := q.db.QueryRow(ctx, getThreadTokenCount, threadUid)
+	var i GetThreadTokenCountRow
+	err := row.Scan(&i.TotalTokens, &i.TotalInputTokens, &i.TotalOutputTokens) // same order as the SELECT list: total, input, output
+	return i, err
+}
+
+const listThreadContexts = `-- name: ListThreadContexts :many
+SELECT uid, thread_uid, last_compacted_message_uid, name, compacted_context, create_time, update_time, compaction_strategy, compaction_metadata, original_message_count, compacted_message_count, original_token_count, compacted_token_count FROM thread_context
+WHERE thread_uid = $1
+ORDER BY create_time DESC
+LIMIT $2
+` // listThreadContexts selects at most $2 context rows of thread $1, most recently created first.
+
+type ListThreadContextsParams struct { // ThreadUID is bound to $1 and Limit to $2.
+	ThreadUID uuid.UUID
+	Limit     int32
+}
+
+func (q *Queries) ListThreadContexts(ctx context.Context, arg ListThreadContextsParams) ([]ThreadContext, error) { // ListThreadContexts runs the query and collects every returned row into a slice.
+	rows, err := q.db.Query(ctx, listThreadContexts, arg.ThreadUID, arg.Limit)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close() // release the result set on every return path
+	var items []ThreadContext // stays nil when the query yields no rows
+	for rows.Next() {
+		var i ThreadContext
+		if err := rows.Scan(
+			&i.UID,
+			&i.ThreadUID,
+			&i.LastCompactedMessageUID,
+			&i.Name,
+			&i.CompactedContext,
+			&i.CreateTime,
+			&i.UpdateTime,
+			&i.CompactionStrategy,
+			&i.CompactionMetadata,
+			&i.OriginalMessageCount,
+			&i.CompactedMessageCount,
+			&i.OriginalTokenCount,
+			&i.CompactedTokenCount,
+		); err != nil {
+			return nil, err // rows collected so far are discarded on a scan error
+		}
+		items = append(items, i)
+	}
+	if err := rows.Err(); err != nil { // report an error that ended the iteration
+		return nil, err
+	}
+	return items, nil
+}
diff --git a/tim-db/migrations/20251107174417_add_compaction_metadata.sql b/tim-db/migrations/20251107174417_add_compaction_metadata.sql
new file mode 100644
index 000000000..2719b761a
--- /dev/null
+++ b/tim-db/migrations/20251107174417_add_compaction_metadata.sql
@@ -0,0 +1,38 @@
+-- migrate:up
+
+-- Create the enum type whose labels name the supported compaction strategies
+CREATE TYPE compaction_strategy AS ENUM (
+    'truncation',
+    'summarization',
+    'importance_based'
+);
+
+-- Add compaction metadata columns to thread_context; none declares NOT NULL or a default, so all are nullable
+ALTER TABLE thread_context
+    ADD COLUMN compaction_strategy compaction_strategy,
+    ADD COLUMN compaction_metadata jsonb,
+    ADD COLUMN original_message_count integer,
+    ADD COLUMN compacted_message_count integer,
+    ADD COLUMN original_token_count bigint,
+    ADD COLUMN compacted_token_count bigint;
+
+-- Partial index for querying by compaction strategy; rows whose strategy is NULL are left out of the index
+CREATE INDEX idx_thread_context_strategy ON thread_context(compaction_strategy) WHERE compaction_strategy IS NOT NULL;
+
+-- migrate:down
+
+-- Remove the partial index on compaction_strategy
+DROP INDEX IF EXISTS idx_thread_context_strategy;
+
+-- Remove the six compaction metadata columns from thread_context (IF EXISTS tolerates a column that is already gone)
+ALTER TABLE thread_context
+    DROP COLUMN IF EXISTS compaction_strategy,
+    DROP COLUMN IF EXISTS compaction_metadata,
+    DROP COLUMN IF EXISTS original_message_count,
+    DROP COLUMN IF EXISTS compacted_message_count,
+    DROP COLUMN IF EXISTS original_token_count,
+    DROP COLUMN IF EXISTS compacted_token_count;
+
+-- Drop the enum type last, after the column that uses it has been removed
+DROP TYPE IF EXISTS compaction_strategy;
+
diff --git a/tim-db/queries/thread_context.sql b/tim-db/queries/thread_context.sql
new file mode 100644
index 000000000..d562a635b
--- /dev/null
+++ b/tim-db/queries/thread_context.sql
@@ -0,0 +1,40 @@
+-- name: CreateThreadContext :one
+INSERT INTO thread_context ( -- uid, create_time and update_time are not listed; presumably filled by column defaults (confirm against the table definition)
+    thread_uid,
+    last_compacted_message_uid,
+    name,
+    compacted_context,
+    compaction_strategy,
+    compaction_metadata,
+    original_message_count,
+    compacted_message_count,
+    original_token_count,
+    compacted_token_count
+) VALUES (
+    $1, $2, $3, $4, $5, $6, $7, $8, $9, $10 -- one placeholder per listed column, in the same order
+) RETURNING *;
+
+-- name: GetThreadContext :one
+SELECT * FROM thread_context -- $1 below is the uid of the thread_context row itself
+WHERE uid = $1;
+
+-- name: GetActiveThreadContext :one
+SELECT tc.* FROM thread_context tc -- only thread_context columns are returned; $1 below is the thread's uid
+INNER JOIN thread t ON t.active_context_uid = tc.uid -- follow the thread's active_context_uid reference
+WHERE t.uid = $1;
+
+-- name: ListThreadContexts :many
+SELECT * FROM thread_context
+WHERE thread_uid = $1 -- contexts belonging to one thread
+ORDER BY create_time DESC -- most recently created first
+LIMIT $2;
+
+-- name: GetThreadTokenCount :one
+SELECT -- NOTE(review): the generated row type is int32 plus two untyped fields; explicit casts on these expressions (then regenerating) would give concrete Go types — confirm the column types first
+  COALESCE(SUM(lm.input_tokens), 0) + COALESCE(SUM(lm.output_tokens), 0) as total_tokens,
+  COALESCE(SUM(lm.input_tokens), 0) as total_input_tokens, -- COALESCE maps the NULL sum of an empty set to 0
+  COALESCE(SUM(lm.output_tokens), 0) as total_output_tokens
+FROM llm_message lm
+JOIN thread_message tm ON lm.uid = tm.message_uid -- only llm_message rows linked through thread_message are counted
+WHERE tm.thread_uid = $1;
+
diff --git a/tim-proto/gen/openapi.yaml b/tim-proto/gen/openapi.yaml
index 4cfa25ce6..372b4b371 100644
--- a/tim-proto/gen/openapi.yaml
+++ b/tim-proto/gen/openapi.yaml
@@ -1762,6 +1762,76 @@ paths:
                   required: true
                   schema:
                     type: string
+                - name: compactionConfig.thresholdPercentage
+                  in: query
+                  description: |-
+                    Threshold percentage (0.0-1.0) of model's max capacity at which to trigger compaction
+                     e.g., 0.8 means compact when context reaches 80% of model's max tokens
+                  schema:
+                    type: number
+                    format: float
+                - name: compactionConfig.strategy
+                  in: query
+                  description: The compaction strategy to use
+                  schema:
+                    enum:
+                        - COMPACTION_STRATEGY_UNSPECIFIED
+                        - COMPACTION_STRATEGY_TRUNCATION
+                        - COMPACTION_STRATEGY_SUMMARIZATION
+                        - COMPACTION_STRATEGY_IMPORTANCE_BASED
+                    type: string
+                    format: enum
+                - name: compactionConfig.createFork
+                  in: query
+                  description: |-
+                    Whether to create a fork (true) or compact in-place (false, for future use)
+                     Currently only fork mode is supported
+                  schema:
+                    type: boolean
+                - name: compactionConfig.truncation.keepRecentMessages
+                  in: query
+                  description: 'Number of recent messages to keep (default: 10)'
+                  schema:
+                    type: integer
+                    format: int32
+                - name: compactionConfig.summarization.summaryLength
+                  in: query
+                  description: Target length for the summary in tokens (approximate)
+                  schema:
+                    type: integer
+                    format: int32
+                - name: compactionConfig.summarization.preserveSystemMessages
+                  in: query
+                  description: Whether to preserve system messages
+                  schema:
+                    type: boolean
+                - name: compactionConfig.summarization.keepRecentMessages
+                  in: query
+                  description: Number of recent messages to keep unchanged (will not be summarized)
+                  schema:
+                    type: integer
+                    format: int32
+                - name: compactionConfig.importanceBased.preserveUserMessages
+                  in: query
+                  description: Always preserve user messages
+                  schema:
+                    type: boolean
+                - name: compactionConfig.importanceBased.preserveToolCalls
+                  in: query
+                  description: Always preserve tool calls and their results
+                  schema:
+                    type: boolean
+                - name: compactionConfig.importanceBased.summarizeAssistantBlocks
+                  in: query
+                  description: Summarize assistant text blocks that are not adjacent to tool calls
+                  schema:
+                    type: boolean
+                - name: compactionConfig.importanceBased.keepRecentMessages
+                  in: query
+                  description: Number of recent messages to keep unchanged regardless of importance
+                  schema:
+                    type: integer
+                    format: int32
             requestBody:
                 content:
                     application/json:
@@ -2107,6 +2177,50 @@ paths:
                         application/json:
                             schema:
                                 $ref: '#/components/schemas/Status'
+    /v1alpha1/orgs/{org}/users/{user}/threads/{thread}:compact:
+        post:
+            tags:
+                - ThreadService
+            description: Compact a thread to reduce context size
+            operationId: ThreadService_CompactThread
+            parameters:
+                - name: org
+                  in: path
+                  description: The org id.
+                  required: true
+                  schema:
+                    type: string
+                - name: user
+                  in: path
+                  description: The user id.
+                  required: true
+                  schema:
+                    type: string
+                - name: thread
+                  in: path
+                  description: The thread id.
+                  required: true
+                  schema:
+                    type: string
+            requestBody:
+                content:
+                    application/json:
+                        schema:
+                            $ref: '#/components/schemas/CompactThreadRequest'
+                required: true
+            responses:
+                "200":
+                    description: OK
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/CompactThreadResponse'
+                default:
+                    description: Default error response
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/Status'
     /v1alpha1/orgs/{org}/users/{user}/threads/{thread}:configureWorkingDirectory:
         post:
             tags:
@@ -2148,6 +2262,50 @@ paths:
                         application/json:
                             schema:
                                 $ref: '#/components/schemas/Status'
+    /v1alpha1/orgs/{org}/users/{user}/threads/{thread}:pushCompactionResult:
+        post:
+            tags:
+                - ThreadCompactionService
+            description: Push compaction result from worker to API server
+            operationId: ThreadCompactionService_PushCompactionResult
+            parameters:
+                - name: org
+                  in: path
+                  description: The org id.
+                  required: true
+                  schema:
+                    type: string
+                - name: user
+                  in: path
+                  description: The user id.
+                  required: true
+                  schema:
+                    type: string
+                - name: thread
+                  in: path
+                  description: The thread id.
+                  required: true
+                  schema:
+                    type: string
+            requestBody:
+                content:
+                    application/json:
+                        schema:
+                            $ref: '#/components/schemas/PushCompactionResultRequest'
+                required: true
+            responses:
+                "200":
+                    description: OK
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/PushCompactionResultResponse'
+                default:
+                    description: Default error response
+                    content:
+                        application/json:
+                            schema:
+                                $ref: '#/components/schemas/Status'
     /v1alpha1/orgs/{org}/users/{user}/threads/{thread}:stream:
         get:
             tags:
@@ -2421,6 +2579,100 @@ components:
                     type: string
                     description: Optional reason for cancellation
             description: CancelSubscriptionRequest is the request for canceling a subscription
+        CompactThreadRequest:
+            required:
+                - path
+                - compactionConfig
+            type: object
+            properties:
+                path:
+                    type: string
+                    description: The resource path of the thread to compact
+                compactionConfig:
+                    allOf:
+                        - $ref: '#/components/schemas/CompactionConfig'
+                    description: The compaction configuration
+            description: CompactThreadRequest is used to manually compact a thread
+        CompactThreadResponse:
+            required:
+                - result
+            type: object
+            properties:
+                result:
+                    allOf:
+                        - $ref: '#/components/schemas/CompactionResult'
+                    description: The compaction result
+            description: CompactThreadResponse is the response for compacting a thread
+        CompactionConfig:
+            type: object
+            properties:
+                thresholdPercentage:
+                    type: number
+                    description: |-
+                        Threshold percentage (0.0-1.0) of model's max capacity at which to trigger compaction
+                         e.g., 0.8 means compact when context reaches 80% of model's max tokens
+                    format: float
+                strategy:
+                    enum:
+                        - COMPACTION_STRATEGY_UNSPECIFIED
+                        - COMPACTION_STRATEGY_TRUNCATION
+                        - COMPACTION_STRATEGY_SUMMARIZATION
+                        - COMPACTION_STRATEGY_IMPORTANCE_BASED
+                    type: string
+                    description: The compaction strategy to use
+                    format: enum
+                createFork:
+                    type: boolean
+                    description: |-
+                        Whether to create a fork (true) or compact in-place (false, for future use)
+                         Currently only fork mode is supported
+                truncation:
+                    allOf:
+                        - $ref: '#/components/schemas/TruncationParams'
+                    description: Parameters for truncation strategy
+                summarization:
+                    allOf:
+                        - $ref: '#/components/schemas/SummarizationParams'
+                    description: Parameters for summarization strategy
+                importanceBased:
+                    allOf:
+                        - $ref: '#/components/schemas/ImportanceBasedParams'
+                    description: Parameters for importance-based strategy
+            description: Configuration for thread context compaction
+        CompactionResult:
+            type: object
+            properties:
+                forkedThread:
+                    allOf:
+                        - $ref: '#/components/schemas/Thread'
+                    description: The forked thread containing compacted messages
+                strategy:
+                    enum:
+                        - COMPACTION_STRATEGY_UNSPECIFIED
+                        - COMPACTION_STRATEGY_TRUNCATION
+                        - COMPACTION_STRATEGY_SUMMARIZATION
+                        - COMPACTION_STRATEGY_IMPORTANCE_BASED
+                    type: string
+                    description: The strategy that was used
+                    format: enum
+                originalMessageCount:
+                    type: integer
+                    description: Original message count before compaction
+                    format: int32
+                compactedMessageCount:
+                    type: integer
+                    description: Compacted message count after compaction
+                    format: int32
+                originalTokenCount:
+                    type: string
+                    description: Original total token count
+                compactedTokenCount:
+                    type: string
+                    description: Compacted total token count
+                threadContextUid:
+                    type: string
+                    description: UID of the thread context record
+            description: Result of a compaction operation
         ConfigureThreadWorkingDirectoryRequest:
             required:
                 - path
@@ -2757,6 +3009,23 @@ components:
                     description: The type of the serialized message.
             additionalProperties: true
             description: Contains an arbitrary serialized message along with a @type that describes the type of the serialized message.
+        ImportanceBasedParams:
+            type: object
+            properties:
+                preserveUserMessages:
+                    type: boolean
+                    description: Always preserve user messages
+                preserveToolCalls:
+                    type: boolean
+                    description: Always preserve tool calls and their results
+                summarizeAssistantBlocks:
+                    type: boolean
+                    description: Summarize assistant text blocks that are not adjacent to tool calls
+                keepRecentMessages:
+                    type: integer
+                    description: Number of recent messages to keep unchanged regardless of importance
+                    format: int32
+            description: Parameters for importance-based strategy
         InheritanceConfig:
             type: object
             properties:
@@ -3334,6 +3603,27 @@ components:
                     type: string
                     description: URL to return to if checkout is canceled
             description: PurchaseCreditPackRequest is the request for purchasing a credit pack
+        PushCompactionResultRequest:
+            required:
+                - thread
+                - result
+            type: object
+            properties:
+                thread:
+                    type: string
+                    description: The resource path of the original thread that was compacted
+                result:
+                    allOf:
+                        - $ref: '#/components/schemas/CompactionResult'
+                    description: The compaction result
+            description: PushCompactionResultRequest is used to push a compaction result from worker to API
+        PushCompactionResultResponse:
+            type: object
+            properties:
+                success:
+                    type: boolean
+                    description: Acknowledgment that the result was received and processed
+            description: PushCompactionResultResponse is the response after pushing compaction result
         PushLlmTokenUsageRequest:
             required:
                 - path
@@ -3574,6 +3864,21 @@ components:
                     type: string
                     description: Stripe Checkout URL for completing payment (only present on creation)
             description: Subscription represents an organization's subscription to a plan
+        SummarizationParams:
+            type: object
+            properties:
+                summaryLength:
+                    type: integer
+                    description: Target length for the summary in tokens (approximate)
+                    format: int32
+                preserveSystemMessages:
+                    type: boolean
+                    description: Whether to preserve system messages
+                keepRecentMessages:
+                    type: integer
+                    description: Number of recent messages to keep unchanged (will not be summarized)
+                    format: int32
+            description: Parameters for summarization strategy
         Thinking:
             type: object
             properties:
@@ -3846,6 +4151,14 @@ components:
                         Whether this tool result should stop the LLM iteration loop
                          Set to true by tools like query_complete, code_complete to signal conversation completion
             description: The result of a tool call
+        TruncationParams:
+            type: object
+            properties:
+                keepRecentMessages:
+                    type: integer
+                    description: 'Number of recent messages to keep (default: 10)'
+                    format: int32
+            description: Parameters for truncation strategy
         User:
             type: object
             properties:
@@ -3942,6 +4255,8 @@ tags:
       description: OrganizationService is the service for managing organizations
     - name: PersonaService
       description: PersonaService defines the APIs for managing personas
+    - name: ThreadCompactionService
+      description: ThreadCompactionService is an internal service for handling thread compaction results from workers
     - name: ThreadContextService
       description: ThreadContextService is an internal service for accessing a thread's active context
     - name: ThreadService
diff --git a/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.pb.go b/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.pb.go
index b51bc85e5..671594b02 100644
--- a/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.pb.go
+++ b/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.pb.go
@@ -807,9 +807,12 @@ type SubmitUserMessageRequest struct {
 	// The resource path of the thread to add a message to.
 	Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
 	// The user message to submit
-	UserMessage   *UserMessage `protobuf:"bytes,2,opt,name=user_message,json=userMessage,proto3" json:"user_message,omitempty"`
-	unknownFields protoimpl.UnknownFields
-	sizeCache     protoimpl.SizeCache
+	UserMessage *UserMessage `protobuf:"bytes,2,opt,name=user_message,json=userMessage,proto3" json:"user_message,omitempty"`
+	// Optional compaction configuration for automatic compaction
+	// If provided and threshold is exceeded, the thread will be compacted before processing
+	CompactionConfig *CompactionConfig `protobuf:"bytes,3,opt,name=compaction_config,json=compactionConfig,proto3,oneof" json:"compaction_config,omitempty"`
+	unknownFields    protoimpl.UnknownFields
+	sizeCache        protoimpl.SizeCache
 }
 
 func (x *SubmitUserMessageRequest) Reset() {
@@ -856,6 +859,13 @@ func (x *SubmitUserMessageRequest) GetUserMessage() *UserMessage {
 	return nil
 }
 
+func (x *SubmitUserMessageRequest) GetCompactionConfig() *CompactionConfig { // GetCompactionConfig returns the optional compaction config, or nil when x is nil or the field is unset.
+	if x != nil {
+		return x.CompactionConfig
+	}
+	return nil // nil receiver: safe to call without a prior nil check
+}
+
 // Input for submitting a user message
 type UserMessage struct {
 	state protoimpl.MessageState `protogen:"open.v1"`
@@ -1101,6 +1111,107 @@ func (x *ConfigureThreadWorkingDirectoryRequest) GetWorkingDirectory() string {
 	return ""
 }
 
+// CompactThreadRequest is the request message of the CompactThread RPC, used to manually compact a thread
+type CompactThreadRequest struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// The resource path of the thread to compact (proto field 1)
+	Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
+	// The compaction configuration (proto field 2)
+	CompactionConfig *CompactionConfig `protobuf:"bytes,2,opt,name=compaction_config,json=compactionConfig,proto3" json:"compaction_config,omitempty"`
+	unknownFields    protoimpl.UnknownFields
+	sizeCache        protoimpl.SizeCache
+}
+
+func (x *CompactThreadRequest) Reset() { // Reset overwrites *x with a zero-value message and stores the message info for slot 17 on it.
+	*x = CompactThreadRequest{}
+	mi := &file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[17]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *CompactThreadRequest) String() string { // String delegates to protoimpl's MessageStringOf.
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*CompactThreadRequest) ProtoMessage() {} // ProtoMessage has an empty body.
+
+func (x *CompactThreadRequest) ProtoReflect() protoreflect.Message { // ProtoReflect returns the protoreflect.Message view of x.
+	mi := &file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[17]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil { // message info not stored yet: attach it on first use
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x) // nil receiver: obtain the view from the message info instead
+}
+
+// Deprecated: Use CompactThreadRequest.ProtoReflect.Descriptor instead.
+func (*CompactThreadRequest) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_service_proto_rawDescGZIP(), []int{17} // index 17 is the same slot used by Reset and ProtoReflect
+}
+
+func (x *CompactThreadRequest) GetPath() string { // GetPath returns Path, or "" when x is nil.
+	if x != nil {
+		return x.Path
+	}
+	return ""
+}
+
+func (x *CompactThreadRequest) GetCompactionConfig() *CompactionConfig { // GetCompactionConfig returns CompactionConfig, or nil when x is nil.
+	if x != nil {
+		return x.CompactionConfig
+	}
+	return nil
+}
+
+// CompactThreadResponse is the response message of the CompactThread RPC
+type CompactThreadResponse struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// The compaction result (proto field 1)
+	Result        *CompactionResult `protobuf:"bytes,1,opt,name=result,proto3" json:"result,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *CompactThreadResponse) Reset() { // Reset overwrites *x with a zero-value message and stores the message info for slot 18 on it.
+	*x = CompactThreadResponse{}
+	mi := &file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[18]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *CompactThreadResponse) String() string { // String delegates to protoimpl's MessageStringOf.
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*CompactThreadResponse) ProtoMessage() {} // ProtoMessage has an empty body.
+
+func (x *CompactThreadResponse) ProtoReflect() protoreflect.Message { // ProtoReflect returns the protoreflect.Message view of x.
+	mi := &file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[18]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil { // message info not stored yet: attach it on first use
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x) // nil receiver: obtain the view from the message info instead
+}
+
+// Deprecated: Use CompactThreadResponse.ProtoReflect.Descriptor instead.
+func (*CompactThreadResponse) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_service_proto_rawDescGZIP(), []int{18} // index 18 is the same slot used by Reset and ProtoReflect
+}
+
+func (x *CompactThreadResponse) GetResult() *CompactionResult { // GetResult returns Result, or nil when x is nil.
+	if x != nil {
+		return x.Result
+	}
+	return nil
+}
+
 var File_tim_api_thread_v1alpha1_thread_service_proto protoreflect.FileDescriptor
 
 const file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc = "" +
@@ -1157,10 +1268,12 @@ const file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc = "" +
 	"\ttool_call\x18\x04 \x01(\v2\x1f.tim.api.tool.v1alpha1.ToolCallH\x00R\btoolCall\x12a\n" +
 	"\x13thread_state_change\x18\x05 \x01(\v2/.tim.api.thread.v1alpha1.ThreadStateChangeEventH\x00R\x11threadStateChange\x12N\n" +
 	"\fstream_error\x18\x06 \x01(\v2).tim.api.thread.v1alpha1.StreamErrorEventH\x00R\vstreamErrorB\a\n" +
-	"\x05event\"\xfa\x01\n" +
+	"\x05event\"\xed\x02\n" +
 	"\x18SubmitUserMessageRequest\x12\x89\x01\n" +
 	"\x06parent\x18\x01 \x01(\tBq\xe0A\x02\xbaHKrI2G^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$\u0091\x05\x1c\x12\x1atim.settlerlabs.com/threadR\x06parent\x12R\n" +
-	"\fuser_message\x18\x02 \x01(\v2$.tim.api.thread.v1alpha1.UserMessageB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\vuserMessage\"1\n" +
+	"\fuser_message\x18\x02 \x01(\v2$.tim.api.thread.v1alpha1.UserMessageB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\vuserMessage\x12[\n" +
+	"\x11compaction_config\x18\x03 \x01(\v2).tim.api.thread.v1alpha1.CompactionConfigH\x00R\x10compactionConfig\x88\x01\x01B\x14\n" +
+	"\x12_compaction_config\"1\n" +
 	"\vUserMessage\x12\"\n" +
 	"\x04text\x18\x01 \x01(\tB\x0e\xbaH\v\xc8\x01\x01r\x06\x10\x01\x18\x80\x80\x02R\x04text\"\xd2\x02\n" +
 	"\x18EditThreadMessageRequest\x12\xa5\x01\n" +
@@ -1180,7 +1293,12 @@ const file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc = "" +
 	"&ConfigureThreadWorkingDirectoryRequest\x12\x85\x01\n" +
 	"\x04path\x18\x01 \x01(\tBq\xe0A\x02\xbaHKrI2G^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$\u0091\x05\x1c\x12\x1atim.settlerlabs.com/threadR\x04path\x127\n" +
 	"\x11working_directory\x18\x02 \x01(\tB\n" +
-	"\xbaH\a\xc8\x01\x01r\x02\x10\x01R\x10workingDirectory2\xae\x10\n" +
+	"\xbaH\a\xc8\x01\x01r\x02\x10\x01R\x10workingDirectory\"\x81\x02\n" +
+	"\x14CompactThreadRequest\x12\x85\x01\n" +
+	"\x04path\x18\x01 \x01(\tBq\xe0A\x02\xbaHKrI2G^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$\u0091\x05\x1c\x12\x1atim.settlerlabs.com/threadR\x04path\x12a\n" +
+	"\x11compaction_config\x18\x02 \x01(\v2).tim.api.thread.v1alpha1.CompactionConfigB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\x10compactionConfig\"e\n" +
+	"\x15CompactThreadResponse\x12L\n" +
+	"\x06result\x18\x01 \x01(\v2).tim.api.thread.v1alpha1.CompactionResultB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\x06result2\xf6\x11\n" +
 	"\rThreadService\x12\x91\x01\n" +
 	"\tGetThread\x12).tim.api.thread.v1alpha1.GetThreadRequest\x1a\x1f.tim.api.thread.v1alpha1.Thread\"8\xdaA\x04path\x82\xd3\xe4\x93\x02+\x12)/v1alpha1/{path=orgs/*/users/*/threads/*}\x12\xa4\x01\n" +
 	"\vListThreads\x12+.tim.api.thread.v1alpha1.ListThreadsRequest\x1a,.tim.api.thread.v1alpha1.ListThreadsResponse\":\xdaA\x06parent\x82\xd3\xe4\x93\x02+\x12)/v1alpha1/{parent=orgs/*/users/*}/threads\x12\xb8\x01\n" +
@@ -1193,7 +1311,8 @@ const file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc = "" +
 	"\x12StreamThreadEvents\x122.tim.api.thread.v1alpha1.StreamThreadEventsRequest\x1a3.tim.api.thread.v1alpha1.StreamThreadEventsResponse\"C\xdaA\x06parent\x82\xd3\xe4\x93\x024\x122/v1alpha1/{parent=orgs/*/users/*/threads/*}:stream0\x01\x12\xdf\x01\n" +
 	"\x11SubmitUserMessage\x121.tim.api.thread.v1alpha1.SubmitUserMessageRequest\x1a#.tim.api.thread.v1alpha1.LlmMessage\"r\xdaA\x13parent,user_message\x82\xd3\xe4\x93\x02V:\fuser_message\"F/v1alpha1/{parent=orgs/*/users/*/threads/*}/messages:submitUserMessage\x12\xcf\x01\n" +
 	"\x11EditThreadMessage\x121.tim.api.thread.v1alpha1.EditThreadMessageRequest\x1a2.tim.api.thread.v1alpha1.EditThreadMessageResponse\"S\xdaA\fpath,content\x82\xd3\xe4\x93\x02>:\x01*\"9/v1alpha1/{path=orgs/*/users/*/threads/*/messages/*}:edit\x12\xe3\x01\n" +
-	"\x1fConfigureThreadWorkingDirectory\x12?.tim.api.thread.v1alpha1.ConfigureThreadWorkingDirectoryRequest\x1a\x16.google.protobuf.Empty\"g\xdaA\x16path,working_directory\x82\xd3\xe4\x93\x02H:\x01*\"C/v1alpha1/{path=orgs/*/users/*/threads/*}:configureWorkingDirectoryB\x82\x02\n" +
+	"\x1fConfigureThreadWorkingDirectory\x12?.tim.api.thread.v1alpha1.ConfigureThreadWorkingDirectoryRequest\x1a\x16.google.protobuf.Empty\"g\xdaA\x16path,working_directory\x82\xd3\xe4\x93\x02H:\x01*\"C/v1alpha1/{path=orgs/*/users/*/threads/*}:configureWorkingDirectory\x12\xc5\x01\n" +
+	"\rCompactThread\x12-.tim.api.thread.v1alpha1.CompactThreadRequest\x1a..tim.api.thread.v1alpha1.CompactThreadResponse\"U\xdaA\x16path,compaction_config\x82\xd3\xe4\x93\x026:\x01*\"1/v1alpha1/{path=orgs/*/users/*/threads/*}:compactB\x82\x02\n" +
 	"\x1bcom.tim.api.thread.v1alpha1B\x12ThreadServiceProtoP\x01ZPgithub.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1;threadv1alpha1\xa2\x02\x03TAT\xaa\x02\x17Tim.Api.Thread.V1alpha1\xca\x02\x17Tim\\Api\\Thread\\V1alpha1\xe2\x02#Tim\\Api\\Thread\\V1alpha1\\GPBMetadata\xea\x02\x1aTim::Api::Thread::V1alpha1b\x06proto3"
 
 var (
@@ -1208,7 +1327,7 @@ func file_tim_api_thread_v1alpha1_thread_service_proto_rawDescGZIP() []byte {
 	return file_tim_api_thread_v1alpha1_thread_service_proto_rawDescData
 }
 
-var file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes = make([]protoimpl.MessageInfo, 17)
+var file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes = make([]protoimpl.MessageInfo, 19) // 19 entries, matching NumMessages: 19 in this file's init
 var file_tim_api_thread_v1alpha1_thread_service_proto_goTypes = []any{
 	(*GetThreadRequest)(nil),                       // 0: tim.api.thread.v1alpha1.GetThreadRequest
 	(*ListThreadsRequest)(nil),                     // 1: tim.api.thread.v1alpha1.ListThreadsRequest
@@ -1227,60 +1346,69 @@ var file_tim_api_thread_v1alpha1_thread_service_proto_goTypes = []any{
 	(*EditThreadMessageRequest)(nil),               // 14: tim.api.thread.v1alpha1.EditThreadMessageRequest
 	(*EditThreadMessageResponse)(nil),              // 15: tim.api.thread.v1alpha1.EditThreadMessageResponse
 	(*ConfigureThreadWorkingDirectoryRequest)(nil), // 16: tim.api.thread.v1alpha1.ConfigureThreadWorkingDirectoryRequest
-	(*Thread)(nil),                                 // 17: tim.api.thread.v1alpha1.Thread
-	(*LlmMessage)(nil),                             // 18: tim.api.thread.v1alpha1.LlmMessage
-	(*ContentStartEvent)(nil),                      // 19: tim.api.thread.v1alpha1.ContentStartEvent
-	(*ContentDeltaEvent)(nil),                      // 20: tim.api.thread.v1alpha1.ContentDeltaEvent
-	(*ContentStopEvent)(nil),                       // 21: tim.api.thread.v1alpha1.ContentStopEvent
-	(*v1alpha1.ToolCall)(nil),                      // 22: tim.api.tool.v1alpha1.ToolCall
-	(*ThreadStateChangeEvent)(nil),                 // 23: tim.api.thread.v1alpha1.ThreadStateChangeEvent
-	(*StreamErrorEvent)(nil),                       // 24: tim.api.thread.v1alpha1.StreamErrorEvent
-	(*FileRestoration)(nil),                        // 25: tim.api.thread.v1alpha1.FileRestoration
-	(*emptypb.Empty)(nil),                          // 26: google.protobuf.Empty
+	(*CompactThreadRequest)(nil),                   // 17: tim.api.thread.v1alpha1.CompactThreadRequest
+	(*CompactThreadResponse)(nil),                  // 18: tim.api.thread.v1alpha1.CompactThreadResponse
+	(*Thread)(nil),                                 // 19: tim.api.thread.v1alpha1.Thread
+	(*LlmMessage)(nil),                             // 20: tim.api.thread.v1alpha1.LlmMessage
+	(*ContentStartEvent)(nil),                      // 21: tim.api.thread.v1alpha1.ContentStartEvent
+	(*ContentDeltaEvent)(nil),                      // 22: tim.api.thread.v1alpha1.ContentDeltaEvent
+	(*ContentStopEvent)(nil),                       // 23: tim.api.thread.v1alpha1.ContentStopEvent
+	(*v1alpha1.ToolCall)(nil),                      // 24: tim.api.tool.v1alpha1.ToolCall
+	(*ThreadStateChangeEvent)(nil),                 // 25: tim.api.thread.v1alpha1.ThreadStateChangeEvent
+	(*StreamErrorEvent)(nil),                       // 26: tim.api.thread.v1alpha1.StreamErrorEvent
+	(*CompactionConfig)(nil),                       // 27: tim.api.thread.v1alpha1.CompactionConfig
+	(*FileRestoration)(nil),                        // 28: tim.api.thread.v1alpha1.FileRestoration
+	(*CompactionResult)(nil),                       // 29: tim.api.thread.v1alpha1.CompactionResult
+	(*emptypb.Empty)(nil),                          // 30: google.protobuf.Empty
 }
 var file_tim_api_thread_v1alpha1_thread_service_proto_depIdxs = []int32{
-	17, // 0: tim.api.thread.v1alpha1.ListThreadsResponse.results:type_name -> tim.api.thread.v1alpha1.Thread
-	17, // 1: tim.api.thread.v1alpha1.CreateThreadRequest.thread:type_name -> tim.api.thread.v1alpha1.Thread
-	17, // 2: tim.api.thread.v1alpha1.ForkThreadResponse.thread:type_name -> tim.api.thread.v1alpha1.Thread
-	17, // 3: tim.api.thread.v1alpha1.UpdateThreadRequest.thread:type_name -> tim.api.thread.v1alpha1.Thread
-	18, // 4: tim.api.thread.v1alpha1.ListLlmMessagesResponse.results:type_name -> tim.api.thread.v1alpha1.LlmMessage
-	19, // 5: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_start:type_name -> tim.api.thread.v1alpha1.ContentStartEvent
-	20, // 6: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_delta:type_name -> tim.api.thread.v1alpha1.ContentDeltaEvent
-	21, // 7: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_stop:type_name -> tim.api.thread.v1alpha1.ContentStopEvent
-	22, // 8: tim.api.thread.v1alpha1.StreamThreadEventsResponse.tool_call:type_name -> tim.api.tool.v1alpha1.ToolCall
-	23, // 9: tim.api.thread.v1alpha1.StreamThreadEventsResponse.thread_state_change:type_name -> tim.api.thread.v1alpha1.ThreadStateChangeEvent
-	24, // 10: tim.api.thread.v1alpha1.StreamThreadEventsResponse.stream_error:type_name -> tim.api.thread.v1alpha1.StreamErrorEvent
+	19, // 0: tim.api.thread.v1alpha1.ListThreadsResponse.results:type_name -> tim.api.thread.v1alpha1.Thread
+	19, // 1: tim.api.thread.v1alpha1.CreateThreadRequest.thread:type_name -> tim.api.thread.v1alpha1.Thread
+	19, // 2: tim.api.thread.v1alpha1.ForkThreadResponse.thread:type_name -> tim.api.thread.v1alpha1.Thread
+	19, // 3: tim.api.thread.v1alpha1.UpdateThreadRequest.thread:type_name -> tim.api.thread.v1alpha1.Thread
+	20, // 4: tim.api.thread.v1alpha1.ListLlmMessagesResponse.results:type_name -> tim.api.thread.v1alpha1.LlmMessage
+	21, // 5: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_start:type_name -> tim.api.thread.v1alpha1.ContentStartEvent
+	22, // 6: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_delta:type_name -> tim.api.thread.v1alpha1.ContentDeltaEvent
+	23, // 7: tim.api.thread.v1alpha1.StreamThreadEventsResponse.content_stop:type_name -> tim.api.thread.v1alpha1.ContentStopEvent
+	24, // 8: tim.api.thread.v1alpha1.StreamThreadEventsResponse.tool_call:type_name -> tim.api.tool.v1alpha1.ToolCall
+	25, // 9: tim.api.thread.v1alpha1.StreamThreadEventsResponse.thread_state_change:type_name -> tim.api.thread.v1alpha1.ThreadStateChangeEvent
+	26, // 10: tim.api.thread.v1alpha1.StreamThreadEventsResponse.stream_error:type_name -> tim.api.thread.v1alpha1.StreamErrorEvent
 	13, // 11: tim.api.thread.v1alpha1.SubmitUserMessageRequest.user_message:type_name -> tim.api.thread.v1alpha1.UserMessage
-	18, // 12: tim.api.thread.v1alpha1.EditThreadMessageResponse.message:type_name -> tim.api.thread.v1alpha1.LlmMessage
-	25, // 13: tim.api.thread.v1alpha1.EditThreadMessageResponse.file_restorations:type_name -> tim.api.thread.v1alpha1.FileRestoration
-	17, // 14: tim.api.thread.v1alpha1.EditThreadMessageResponse.forked_thread:type_name -> tim.api.thread.v1alpha1.Thread
-	0,  // 15: tim.api.thread.v1alpha1.ThreadService.GetThread:input_type -> tim.api.thread.v1alpha1.GetThreadRequest
-	1,  // 16: tim.api.thread.v1alpha1.ThreadService.ListThreads:input_type -> tim.api.thread.v1alpha1.ListThreadsRequest
-	3,  // 17: tim.api.thread.v1alpha1.ThreadService.CreateThread:input_type -> tim.api.thread.v1alpha1.CreateThreadRequest
-	4,  // 18: tim.api.thread.v1alpha1.ThreadService.ForkThread:input_type -> tim.api.thread.v1alpha1.ForkThreadRequest
-	6,  // 19: tim.api.thread.v1alpha1.ThreadService.UpdateThread:input_type -> tim.api.thread.v1alpha1.UpdateThreadRequest
-	7,  // 20: tim.api.thread.v1alpha1.ThreadService.GetLlmMessage:input_type -> tim.api.thread.v1alpha1.GetLlmMessageRequest
-	8,  // 21: tim.api.thread.v1alpha1.ThreadService.ListLlmMessages:input_type -> tim.api.thread.v1alpha1.ListLlmMessagesRequest
-	10, // 22: tim.api.thread.v1alpha1.ThreadService.StreamThreadEvents:input_type -> tim.api.thread.v1alpha1.StreamThreadEventsRequest
-	12, // 23: tim.api.thread.v1alpha1.ThreadService.SubmitUserMessage:input_type -> tim.api.thread.v1alpha1.SubmitUserMessageRequest
-	14, // 24: tim.api.thread.v1alpha1.ThreadService.EditThreadMessage:input_type -> tim.api.thread.v1alpha1.EditThreadMessageRequest
-	16, // 25: tim.api.thread.v1alpha1.ThreadService.ConfigureThreadWorkingDirectory:input_type -> tim.api.thread.v1alpha1.ConfigureThreadWorkingDirectoryRequest
-	17, // 26: tim.api.thread.v1alpha1.ThreadService.GetThread:output_type -> tim.api.thread.v1alpha1.Thread
-	2,  // 27: tim.api.thread.v1alpha1.ThreadService.ListThreads:output_type -> tim.api.thread.v1alpha1.ListThreadsResponse
-	17, // 28: tim.api.thread.v1alpha1.ThreadService.CreateThread:output_type -> tim.api.thread.v1alpha1.Thread
-	5,  // 29: tim.api.thread.v1alpha1.ThreadService.ForkThread:output_type -> tim.api.thread.v1alpha1.ForkThreadResponse
-	17, // 30: tim.api.thread.v1alpha1.ThreadService.UpdateThread:output_type -> tim.api.thread.v1alpha1.Thread
-	18, // 31: tim.api.thread.v1alpha1.ThreadService.GetLlmMessage:output_type -> tim.api.thread.v1alpha1.LlmMessage
-	9,  // 32: tim.api.thread.v1alpha1.ThreadService.ListLlmMessages:output_type -> tim.api.thread.v1alpha1.ListLlmMessagesResponse
-	11, // 33: tim.api.thread.v1alpha1.ThreadService.StreamThreadEvents:output_type -> tim.api.thread.v1alpha1.StreamThreadEventsResponse
-	18, // 34: tim.api.thread.v1alpha1.ThreadService.SubmitUserMessage:output_type -> tim.api.thread.v1alpha1.LlmMessage
-	15, // 35: tim.api.thread.v1alpha1.ThreadService.EditThreadMessage:output_type -> tim.api.thread.v1alpha1.EditThreadMessageResponse
-	26, // 36: tim.api.thread.v1alpha1.ThreadService.ConfigureThreadWorkingDirectory:output_type -> google.protobuf.Empty
-	26, // [26:37] is the sub-list for method output_type
-	15, // [15:26] is the sub-list for method input_type
-	15, // [15:15] is the sub-list for extension type_name
-	15, // [15:15] is the sub-list for extension extendee
-	0,  // [0:15] is the sub-list for field type_name
+	27, // 12: tim.api.thread.v1alpha1.SubmitUserMessageRequest.compaction_config:type_name -> tim.api.thread.v1alpha1.CompactionConfig
+	20, // 13: tim.api.thread.v1alpha1.EditThreadMessageResponse.message:type_name -> tim.api.thread.v1alpha1.LlmMessage
+	28, // 14: tim.api.thread.v1alpha1.EditThreadMessageResponse.file_restorations:type_name -> tim.api.thread.v1alpha1.FileRestoration
+	19, // 15: tim.api.thread.v1alpha1.EditThreadMessageResponse.forked_thread:type_name -> tim.api.thread.v1alpha1.Thread
+	27, // 16: tim.api.thread.v1alpha1.CompactThreadRequest.compaction_config:type_name -> tim.api.thread.v1alpha1.CompactionConfig
+	29, // 17: tim.api.thread.v1alpha1.CompactThreadResponse.result:type_name -> tim.api.thread.v1alpha1.CompactionResult
+	0,  // 18: tim.api.thread.v1alpha1.ThreadService.GetThread:input_type -> tim.api.thread.v1alpha1.GetThreadRequest
+	1,  // 19: tim.api.thread.v1alpha1.ThreadService.ListThreads:input_type -> tim.api.thread.v1alpha1.ListThreadsRequest
+	3,  // 20: tim.api.thread.v1alpha1.ThreadService.CreateThread:input_type -> tim.api.thread.v1alpha1.CreateThreadRequest
+	4,  // 21: tim.api.thread.v1alpha1.ThreadService.ForkThread:input_type -> tim.api.thread.v1alpha1.ForkThreadRequest
+	6,  // 22: tim.api.thread.v1alpha1.ThreadService.UpdateThread:input_type -> tim.api.thread.v1alpha1.UpdateThreadRequest
+	7,  // 23: tim.api.thread.v1alpha1.ThreadService.GetLlmMessage:input_type -> tim.api.thread.v1alpha1.GetLlmMessageRequest
+	8,  // 24: tim.api.thread.v1alpha1.ThreadService.ListLlmMessages:input_type -> tim.api.thread.v1alpha1.ListLlmMessagesRequest
+	10, // 25: tim.api.thread.v1alpha1.ThreadService.StreamThreadEvents:input_type -> tim.api.thread.v1alpha1.StreamThreadEventsRequest
+	12, // 26: tim.api.thread.v1alpha1.ThreadService.SubmitUserMessage:input_type -> tim.api.thread.v1alpha1.SubmitUserMessageRequest
+	14, // 27: tim.api.thread.v1alpha1.ThreadService.EditThreadMessage:input_type -> tim.api.thread.v1alpha1.EditThreadMessageRequest
+	16, // 28: tim.api.thread.v1alpha1.ThreadService.ConfigureThreadWorkingDirectory:input_type -> tim.api.thread.v1alpha1.ConfigureThreadWorkingDirectoryRequest
+	17, // 29: tim.api.thread.v1alpha1.ThreadService.CompactThread:input_type -> tim.api.thread.v1alpha1.CompactThreadRequest
+	19, // 30: tim.api.thread.v1alpha1.ThreadService.GetThread:output_type -> tim.api.thread.v1alpha1.Thread
+	2,  // 31: tim.api.thread.v1alpha1.ThreadService.ListThreads:output_type -> tim.api.thread.v1alpha1.ListThreadsResponse
+	19, // 32: tim.api.thread.v1alpha1.ThreadService.CreateThread:output_type -> tim.api.thread.v1alpha1.Thread
+	5,  // 33: tim.api.thread.v1alpha1.ThreadService.ForkThread:output_type -> tim.api.thread.v1alpha1.ForkThreadResponse
+	19, // 34: tim.api.thread.v1alpha1.ThreadService.UpdateThread:output_type -> tim.api.thread.v1alpha1.Thread
+	20, // 35: tim.api.thread.v1alpha1.ThreadService.GetLlmMessage:output_type -> tim.api.thread.v1alpha1.LlmMessage
+	9,  // 36: tim.api.thread.v1alpha1.ThreadService.ListLlmMessages:output_type -> tim.api.thread.v1alpha1.ListLlmMessagesResponse
+	11, // 37: tim.api.thread.v1alpha1.ThreadService.StreamThreadEvents:output_type -> tim.api.thread.v1alpha1.StreamThreadEventsResponse
+	20, // 38: tim.api.thread.v1alpha1.ThreadService.SubmitUserMessage:output_type -> tim.api.thread.v1alpha1.LlmMessage
+	15, // 39: tim.api.thread.v1alpha1.ThreadService.EditThreadMessage:output_type -> tim.api.thread.v1alpha1.EditThreadMessageResponse
+	30, // 40: tim.api.thread.v1alpha1.ThreadService.ConfigureThreadWorkingDirectory:output_type -> google.protobuf.Empty
+	18, // 41: tim.api.thread.v1alpha1.ThreadService.CompactThread:output_type -> tim.api.thread.v1alpha1.CompactThreadResponse
+	30, // [30:42] is the sub-list for method output_type
+	18, // [18:30] is the sub-list for method input_type
+	18, // [18:18] is the sub-list for extension type_name
+	18, // [18:18] is the sub-list for extension extendee
+	0,  // [0:18] is the sub-list for field type_name
 }
 
 func init() { file_tim_api_thread_v1alpha1_thread_service_proto_init() }
@@ -1298,6 +1426,7 @@ func file_tim_api_thread_v1alpha1_thread_service_proto_init() {
 		(*StreamThreadEventsResponse_ThreadStateChange)(nil),
 		(*StreamThreadEventsResponse_StreamError)(nil),
 	}
+	file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[12].OneofWrappers = []any{}
 	file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[14].OneofWrappers = []any{}
 	file_tim_api_thread_v1alpha1_thread_service_proto_msgTypes[15].OneofWrappers = []any{}
 	type x struct{}
@@ -1306,7 +1435,7 @@ func file_tim_api_thread_v1alpha1_thread_service_proto_init() {
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: unsafe.Slice(unsafe.StringData(file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc), len(file_tim_api_thread_v1alpha1_thread_service_proto_rawDesc)),
 			NumEnums:      0,
-			NumMessages:   17,
+			NumMessages:   19,
 			NumExtensions: 0,
 			NumServices:   1,
 		},
diff --git a/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.swagger.json b/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.swagger.json
index 056a964d2..f06cddd75 100644
--- a/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.swagger.json
+++ b/tim-proto/gen/tim/api/thread/v1alpha1/thread_service.swagger.json
@@ -140,6 +140,95 @@
             "schema": {
               "$ref": "#/definitions/v1alpha1UserMessage"
             }
+          },
+          {
+            "name": "compactionConfig.thresholdPercentage",
+            "description": "Threshold percentage (0.0-1.0) of model's max capacity at which to trigger compaction\ne.g., 0.8 means compact when context reaches 80% of model's max tokens",
+            "in": "query",
+            "required": false,
+            "type": "number",
+            "format": "float"
+          },
+          {
+            "name": "compactionConfig.strategy",
+            "description": "The compaction strategy to use\n\n - COMPACTION_STRATEGY_UNSPECIFIED: Default unspecified\n - COMPACTION_STRATEGY_TRUNCATION: Keep first message and last N messages, truncate middle\n - COMPACTION_STRATEGY_SUMMARIZATION: Use LLM to summarize middle section, keep recent messages\n - COMPACTION_STRATEGY_IMPORTANCE_BASED: Keep user messages and tool calls, summarize assistant responses",
+            "in": "query",
+            "required": false,
+            "type": "string",
+            "enum": [
+              "COMPACTION_STRATEGY_UNSPECIFIED",
+              "COMPACTION_STRATEGY_TRUNCATION",
+              "COMPACTION_STRATEGY_SUMMARIZATION",
+              "COMPACTION_STRATEGY_IMPORTANCE_BASED"
+            ],
+            "default": "COMPACTION_STRATEGY_UNSPECIFIED"
+          },
+          {
+            "name": "compactionConfig.createFork",
+            "description": "Whether to create a fork (true) or compact in-place (false, for future use)\nCurrently only fork mode is supported",
+            "in": "query",
+            "required": false,
+            "type": "boolean"
+          },
+          {
+            "name": "compactionConfig.truncation.keepRecentMessages",
+            "description": "Number of recent messages to keep (default: 10)",
+            "in": "query",
+            "required": false,
+            "type": "integer",
+            "format": "int32"
+          },
+          {
+            "name": "compactionConfig.summarization.summaryLength",
+            "description": "Target length for the summary in tokens (approximate)",
+            "in": "query",
+            "required": false,
+            "type": "integer",
+            "format": "int32"
+          },
+          {
+            "name": "compactionConfig.summarization.preserveSystemMessages",
+            "description": "Whether to preserve system messages",
+            "in": "query",
+            "required": false,
+            "type": "boolean"
+          },
+          {
+            "name": "compactionConfig.summarization.keepRecentMessages",
+            "description": "Number of recent messages to keep unchanged (will not be summarized)",
+            "in": "query",
+            "required": false,
+            "type": "integer",
+            "format": "int32"
+          },
+          {
+            "name": "compactionConfig.importanceBased.preserveUserMessages",
+            "description": "Always preserve user messages",
+            "in": "query",
+            "required": false,
+            "type": "boolean"
+          },
+          {
+            "name": "compactionConfig.importanceBased.preserveToolCalls",
+            "description": "Always preserve tool calls and their results",
+            "in": "query",
+            "required": false,
+            "type": "boolean"
+          },
+          {
+            "name": "compactionConfig.importanceBased.summarizeAssistantBlocks",
+            "description": "Summarize assistant text blocks that are not adjacent to tool calls",
+            "in": "query",
+            "required": false,
+            "type": "boolean"
+          },
+          {
+            "name": "compactionConfig.importanceBased.keepRecentMessages",
+            "description": "Number of recent messages to keep unchanged regardless of importance",
+            "in": "query",
+            "required": false,
+            "type": "integer",
+            "format": "int32"
           }
         ],
         "tags": [
@@ -382,6 +471,47 @@
         ]
       }
     },
+    "/v1alpha1/{path}:compact": {
+      "post": {
+        "summary": "Compact a thread to reduce context size",
+        "operationId": "ThreadService_CompactThread",
+        "responses": {
+          "200": {
+            "description": "A successful response.",
+            "schema": {
+              "$ref": "#/definitions/v1alpha1CompactThreadResponse"
+            }
+          },
+          "default": {
+            "description": "An unexpected error response.",
+            "schema": {
+              "$ref": "#/definitions/rpcStatus"
+            }
+          }
+        },
+        "parameters": [
+          {
+            "name": "path",
+            "description": "The resource path of the thread to compact",
+            "in": "path",
+            "required": true,
+            "type": "string",
+            "pattern": "orgs/[^/]+/users/[^/]+/threads/[^/]+"
+          },
+          {
+            "name": "body",
+            "in": "body",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/ThreadServiceCompactThreadBody"
+            }
+          }
+        ],
+        "tags": [
+          "ThreadService"
+        ]
+      }
+    },
     "/v1alpha1/{path}:configureWorkingDirectory": {
       "post": {
         "summary": "Configure the working directory for a thread (for checkpoint creation)",
@@ -487,6 +617,19 @@
       },
       "description": "The identifier for the original thread this thread was forked from."
     },
+    "ThreadServiceCompactThreadBody": {
+      "type": "object",
+      "properties": {
+        "compactionConfig": {
+          "$ref": "#/definitions/v1alpha1CompactionConfig",
+          "title": "The compaction configuration"
+        }
+      },
+      "title": "CompactThreadRequest is used to manually compact a thread",
+      "required": [
+        "compactionConfig"
+      ]
+    },
     "ThreadServiceConfigureThreadWorkingDirectoryBody": {
       "type": "object",
       "properties": {
@@ -602,6 +745,100 @@
         }
       }
     },
+    "v1alpha1CompactThreadResponse": {
+      "type": "object",
+      "properties": {
+        "result": {
+          "$ref": "#/definitions/v1alpha1CompactionResult",
+          "title": "The compaction result"
+        }
+      },
+      "title": "CompactThreadResponse is the response for compacting a thread",
+      "required": [
+        "result"
+      ]
+    },
+    "v1alpha1CompactionConfig": {
+      "type": "object",
+      "properties": {
+        "thresholdPercentage": {
+          "type": "number",
+          "format": "float",
+          "title": "Threshold percentage (0.0-1.0) of model's max capacity at which to trigger compaction\ne.g., 0.8 means compact when context reaches 80% of model's max tokens"
+        },
+        "strategy": {
+          "$ref": "#/definitions/v1alpha1CompactionStrategy",
+          "title": "The compaction strategy to use"
+        },
+        "createFork": {
+          "type": "boolean",
+          "title": "Whether to create a fork (true) or compact in-place (false, for future use)\nCurrently only fork mode is supported"
+        },
+        "truncation": {
+          "$ref": "#/definitions/v1alpha1TruncationParams",
+          "title": "Parameters for truncation strategy"
+        },
+        "summarization": {
+          "$ref": "#/definitions/v1alpha1SummarizationParams",
+          "title": "Parameters for summarization strategy"
+        },
+        "importanceBased": {
+          "$ref": "#/definitions/v1alpha1ImportanceBasedParams",
+          "title": "Parameters for importance-based strategy"
+        }
+      },
+      "title": "Configuration for thread context compaction"
+    },
+    "v1alpha1CompactionResult": {
+      "type": "object",
+      "properties": {
+        "forkedThread": {
+          "$ref": "#/definitions/v1alpha1Thread",
+          "title": "The forked thread containing compacted messages"
+        },
+        "strategy": {
+          "$ref": "#/definitions/v1alpha1CompactionStrategy",
+          "title": "The strategy that was used"
+        },
+        "originalMessageCount": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Original message count before compaction"
+        },
+        "compactedMessageCount": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Compacted message count after compaction"
+        },
+        "originalTokenCount": {
+          "type": "string",
+          "format": "int64",
+          "title": "Original total token count"
+        },
+        "compactedTokenCount": {
+          "type": "string",
+          "format": "int64",
+          "title": "Compacted total token count"
+        },
+        "threadContextUid": {
+          "type": "string",
+          "title": "UID of the thread context record"
+        }
+      },
+      "title": "Result of a compaction operation"
+    },
+    "v1alpha1CompactionStrategy": {
+      "type": "string",
+      "enum": [
+        "COMPACTION_STRATEGY_UNSPECIFIED",
+        "COMPACTION_STRATEGY_TRUNCATION",
+        "COMPACTION_STRATEGY_SUMMARIZATION",
+        "COMPACTION_STRATEGY_IMPORTANCE_BASED"
+      ],
+      "default": "COMPACTION_STRATEGY_UNSPECIFIED",
+      "description": "- COMPACTION_STRATEGY_UNSPECIFIED: Default unspecified\n - COMPACTION_STRATEGY_TRUNCATION: Keep first message and last N messages, truncate middle\n - COMPACTION_STRATEGY_SUMMARIZATION: Use LLM to summarize middle section, keep recent messages\n - COMPACTION_STRATEGY_IMPORTANCE_BASED: Keep user messages and tool calls, summarize assistant responses",
+      "title": "CompactionStrategy defines the strategy to use for thread compaction"
+    },
     "v1alpha1ContentDeltaEvent": {
       "type": "object",
       "properties": {
@@ -712,6 +949,29 @@
         "thread"
       ]
     },
+    "v1alpha1ImportanceBasedParams": {
+      "type": "object",
+      "properties": {
+        "preserveUserMessages": {
+          "type": "boolean",
+          "title": "Always preserve user messages"
+        },
+        "preserveToolCalls": {
+          "type": "boolean",
+          "title": "Always preserve tool calls and their results"
+        },
+        "summarizeAssistantBlocks": {
+          "type": "boolean",
+          "title": "Summarize assistant text blocks that are not adjacent to tool calls"
+        },
+        "keepRecentMessages": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Number of recent messages to keep unchanged regardless of importance"
+        }
+      },
+      "title": "Parameters for importance-based strategy"
+    },
     "v1alpha1ListLlmMessagesResponse": {
       "type": "object",
       "properties": {
@@ -910,6 +1170,26 @@
       },
       "title": "StreamThreadEventsResponse streams content events, tool calls, and thread state changes"
     },
+    "v1alpha1SummarizationParams": {
+      "type": "object",
+      "properties": {
+        "summaryLength": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Target length for the summary in tokens (approximate)"
+        },
+        "preserveSystemMessages": {
+          "type": "boolean",
+          "title": "Whether to preserve system messages"
+        },
+        "keepRecentMessages": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Number of recent messages to keep unchanged (will not be summarized)"
+        }
+      },
+      "title": "Parameters for summarization strategy"
+    },
     "v1alpha1Thinking": {
       "type": "object",
       "properties": {
@@ -1039,6 +1319,17 @@
       },
       "title": "The result of a tool call"
     },
+    "v1alpha1TruncationParams": {
+      "type": "object",
+      "properties": {
+        "keepRecentMessages": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Number of recent messages to keep (default: 10)"
+        }
+      },
+      "title": "Parameters for truncation strategy"
+    },
     "v1alpha1UserMessage": {
       "type": "object",
       "properties": {
diff --git a/tim-proto/gen/tim/api/thread/v1alpha1/thread_types.pb.go b/tim-proto/gen/tim/api/thread/v1alpha1/thread_types.pb.go
index 3f9055d31..b8efacb40 100644
--- a/tim-proto/gen/tim/api/thread/v1alpha1/thread_types.pb.go
+++ b/tim-proto/gen/tim/api/thread/v1alpha1/thread_types.pb.go
@@ -27,6 +27,63 @@ const (
 	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
 )
 
+// CompactionStrategy defines the strategy to use for thread compaction
+type CompactionStrategy int32
+
+const (
+	// Default unspecified
+	CompactionStrategy_COMPACTION_STRATEGY_UNSPECIFIED CompactionStrategy = 0
+	// Keep first message and last N messages, truncate middle
+	CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION CompactionStrategy = 1
+	// Use LLM to summarize middle section, keep recent messages
+	CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION CompactionStrategy = 2
+	// Keep user messages and tool calls, summarize assistant responses
+	CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED CompactionStrategy = 3
+)
+
+// Enum value maps for CompactionStrategy.
+var (
+	CompactionStrategy_name = map[int32]string{
+		0: "COMPACTION_STRATEGY_UNSPECIFIED",
+		1: "COMPACTION_STRATEGY_TRUNCATION",
+		2: "COMPACTION_STRATEGY_SUMMARIZATION",
+		3: "COMPACTION_STRATEGY_IMPORTANCE_BASED",
+	}
+	CompactionStrategy_value = map[string]int32{
+		"COMPACTION_STRATEGY_UNSPECIFIED":      0,
+		"COMPACTION_STRATEGY_TRUNCATION":       1,
+		"COMPACTION_STRATEGY_SUMMARIZATION":    2,
+		"COMPACTION_STRATEGY_IMPORTANCE_BASED": 3,
+	}
+)
+
+func (x CompactionStrategy) Enum() *CompactionStrategy {
+	p := new(CompactionStrategy)
+	*p = x
+	return p
+}
+
+func (x CompactionStrategy) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (CompactionStrategy) Descriptor() protoreflect.EnumDescriptor {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[0].Descriptor()
+}
+
+func (CompactionStrategy) Type() protoreflect.EnumType {
+	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[0]
+}
+
+func (x CompactionStrategy) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use CompactionStrategy.Descriptor instead.
+func (CompactionStrategy) EnumDescriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{0}
+}
+
 // An actor who may participate in creating LLM messages
 type LlmMessageRole int32
 
@@ -64,11 +121,11 @@ func (x LlmMessageRole) String() string {
 }
 
 func (LlmMessageRole) Descriptor() protoreflect.EnumDescriptor {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[0].Descriptor()
+	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[1].Descriptor()
 }
 
 func (LlmMessageRole) Type() protoreflect.EnumType {
-	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[0]
+	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[1]
 }
 
 func (x LlmMessageRole) Number() protoreflect.EnumNumber {
@@ -77,7 +134,7 @@ func (x LlmMessageRole) Number() protoreflect.EnumNumber {
 
 // Deprecated: Use LlmMessageRole.Descriptor instead.
 func (LlmMessageRole) EnumDescriptor() ([]byte, []int) {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{0}
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{1}
 }
 
 // The LLM processing state of a thread
@@ -117,11 +174,11 @@ func (x ThreadLLMState) String() string {
 }
 
 func (ThreadLLMState) Descriptor() protoreflect.EnumDescriptor {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[1].Descriptor()
+	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[2].Descriptor()
 }
 
 func (ThreadLLMState) Type() protoreflect.EnumType {
-	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[1]
+	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[2]
 }
 
 func (x ThreadLLMState) Number() protoreflect.EnumNumber {
@@ -130,7 +187,7 @@ func (x ThreadLLMState) Number() protoreflect.EnumNumber {
 
 // Deprecated: Use ThreadLLMState.Descriptor instead.
 func (ThreadLLMState) EnumDescriptor() ([]byte, []int) {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{1}
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{2}
 }
 
 // ContentType enum for text and thinking content
@@ -170,11 +227,11 @@ func (x ContentType) String() string {
 }
 
 func (ContentType) Descriptor() protoreflect.EnumDescriptor {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[2].Descriptor()
+	return file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[3].Descriptor()
 }
 
 func (ContentType) Type() protoreflect.EnumType {
-	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[2]
+	return &file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes[3]
 }
 
 func (x ContentType) Number() protoreflect.EnumNumber {
@@ -183,7 +240,7 @@ func (x ContentType) Number() protoreflect.EnumNumber {
 
 // Deprecated: Use ContentType.Descriptor instead.
 func (ContentType) EnumDescriptor() ([]byte, []int) {
-	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{2}
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{3}
 }
 
 // Threads are collections of context as events that represent an llm conversation
@@ -953,6 +1010,422 @@ func (x *FileRestoration) GetContent() []byte {
 	return nil
 }
 
+// Configuration for thread context compaction
+type CompactionConfig struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Threshold percentage (0.0-1.0) of model's max capacity at which to trigger compaction
+	// e.g., 0.8 means compact when context reaches 80% of model's max tokens
+	ThresholdPercentage float32 `protobuf:"fixed32,1,opt,name=threshold_percentage,json=thresholdPercentage,proto3" json:"threshold_percentage,omitempty"`
+	// The compaction strategy to use
+	Strategy CompactionStrategy `protobuf:"varint,2,opt,name=strategy,proto3,enum=tim.api.thread.v1alpha1.CompactionStrategy" json:"strategy,omitempty"`
+	// Whether to create a fork (true) or compact in-place (false, for future use)
+	// Currently only fork mode is supported
+	CreateFork bool `protobuf:"varint,3,opt,name=create_fork,json=createFork,proto3" json:"create_fork,omitempty"`
+	// Strategy-specific parameters
+	//
+	// Types that are valid to be assigned to Parameters:
+	//
+	//	*CompactionConfig_Truncation
+	//	*CompactionConfig_Summarization
+	//	*CompactionConfig_ImportanceBased
+	Parameters    isCompactionConfig_Parameters `protobuf_oneof:"parameters"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *CompactionConfig) Reset() {
+	*x = CompactionConfig{}
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[10]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *CompactionConfig) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*CompactionConfig) ProtoMessage() {}
+
+func (x *CompactionConfig) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[10]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use CompactionConfig.ProtoReflect.Descriptor instead.
+func (*CompactionConfig) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{10}
+}
+
+func (x *CompactionConfig) GetThresholdPercentage() float32 {
+	if x != nil {
+		return x.ThresholdPercentage
+	}
+	return 0
+}
+
+func (x *CompactionConfig) GetStrategy() CompactionStrategy {
+	if x != nil {
+		return x.Strategy
+	}
+	return CompactionStrategy_COMPACTION_STRATEGY_UNSPECIFIED
+}
+
+func (x *CompactionConfig) GetCreateFork() bool {
+	if x != nil {
+		return x.CreateFork
+	}
+	return false
+}
+
+func (x *CompactionConfig) GetParameters() isCompactionConfig_Parameters {
+	if x != nil {
+		return x.Parameters
+	}
+	return nil
+}
+
+func (x *CompactionConfig) GetTruncation() *TruncationParams {
+	if x != nil {
+		if x, ok := x.Parameters.(*CompactionConfig_Truncation); ok {
+			return x.Truncation
+		}
+	}
+	return nil
+}
+
+func (x *CompactionConfig) GetSummarization() *SummarizationParams {
+	if x != nil {
+		if x, ok := x.Parameters.(*CompactionConfig_Summarization); ok {
+			return x.Summarization
+		}
+	}
+	return nil
+}
+
+func (x *CompactionConfig) GetImportanceBased() *ImportanceBasedParams {
+	if x != nil {
+		if x, ok := x.Parameters.(*CompactionConfig_ImportanceBased); ok {
+			return x.ImportanceBased
+		}
+	}
+	return nil
+}
+
+type isCompactionConfig_Parameters interface {
+	isCompactionConfig_Parameters()
+}
+
+type CompactionConfig_Truncation struct {
+	// Parameters for truncation strategy
+	Truncation *TruncationParams `protobuf:"bytes,4,opt,name=truncation,proto3,oneof"`
+}
+
+type CompactionConfig_Summarization struct {
+	// Parameters for summarization strategy
+	Summarization *SummarizationParams `protobuf:"bytes,5,opt,name=summarization,proto3,oneof"`
+}
+
+type CompactionConfig_ImportanceBased struct {
+	// Parameters for importance-based strategy
+	ImportanceBased *ImportanceBasedParams `protobuf:"bytes,6,opt,name=importance_based,json=importanceBased,proto3,oneof"`
+}
+
+func (*CompactionConfig_Truncation) isCompactionConfig_Parameters() {}
+
+func (*CompactionConfig_Summarization) isCompactionConfig_Parameters() {}
+
+func (*CompactionConfig_ImportanceBased) isCompactionConfig_Parameters() {}
+
+// Parameters for truncation strategy
+type TruncationParams struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Number of recent messages to keep (default: 10)
+	KeepRecentMessages int32 `protobuf:"varint,1,opt,name=keep_recent_messages,json=keepRecentMessages,proto3" json:"keep_recent_messages,omitempty"`
+	unknownFields      protoimpl.UnknownFields
+	sizeCache          protoimpl.SizeCache
+}
+
+func (x *TruncationParams) Reset() {
+	*x = TruncationParams{}
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[11]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *TruncationParams) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*TruncationParams) ProtoMessage() {}
+
+func (x *TruncationParams) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[11]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use TruncationParams.ProtoReflect.Descriptor instead.
+func (*TruncationParams) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{11}
+}
+
+func (x *TruncationParams) GetKeepRecentMessages() int32 {
+	if x != nil {
+		return x.KeepRecentMessages
+	}
+	return 0
+}
+
+// Parameters for summarization strategy
+type SummarizationParams struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Target length for the summary in tokens (approximate)
+	SummaryLength int32 `protobuf:"varint,1,opt,name=summary_length,json=summaryLength,proto3" json:"summary_length,omitempty"`
+	// Whether to preserve system messages
+	PreserveSystemMessages bool `protobuf:"varint,2,opt,name=preserve_system_messages,json=preserveSystemMessages,proto3" json:"preserve_system_messages,omitempty"`
+	// Number of recent messages to keep unchanged (will not be summarized)
+	KeepRecentMessages int32 `protobuf:"varint,3,opt,name=keep_recent_messages,json=keepRecentMessages,proto3" json:"keep_recent_messages,omitempty"`
+	unknownFields      protoimpl.UnknownFields
+	sizeCache          protoimpl.SizeCache
+}
+
+func (x *SummarizationParams) Reset() {
+	*x = SummarizationParams{}
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[12]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *SummarizationParams) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*SummarizationParams) ProtoMessage() {}
+
+func (x *SummarizationParams) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[12]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use SummarizationParams.ProtoReflect.Descriptor instead.
+func (*SummarizationParams) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{12}
+}
+
+func (x *SummarizationParams) GetSummaryLength() int32 {
+	if x != nil {
+		return x.SummaryLength
+	}
+	return 0
+}
+
+func (x *SummarizationParams) GetPreserveSystemMessages() bool {
+	if x != nil {
+		return x.PreserveSystemMessages
+	}
+	return false
+}
+
+func (x *SummarizationParams) GetKeepRecentMessages() int32 {
+	if x != nil {
+		return x.KeepRecentMessages
+	}
+	return 0
+}
+
+// Parameters for importance-based strategy
+type ImportanceBasedParams struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Always preserve user messages
+	PreserveUserMessages bool `protobuf:"varint,1,opt,name=preserve_user_messages,json=preserveUserMessages,proto3" json:"preserve_user_messages,omitempty"`
+	// Always preserve tool calls and their results
+	PreserveToolCalls bool `protobuf:"varint,2,opt,name=preserve_tool_calls,json=preserveToolCalls,proto3" json:"preserve_tool_calls,omitempty"`
+	// Summarize assistant text blocks that are not adjacent to tool calls
+	SummarizeAssistantBlocks bool `protobuf:"varint,3,opt,name=summarize_assistant_blocks,json=summarizeAssistantBlocks,proto3" json:"summarize_assistant_blocks,omitempty"`
+	// Number of recent messages to keep unchanged regardless of importance
+	KeepRecentMessages int32 `protobuf:"varint,4,opt,name=keep_recent_messages,json=keepRecentMessages,proto3" json:"keep_recent_messages,omitempty"`
+	unknownFields      protoimpl.UnknownFields
+	sizeCache          protoimpl.SizeCache
+}
+
+func (x *ImportanceBasedParams) Reset() {
+	*x = ImportanceBasedParams{}
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[13]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *ImportanceBasedParams) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*ImportanceBasedParams) ProtoMessage() {}
+
+func (x *ImportanceBasedParams) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[13]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use ImportanceBasedParams.ProtoReflect.Descriptor instead.
+func (*ImportanceBasedParams) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{13}
+}
+
+func (x *ImportanceBasedParams) GetPreserveUserMessages() bool {
+	if x != nil {
+		return x.PreserveUserMessages
+	}
+	return false
+}
+
+func (x *ImportanceBasedParams) GetPreserveToolCalls() bool {
+	if x != nil {
+		return x.PreserveToolCalls
+	}
+	return false
+}
+
+func (x *ImportanceBasedParams) GetSummarizeAssistantBlocks() bool {
+	if x != nil {
+		return x.SummarizeAssistantBlocks
+	}
+	return false
+}
+
+func (x *ImportanceBasedParams) GetKeepRecentMessages() int32 {
+	if x != nil {
+		return x.KeepRecentMessages
+	}
+	return 0
+}
+
+// Result of a compaction operation
+type CompactionResult struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// The forked thread containing compacted messages
+	ForkedThread *Thread `protobuf:"bytes,1,opt,name=forked_thread,json=forkedThread,proto3" json:"forked_thread,omitempty"`
+	// The strategy that was used
+	Strategy CompactionStrategy `protobuf:"varint,2,opt,name=strategy,proto3,enum=tim.api.thread.v1alpha1.CompactionStrategy" json:"strategy,omitempty"`
+	// Original message count before compaction
+	OriginalMessageCount int32 `protobuf:"varint,3,opt,name=original_message_count,json=originalMessageCount,proto3" json:"original_message_count,omitempty"`
+	// Compacted message count after compaction
+	CompactedMessageCount int32 `protobuf:"varint,4,opt,name=compacted_message_count,json=compactedMessageCount,proto3" json:"compacted_message_count,omitempty"`
+	// Original total token count
+	OriginalTokenCount int64 `protobuf:"varint,5,opt,name=original_token_count,json=originalTokenCount,proto3" json:"original_token_count,omitempty"`
+	// Compacted total token count
+	CompactedTokenCount int64 `protobuf:"varint,6,opt,name=compacted_token_count,json=compactedTokenCount,proto3" json:"compacted_token_count,omitempty"`
+	// UID of the thread context record
+	ThreadContextUid string `protobuf:"bytes,7,opt,name=thread_context_uid,json=threadContextUid,proto3" json:"thread_context_uid,omitempty"`
+	unknownFields    protoimpl.UnknownFields
+	sizeCache        protoimpl.SizeCache
+}
+
+func (x *CompactionResult) Reset() {
+	*x = CompactionResult{}
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[14]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *CompactionResult) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*CompactionResult) ProtoMessage() {}
+
+func (x *CompactionResult) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[14]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use CompactionResult.ProtoReflect.Descriptor instead.
+func (*CompactionResult) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP(), []int{14}
+}
+
+func (x *CompactionResult) GetForkedThread() *Thread {
+	if x != nil {
+		return x.ForkedThread
+	}
+	return nil
+}
+
+func (x *CompactionResult) GetStrategy() CompactionStrategy {
+	if x != nil {
+		return x.Strategy
+	}
+	return CompactionStrategy_COMPACTION_STRATEGY_UNSPECIFIED
+}
+
+func (x *CompactionResult) GetOriginalMessageCount() int32 {
+	if x != nil {
+		return x.OriginalMessageCount
+	}
+	return 0
+}
+
+func (x *CompactionResult) GetCompactedMessageCount() int32 {
+	if x != nil {
+		return x.CompactedMessageCount
+	}
+	return 0
+}
+
+func (x *CompactionResult) GetOriginalTokenCount() int64 {
+	if x != nil {
+		return x.OriginalTokenCount
+	}
+	return 0
+}
+
+func (x *CompactionResult) GetCompactedTokenCount() int64 {
+	if x != nil {
+		return x.CompactedTokenCount
+	}
+	return 0
+}
+
+func (x *CompactionResult) GetThreadContextUid() string {
+	if x != nil {
+		return x.ThreadContextUid
+	}
+	return ""
+}
+
 // The identifier for the original thread this thread was forked from.
 type Thread_ParentThreadId struct {
 	state protoimpl.MessageState `protogen:"open.v1"`
@@ -971,7 +1444,7 @@ type Thread_ParentThreadId struct {
 
 func (x *Thread_ParentThreadId) Reset() {
 	*x = Thread_ParentThreadId{}
-	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[10]
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[15]
 	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 	ms.StoreMessageInfo(mi)
 }
@@ -983,7 +1456,7 @@ func (x *Thread_ParentThreadId) String() string {
 func (*Thread_ParentThreadId) ProtoMessage() {}
 
 func (x *Thread_ParentThreadId) ProtoReflect() protoreflect.Message {
-	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[10]
+	mi := &file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[15]
 	if x != nil {
 		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
 		if ms.LoadMessageInfo() == nil {
@@ -1092,7 +1565,46 @@ const file_tim_api_thread_v1alpha1_thread_types_proto_rawDesc = "" +
 	"\x05error\x18\x01 \x01(\tR\x05error\"U\n" +
 	"\x0fFileRestoration\x12\x1d\n" +
 	"\x04path\x18\x01 \x01(\tB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\x04path\x12#\n" +
-	"\acontent\x18\x02 \x01(\fB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\acontent*m\n" +
+	"\acontent\x18\x02 \x01(\fB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\acontent\"\xd8\x03\n" +
+	"\x10CompactionConfig\x12B\n" +
+	"\x14threshold_percentage\x18\x01 \x01(\x02B\x0f\xbaH\f\n" +
+	"\n" +
+	"\x1d\x00\x00\x80?-\x00\x00\x00\x00R\x13thresholdPercentage\x12Q\n" +
+	"\bstrategy\x18\x02 \x01(\x0e2+.tim.api.thread.v1alpha1.CompactionStrategyB\b\xbaH\x05\x82\x01\x02\x10\x01R\bstrategy\x12\x1f\n" +
+	"\vcreate_fork\x18\x03 \x01(\bR\n" +
+	"createFork\x12K\n" +
+	"\n" +
+	"truncation\x18\x04 \x01(\v2).tim.api.thread.v1alpha1.TruncationParamsH\x00R\n" +
+	"truncation\x12T\n" +
+	"\rsummarization\x18\x05 \x01(\v2,.tim.api.thread.v1alpha1.SummarizationParamsH\x00R\rsummarization\x12[\n" +
+	"\x10importance_based\x18\x06 \x01(\v2..tim.api.thread.v1alpha1.ImportanceBasedParamsH\x00R\x0fimportanceBasedB\f\n" +
+	"\n" +
+	"parameters\"O\n" +
+	"\x10TruncationParams\x12;\n" +
+	"\x14keep_recent_messages\x18\x01 \x01(\x05B\t\xbaH\x06\x1a\x04\x18d(\x01R\x12keepRecentMessages\"\xbf\x01\n" +
+	"\x13SummarizationParams\x121\n" +
+	"\x0esummary_length\x18\x01 \x01(\x05B\n" +
+	"\xbaH\a\x1a\x05\x18\x90N(dR\rsummaryLength\x128\n" +
+	"\x18preserve_system_messages\x18\x02 \x01(\bR\x16preserveSystemMessages\x12;\n" +
+	"\x14keep_recent_messages\x18\x03 \x01(\x05B\t\xbaH\x06\x1a\x04\x182(\x00R\x12keepRecentMessages\"\xf8\x01\n" +
+	"\x15ImportanceBasedParams\x124\n" +
+	"\x16preserve_user_messages\x18\x01 \x01(\bR\x14preserveUserMessages\x12.\n" +
+	"\x13preserve_tool_calls\x18\x02 \x01(\bR\x11preserveToolCalls\x12<\n" +
+	"\x1asummarize_assistant_blocks\x18\x03 \x01(\bR\x18summarizeAssistantBlocks\x12;\n" +
+	"\x14keep_recent_messages\x18\x04 \x01(\x05B\t\xbaH\x06\x1a\x04\x182(\x00R\x12keepRecentMessages\"\xad\x03\n" +
+	"\x10CompactionResult\x12D\n" +
+	"\rforked_thread\x18\x01 \x01(\v2\x1f.tim.api.thread.v1alpha1.ThreadR\fforkedThread\x12G\n" +
+	"\bstrategy\x18\x02 \x01(\x0e2+.tim.api.thread.v1alpha1.CompactionStrategyR\bstrategy\x124\n" +
+	"\x16original_message_count\x18\x03 \x01(\x05R\x14originalMessageCount\x126\n" +
+	"\x17compacted_message_count\x18\x04 \x01(\x05R\x15compactedMessageCount\x120\n" +
+	"\x14original_token_count\x18\x05 \x01(\x03R\x12originalTokenCount\x122\n" +
+	"\x15compacted_token_count\x18\x06 \x01(\x03R\x13compactedTokenCount\x126\n" +
+	"\x12thread_context_uid\x18\a \x01(\tB\b\xe2\x8c\xcf\xd7\b\x02\b\x01R\x10threadContextUid*\xae\x01\n" +
+	"\x12CompactionStrategy\x12#\n" +
+	"\x1fCOMPACTION_STRATEGY_UNSPECIFIED\x10\x00\x12\"\n" +
+	"\x1eCOMPACTION_STRATEGY_TRUNCATION\x10\x01\x12%\n" +
+	"!COMPACTION_STRATEGY_SUMMARIZATION\x10\x02\x12(\n" +
+	"$COMPACTION_STRATEGY_IMPORTANCE_BASED\x10\x03*m\n" +
 	"\x0eLlmMessageRole\x12 \n" +
 	"\x1cLLM_MESSAGE_ROLE_UNSPECIFIED\x10\x00\x12\x19\n" +
 	"\x15LLM_MESSAGE_ROLE_USER\x10\x01\x12\x1e\n" +
@@ -1119,50 +1631,62 @@ func file_tim_api_thread_v1alpha1_thread_types_proto_rawDescGZIP() []byte {
 	return file_tim_api_thread_v1alpha1_thread_types_proto_rawDescData
 }
 
-var file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes = make([]protoimpl.EnumInfo, 3)
-var file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes = make([]protoimpl.MessageInfo, 11)
+var file_tim_api_thread_v1alpha1_thread_types_proto_enumTypes = make([]protoimpl.EnumInfo, 4)
+var file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes = make([]protoimpl.MessageInfo, 16)
 var file_tim_api_thread_v1alpha1_thread_types_proto_goTypes = []any{
-	(LlmMessageRole)(0),            // 0: tim.api.thread.v1alpha1.LlmMessageRole
-	(ThreadLLMState)(0),            // 1: tim.api.thread.v1alpha1.ThreadLLMState
-	(ContentType)(0),               // 2: tim.api.thread.v1alpha1.ContentType
-	(*Thread)(nil),                 // 3: tim.api.thread.v1alpha1.Thread
-	(*LlmMessage)(nil),             // 4: tim.api.thread.v1alpha1.LlmMessage
-	(*Thinking)(nil),               // 5: tim.api.thread.v1alpha1.Thinking
-	(*LlmMessageContent)(nil),      // 6: tim.api.thread.v1alpha1.LlmMessageContent
-	(*ContentStartEvent)(nil),      // 7: tim.api.thread.v1alpha1.ContentStartEvent
-	(*ContentDeltaEvent)(nil),      // 8: tim.api.thread.v1alpha1.ContentDeltaEvent
-	(*ContentStopEvent)(nil),       // 9: tim.api.thread.v1alpha1.ContentStopEvent
-	(*ThreadStateChangeEvent)(nil), // 10: tim.api.thread.v1alpha1.ThreadStateChangeEvent
-	(*StreamErrorEvent)(nil),       // 11: tim.api.thread.v1alpha1.StreamErrorEvent
-	(*FileRestoration)(nil),        // 12: tim.api.thread.v1alpha1.FileRestoration
-	(*Thread_ParentThreadId)(nil),  // 13: tim.api.thread.v1alpha1.Thread.ParentThreadId
-	(*timestamppb.Timestamp)(nil),  // 14: google.protobuf.Timestamp
-	(*v1alpha1.TokenUsage)(nil),    // 15: tim.api.llm_response.v1alpha1.TokenUsage
-	(*v1alpha11.ToolCall)(nil),     // 16: tim.api.tool.v1alpha1.ToolCall
-	(*v1alpha11.ToolResult)(nil),   // 17: tim.api.tool.v1alpha1.ToolResult
+	(CompactionStrategy)(0),        // 0: tim.api.thread.v1alpha1.CompactionStrategy
+	(LlmMessageRole)(0),            // 1: tim.api.thread.v1alpha1.LlmMessageRole
+	(ThreadLLMState)(0),            // 2: tim.api.thread.v1alpha1.ThreadLLMState
+	(ContentType)(0),               // 3: tim.api.thread.v1alpha1.ContentType
+	(*Thread)(nil),                 // 4: tim.api.thread.v1alpha1.Thread
+	(*LlmMessage)(nil),             // 5: tim.api.thread.v1alpha1.LlmMessage
+	(*Thinking)(nil),               // 6: tim.api.thread.v1alpha1.Thinking
+	(*LlmMessageContent)(nil),      // 7: tim.api.thread.v1alpha1.LlmMessageContent
+	(*ContentStartEvent)(nil),      // 8: tim.api.thread.v1alpha1.ContentStartEvent
+	(*ContentDeltaEvent)(nil),      // 9: tim.api.thread.v1alpha1.ContentDeltaEvent
+	(*ContentStopEvent)(nil),       // 10: tim.api.thread.v1alpha1.ContentStopEvent
+	(*ThreadStateChangeEvent)(nil), // 11: tim.api.thread.v1alpha1.ThreadStateChangeEvent
+	(*StreamErrorEvent)(nil),       // 12: tim.api.thread.v1alpha1.StreamErrorEvent
+	(*FileRestoration)(nil),        // 13: tim.api.thread.v1alpha1.FileRestoration
+	(*CompactionConfig)(nil),       // 14: tim.api.thread.v1alpha1.CompactionConfig
+	(*TruncationParams)(nil),       // 15: tim.api.thread.v1alpha1.TruncationParams
+	(*SummarizationParams)(nil),    // 16: tim.api.thread.v1alpha1.SummarizationParams
+	(*ImportanceBasedParams)(nil),  // 17: tim.api.thread.v1alpha1.ImportanceBasedParams
+	(*CompactionResult)(nil),       // 18: tim.api.thread.v1alpha1.CompactionResult
+	(*Thread_ParentThreadId)(nil),  // 19: tim.api.thread.v1alpha1.Thread.ParentThreadId
+	(*timestamppb.Timestamp)(nil),  // 20: google.protobuf.Timestamp
+	(*v1alpha1.TokenUsage)(nil),    // 21: tim.api.llm_response.v1alpha1.TokenUsage
+	(*v1alpha11.ToolCall)(nil),     // 22: tim.api.tool.v1alpha1.ToolCall
+	(*v1alpha11.ToolResult)(nil),   // 23: tim.api.tool.v1alpha1.ToolResult
 }
 var file_tim_api_thread_v1alpha1_thread_types_proto_depIdxs = []int32{
-	13, // 0: tim.api.thread.v1alpha1.Thread.parent_thread_id:type_name -> tim.api.thread.v1alpha1.Thread.ParentThreadId
-	14, // 1: tim.api.thread.v1alpha1.Thread.create_time:type_name -> google.protobuf.Timestamp
-	14, // 2: tim.api.thread.v1alpha1.Thread.update_time:type_name -> google.protobuf.Timestamp
-	1,  // 3: tim.api.thread.v1alpha1.Thread.llm_state:type_name -> tim.api.thread.v1alpha1.ThreadLLMState
-	0,  // 4: tim.api.thread.v1alpha1.LlmMessage.role:type_name -> tim.api.thread.v1alpha1.LlmMessageRole
-	6,  // 5: tim.api.thread.v1alpha1.LlmMessage.contents:type_name -> tim.api.thread.v1alpha1.LlmMessageContent
-	14, // 6: tim.api.thread.v1alpha1.LlmMessage.create_time:type_name -> google.protobuf.Timestamp
-	15, // 7: tim.api.thread.v1alpha1.LlmMessage.token_usage:type_name -> tim.api.llm_response.v1alpha1.TokenUsage
-	5,  // 8: tim.api.thread.v1alpha1.LlmMessageContent.thinking:type_name -> tim.api.thread.v1alpha1.Thinking
-	16, // 9: tim.api.thread.v1alpha1.LlmMessageContent.tool_call:type_name -> tim.api.tool.v1alpha1.ToolCall
-	17, // 10: tim.api.thread.v1alpha1.LlmMessageContent.tool_result:type_name -> tim.api.tool.v1alpha1.ToolResult
-	14, // 11: tim.api.thread.v1alpha1.LlmMessageContent.create_time:type_name -> google.protobuf.Timestamp
-	0,  // 12: tim.api.thread.v1alpha1.ContentStartEvent.role:type_name -> tim.api.thread.v1alpha1.LlmMessageRole
-	2,  // 13: tim.api.thread.v1alpha1.ContentStartEvent.type:type_name -> tim.api.thread.v1alpha1.ContentType
-	1,  // 14: tim.api.thread.v1alpha1.ThreadStateChangeEvent.llm_state:type_name -> tim.api.thread.v1alpha1.ThreadLLMState
-	14, // 15: tim.api.thread.v1alpha1.Thread.ParentThreadId.before_time:type_name -> google.protobuf.Timestamp
-	16, // [16:16] is the sub-list for method output_type
-	16, // [16:16] is the sub-list for method input_type
-	16, // [16:16] is the sub-list for extension type_name
-	16, // [16:16] is the sub-list for extension extendee
-	0,  // [0:16] is the sub-list for field type_name
+	19, // 0: tim.api.thread.v1alpha1.Thread.parent_thread_id:type_name -> tim.api.thread.v1alpha1.Thread.ParentThreadId
+	20, // 1: tim.api.thread.v1alpha1.Thread.create_time:type_name -> google.protobuf.Timestamp
+	20, // 2: tim.api.thread.v1alpha1.Thread.update_time:type_name -> google.protobuf.Timestamp
+	2,  // 3: tim.api.thread.v1alpha1.Thread.llm_state:type_name -> tim.api.thread.v1alpha1.ThreadLLMState
+	1,  // 4: tim.api.thread.v1alpha1.LlmMessage.role:type_name -> tim.api.thread.v1alpha1.LlmMessageRole
+	7,  // 5: tim.api.thread.v1alpha1.LlmMessage.contents:type_name -> tim.api.thread.v1alpha1.LlmMessageContent
+	20, // 6: tim.api.thread.v1alpha1.LlmMessage.create_time:type_name -> google.protobuf.Timestamp
+	21, // 7: tim.api.thread.v1alpha1.LlmMessage.token_usage:type_name -> tim.api.llm_response.v1alpha1.TokenUsage
+	6,  // 8: tim.api.thread.v1alpha1.LlmMessageContent.thinking:type_name -> tim.api.thread.v1alpha1.Thinking
+	22, // 9: tim.api.thread.v1alpha1.LlmMessageContent.tool_call:type_name -> tim.api.tool.v1alpha1.ToolCall
+	23, // 10: tim.api.thread.v1alpha1.LlmMessageContent.tool_result:type_name -> tim.api.tool.v1alpha1.ToolResult
+	20, // 11: tim.api.thread.v1alpha1.LlmMessageContent.create_time:type_name -> google.protobuf.Timestamp
+	1,  // 12: tim.api.thread.v1alpha1.ContentStartEvent.role:type_name -> tim.api.thread.v1alpha1.LlmMessageRole
+	3,  // 13: tim.api.thread.v1alpha1.ContentStartEvent.type:type_name -> tim.api.thread.v1alpha1.ContentType
+	2,  // 14: tim.api.thread.v1alpha1.ThreadStateChangeEvent.llm_state:type_name -> tim.api.thread.v1alpha1.ThreadLLMState
+	0,  // 15: tim.api.thread.v1alpha1.CompactionConfig.strategy:type_name -> tim.api.thread.v1alpha1.CompactionStrategy
+	15, // 16: tim.api.thread.v1alpha1.CompactionConfig.truncation:type_name -> tim.api.thread.v1alpha1.TruncationParams
+	16, // 17: tim.api.thread.v1alpha1.CompactionConfig.summarization:type_name -> tim.api.thread.v1alpha1.SummarizationParams
+	17, // 18: tim.api.thread.v1alpha1.CompactionConfig.importance_based:type_name -> tim.api.thread.v1alpha1.ImportanceBasedParams
+	4,  // 19: tim.api.thread.v1alpha1.CompactionResult.forked_thread:type_name -> tim.api.thread.v1alpha1.Thread
+	0,  // 20: tim.api.thread.v1alpha1.CompactionResult.strategy:type_name -> tim.api.thread.v1alpha1.CompactionStrategy
+	20, // 21: tim.api.thread.v1alpha1.Thread.ParentThreadId.before_time:type_name -> google.protobuf.Timestamp
+	22, // [22:22] is the sub-list for method output_type
+	22, // [22:22] is the sub-list for method input_type
+	22, // [22:22] is the sub-list for extension type_name
+	22, // [22:22] is the sub-list for extension extendee
+	0,  // [0:22] is the sub-list for field type_name
 }
 
 func init() { file_tim_api_thread_v1alpha1_thread_types_proto_init() }
@@ -1176,13 +1700,18 @@ func file_tim_api_thread_v1alpha1_thread_types_proto_init() {
 		(*LlmMessageContent_ToolCall)(nil),
 		(*LlmMessageContent_ToolResult)(nil),
 	}
+	file_tim_api_thread_v1alpha1_thread_types_proto_msgTypes[10].OneofWrappers = []any{
+		(*CompactionConfig_Truncation)(nil),
+		(*CompactionConfig_Summarization)(nil),
+		(*CompactionConfig_ImportanceBased)(nil),
+	}
 	type x struct{}
 	out := protoimpl.TypeBuilder{
 		File: protoimpl.DescBuilder{
 			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
 			RawDescriptor: unsafe.Slice(unsafe.StringData(file_tim_api_thread_v1alpha1_thread_types_proto_rawDesc), len(file_tim_api_thread_v1alpha1_thread_types_proto_rawDesc)),
-			NumEnums:      3,
-			NumMessages:   11,
+			NumEnums:      4,
+			NumMessages:   16,
 			NumExtensions: 0,
 			NumServices:   0,
 		},
diff --git a/tim-proto/gen/tim/api/thread/v1alpha1/threadv1alpha1connect/thread_service.connect.go b/tim-proto/gen/tim/api/thread/v1alpha1/threadv1alpha1connect/thread_service.connect.go
index fcda712e8..937487216 100644
--- a/tim-proto/gen/tim/api/thread/v1alpha1/threadv1alpha1connect/thread_service.connect.go
+++ b/tim-proto/gen/tim/api/thread/v1alpha1/threadv1alpha1connect/thread_service.connect.go
@@ -66,6 +66,9 @@ const (
 	// ThreadServiceConfigureThreadWorkingDirectoryProcedure is the fully-qualified name of the
 	// ThreadService's ConfigureThreadWorkingDirectory RPC.
 	ThreadServiceConfigureThreadWorkingDirectoryProcedure = "/tim.api.thread.v1alpha1.ThreadService/ConfigureThreadWorkingDirectory"
+	// ThreadServiceCompactThreadProcedure is the fully-qualified name of the ThreadService's
+	// CompactThread RPC.
+	ThreadServiceCompactThreadProcedure = "/tim.api.thread.v1alpha1.ThreadService/CompactThread"
 )
 
 // ThreadServiceClient is a client for the tim.api.thread.v1alpha1.ThreadService service.
@@ -94,6 +97,8 @@ type ThreadServiceClient interface {
 	EditThreadMessage(context.Context, *connect.Request[v1alpha1.EditThreadMessageRequest]) (*connect.Response[v1alpha1.EditThreadMessageResponse], error)
 	// Configure the working directory for a thread (for checkpoint creation)
 	ConfigureThreadWorkingDirectory(context.Context, *connect.Request[v1alpha1.ConfigureThreadWorkingDirectoryRequest]) (*connect.Response[emptypb.Empty], error)
+	// Compact a thread to reduce context size
+	CompactThread(context.Context, *connect.Request[v1alpha1.CompactThreadRequest]) (*connect.Response[v1alpha1.CompactThreadResponse], error)
 }
 
 // NewThreadServiceClient constructs a client for the tim.api.thread.v1alpha1.ThreadService service.
@@ -173,6 +178,12 @@ func NewThreadServiceClient(httpClient connect.HTTPClient, baseURL string, opts
 			connect.WithSchema(threadServiceMethods.ByName("ConfigureThreadWorkingDirectory")),
 			connect.WithClientOptions(opts...),
 		),
+		compactThread: connect.NewClient[v1alpha1.CompactThreadRequest, v1alpha1.CompactThreadResponse](
+			httpClient,
+			baseURL+ThreadServiceCompactThreadProcedure,
+			connect.WithSchema(threadServiceMethods.ByName("CompactThread")),
+			connect.WithClientOptions(opts...),
+		),
 	}
 }
 
@@ -189,6 +200,7 @@ type threadServiceClient struct {
 	submitUserMessage               *connect.Client[v1alpha1.SubmitUserMessageRequest, v1alpha1.LlmMessage]
 	editThreadMessage               *connect.Client[v1alpha1.EditThreadMessageRequest, v1alpha1.EditThreadMessageResponse]
 	configureThreadWorkingDirectory *connect.Client[v1alpha1.ConfigureThreadWorkingDirectoryRequest, emptypb.Empty]
+	compactThread                   *connect.Client[v1alpha1.CompactThreadRequest, v1alpha1.CompactThreadResponse]
 }
 
 // GetThread calls tim.api.thread.v1alpha1.ThreadService.GetThread.
@@ -247,6 +259,11 @@ func (c *threadServiceClient) ConfigureThreadWorkingDirectory(ctx context.Contex
 	return c.configureThreadWorkingDirectory.CallUnary(ctx, req)
 }
 
+// CompactThread calls tim.api.thread.v1alpha1.ThreadService.CompactThread.
+func (c *threadServiceClient) CompactThread(ctx context.Context, req *connect.Request[v1alpha1.CompactThreadRequest]) (*connect.Response[v1alpha1.CompactThreadResponse], error) {
+	return c.compactThread.CallUnary(ctx, req)
+}
+
 // ThreadServiceHandler is an implementation of the tim.api.thread.v1alpha1.ThreadService service.
 type ThreadServiceHandler interface {
 	// Get a thread by ID
@@ -273,6 +290,8 @@ type ThreadServiceHandler interface {
 	EditThreadMessage(context.Context, *connect.Request[v1alpha1.EditThreadMessageRequest]) (*connect.Response[v1alpha1.EditThreadMessageResponse], error)
 	// Configure the working directory for a thread (for checkpoint creation)
 	ConfigureThreadWorkingDirectory(context.Context, *connect.Request[v1alpha1.ConfigureThreadWorkingDirectoryRequest]) (*connect.Response[emptypb.Empty], error)
+	// Compact a thread to reduce context size
+	CompactThread(context.Context, *connect.Request[v1alpha1.CompactThreadRequest]) (*connect.Response[v1alpha1.CompactThreadResponse], error)
 }
 
 // NewThreadServiceHandler builds an HTTP handler from the service implementation. It returns the
@@ -348,6 +367,12 @@ func NewThreadServiceHandler(svc ThreadServiceHandler, opts ...connect.HandlerOp
 		connect.WithSchema(threadServiceMethods.ByName("ConfigureThreadWorkingDirectory")),
 		connect.WithHandlerOptions(opts...),
 	)
+	threadServiceCompactThreadHandler := connect.NewUnaryHandler(
+		ThreadServiceCompactThreadProcedure,
+		svc.CompactThread,
+		connect.WithSchema(threadServiceMethods.ByName("CompactThread")),
+		connect.WithHandlerOptions(opts...),
+	)
 	return "/tim.api.thread.v1alpha1.ThreadService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.Path {
 		case ThreadServiceGetThreadProcedure:
@@ -372,6 +397,8 @@ func NewThreadServiceHandler(svc ThreadServiceHandler, opts ...connect.HandlerOp
 			threadServiceEditThreadMessageHandler.ServeHTTP(w, r)
 		case ThreadServiceConfigureThreadWorkingDirectoryProcedure:
 			threadServiceConfigureThreadWorkingDirectoryHandler.ServeHTTP(w, r)
+		case ThreadServiceCompactThreadProcedure:
+			threadServiceCompactThreadHandler.ServeHTTP(w, r)
 		default:
 			http.NotFound(w, r)
 		}
@@ -424,3 +451,7 @@ func (UnimplementedThreadServiceHandler) EditThreadMessage(context.Context, *con
 func (UnimplementedThreadServiceHandler) ConfigureThreadWorkingDirectory(context.Context, *connect.Request[v1alpha1.ConfigureThreadWorkingDirectoryRequest]) (*connect.Response[emptypb.Empty], error) {
 	return nil, connect.NewError(connect.CodeUnimplemented, errors.New("tim.api.thread.v1alpha1.ThreadService.ConfigureThreadWorkingDirectory is not implemented"))
 }
+
+func (UnimplementedThreadServiceHandler) CompactThread(context.Context, *connect.Request[v1alpha1.CompactThreadRequest]) (*connect.Response[v1alpha1.CompactThreadResponse], error) {
+	return nil, connect.NewError(connect.CodeUnimplemented, errors.New("tim.api.thread.v1alpha1.ThreadService.CompactThread is not implemented"))
+}
diff --git a/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.pb.go b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.pb.go
new file mode 100644
index 000000000..ac4e91e69
--- /dev/null
+++ b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.pb.go
@@ -0,0 +1,194 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.36.9
+// 	protoc        (unknown)
+// source: tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto
+
+package thread_compactionv1alpha1
+
+import (
+	_ "buf.build/gen/go/aep/api/protocolbuffers/go/aep/api"
+	_ "buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go/buf/validate"
+	v1alpha1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
+	_ "google.golang.org/genproto/googleapis/api/annotations"
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+	unsafe "unsafe"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+// PushCompactionResultRequest is used to push a compaction result from worker to API
+type PushCompactionResultRequest struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// The resource path of the original thread that was compacted
+	Thread string `protobuf:"bytes,1,opt,name=thread,proto3" json:"thread,omitempty"`
+	// The compaction result
+	Result        *v1alpha1.CompactionResult `protobuf:"bytes,2,opt,name=result,proto3" json:"result,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *PushCompactionResultRequest) Reset() {
+	*x = PushCompactionResultRequest{}
+	mi := &file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes[0]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *PushCompactionResultRequest) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PushCompactionResultRequest) ProtoMessage() {}
+
+func (x *PushCompactionResultRequest) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes[0]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use PushCompactionResultRequest.ProtoReflect.Descriptor instead.
+func (*PushCompactionResultRequest) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *PushCompactionResultRequest) GetThread() string {
+	if x != nil {
+		return x.Thread
+	}
+	return ""
+}
+
+func (x *PushCompactionResultRequest) GetResult() *v1alpha1.CompactionResult {
+	if x != nil {
+		return x.Result
+	}
+	return nil
+}
+
+// PushCompactionResultResponse is the response after pushing compaction result
+type PushCompactionResultResponse struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Acknowledgment that the result was received and processed
+	Success       bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *PushCompactionResultResponse) Reset() {
+	*x = PushCompactionResultResponse{}
+	mi := &file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes[1]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *PushCompactionResultResponse) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PushCompactionResultResponse) ProtoMessage() {}
+
+func (x *PushCompactionResultResponse) ProtoReflect() protoreflect.Message {
+	mi := &file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes[1]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use PushCompactionResultResponse.ProtoReflect.Descriptor instead.
+func (*PushCompactionResultResponse) Descriptor() ([]byte, []int) {
+	return file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *PushCompactionResultResponse) GetSuccess() bool {
+	if x != nil {
+		return x.Success
+	}
+	return false
+}
+
+var File_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto protoreflect.FileDescriptor
+
+const file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDesc = "" +
+	"\n" +
+	"Btim/api/thread_compaction/v1alpha1/thread_compaction_service.proto\x12\"tim.api.thread_compaction.v1alpha1\x1a\x18aep/api/field_info.proto\x1a\x1bbuf/validate/validate.proto\x1a\x1cgoogle/api/annotations.proto\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a*tim/api/thread/v1alpha1/thread_types.proto\"\xf7\x01\n" +
+	"\x1bPushCompactionResultRequest\x12\x89\x01\n" +
+	"\x06thread\x18\x01 \x01(\tBq\xe0A\x02\xbaHKrI2G^orgs/[a-fA-F0-9-]{36}/users/[a-fA-F0-9-]{36}/threads/[a-fA-F0-9-]{36}$\u0091\x05\x1c\x12\x1atim.settlerlabs.com/threadR\x06thread\x12L\n" +
+	"\x06result\x18\x02 \x01(\v2).tim.api.thread.v1alpha1.CompactionResultB\t\xe0A\x02\xbaH\x03\xc8\x01\x01R\x06result\"8\n" +
+	"\x1cPushCompactionResultResponse\x12\x18\n" +
+	"\asuccess\x18\x01 \x01(\bR\asuccess2\x92\x02\n" +
+	"\x17ThreadCompactionService\x12\xf6\x01\n" +
+	"\x14PushCompactionResult\x12?.tim.api.thread_compaction.v1alpha1.PushCompactionResultRequest\x1a@.tim.api.thread_compaction.v1alpha1.PushCompactionResultResponse\"[\xdaA\rthread,result\x82\xd3\xe4\x93\x02E:\x01*\"@/v1alpha1/{thread=orgs/*/users/*/threads/*}:pushCompactionResultB\xd5\x02\n" +
+	"&com.tim.api.thread_compaction.v1alpha1B\x1cThreadCompactionServiceProtoP\x01Zfgithub.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1;thread_compactionv1alpha1\xa2\x02\x03TAT\xaa\x02!Tim.Api.ThreadCompaction.V1alpha1\xca\x02!Tim\\Api\\ThreadCompaction\\V1alpha1\xe2\x02-Tim\\Api\\ThreadCompaction\\V1alpha1\\GPBMetadata\xea\x02$Tim::Api::ThreadCompaction::V1alpha1b\x06proto3"
+
+var (
+	file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescOnce sync.Once
+	file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescData []byte
+)
+
+func file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescGZIP() []byte {
+	file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescOnce.Do(func() {
+		file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDesc), len(file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDesc)))
+	})
+	return file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDescData
+}
+
+var file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
+var file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_goTypes = []any{
+	(*PushCompactionResultRequest)(nil),  // 0: tim.api.thread_compaction.v1alpha1.PushCompactionResultRequest
+	(*PushCompactionResultResponse)(nil), // 1: tim.api.thread_compaction.v1alpha1.PushCompactionResultResponse
+	(*v1alpha1.CompactionResult)(nil),    // 2: tim.api.thread.v1alpha1.CompactionResult
+}
+var file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_depIdxs = []int32{
+	2, // 0: tim.api.thread_compaction.v1alpha1.PushCompactionResultRequest.result:type_name -> tim.api.thread.v1alpha1.CompactionResult
+	0, // 1: tim.api.thread_compaction.v1alpha1.ThreadCompactionService.PushCompactionResult:input_type -> tim.api.thread_compaction.v1alpha1.PushCompactionResultRequest
+	1, // 2: tim.api.thread_compaction.v1alpha1.ThreadCompactionService.PushCompactionResult:output_type -> tim.api.thread_compaction.v1alpha1.PushCompactionResultResponse
+	2, // [2:3] is the sub-list for method output_type
+	1, // [1:2] is the sub-list for method input_type
+	1, // [1:1] is the sub-list for extension type_name
+	1, // [1:1] is the sub-list for extension extendee
+	0, // [0:1] is the sub-list for field type_name
+}
+
+func init() { file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_init() }
+func file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_init() {
+	if File_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto != nil {
+		return
+	}
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			RawDescriptor: unsafe.Slice(unsafe.StringData(file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDesc), len(file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_rawDesc)),
+			NumEnums:      0,
+			NumMessages:   2,
+			NumExtensions: 0,
+			NumServices:   1,
+		},
+		GoTypes:           file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_goTypes,
+		DependencyIndexes: file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_depIdxs,
+		MessageInfos:      file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_msgTypes,
+	}.Build()
+	File_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto = out.File
+	file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_goTypes = nil
+	file_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto_depIdxs = nil
+}
diff --git a/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.swagger.json b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.swagger.json
new file mode 100644
index 000000000..46dd2abca
--- /dev/null
+++ b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compaction_service.swagger.json
@@ -0,0 +1,242 @@
+{
+  "swagger": "2.0",
+  "info": {
+    "title": "tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto",
+    "version": "version not set"
+  },
+  "tags": [
+    {
+      "name": "ThreadCompactionService"
+    }
+  ],
+  "consumes": [
+    "application/json"
+  ],
+  "produces": [
+    "application/json"
+  ],
+  "paths": {
+    "/v1alpha1/{thread}:pushCompactionResult": {
+      "post": {
+        "summary": "Push compaction result from worker to API server",
+        "operationId": "ThreadCompactionService_PushCompactionResult",
+        "responses": {
+          "200": {
+            "description": "A successful response.",
+            "schema": {
+              "$ref": "#/definitions/v1alpha1PushCompactionResultResponse"
+            }
+          },
+          "default": {
+            "description": "An unexpected error response.",
+            "schema": {
+              "$ref": "#/definitions/rpcStatus"
+            }
+          }
+        },
+        "parameters": [
+          {
+            "name": "thread",
+            "description": "The resource path of the original thread that was compacted",
+            "in": "path",
+            "required": true,
+            "type": "string",
+            "pattern": "orgs/[^/]+/users/[^/]+/threads/[^/]+"
+          },
+          {
+            "name": "body",
+            "in": "body",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/ThreadCompactionServicePushCompactionResultBody"
+            }
+          }
+        ],
+        "tags": [
+          "ThreadCompactionService"
+        ]
+      }
+    }
+  },
+  "definitions": {
+    "ThreadCompactionServicePushCompactionResultBody": {
+      "type": "object",
+      "properties": {
+        "result": {
+          "$ref": "#/definitions/v1alpha1CompactionResult",
+          "title": "The compaction result"
+        }
+      },
+      "title": "PushCompactionResultRequest is used to push a compaction result from worker to API",
+      "required": [
+        "result"
+      ]
+    },
+    "ThreadParentThreadId": {
+      "type": "object",
+      "properties": {
+        "path": {
+          "type": "string",
+          "title": "The path identifier of the parent thread"
+        },
+        "beforeTime": {
+          "type": "string",
+          "format": "date-time",
+          "description": "The time at which the parent thread was forked, the thread events\nbelonging to this thread will include all events which occurred before\nthis timestamp."
+        },
+        "forkMessageUid": {
+          "type": "string",
+          "description": "The message at which this thread was forked (optional for backward compatibility)\nIf set, this is the exact message that served as the fork point.",
+          "readOnly": true
+        }
+      },
+      "description": "The identifier for the original thread this thread was forked from."
+    },
+    "protobufAny": {
+      "type": "object",
+      "properties": {
+        "@type": {
+          "type": "string",
+          "description": "A URL/resource name that uniquely identifies the type of the serialized\nprotocol buffer message. This string must contain at least\none \"/\" character. The last segment of the URL's path must represent\nthe fully qualified name of the type (as in\n`path/google.protobuf.Duration`). The name should be in a canonical form\n(e.g., leading \".\" is not accepted).\n\nIn practice, teams usually precompile into the binary all types that they\nexpect it to use in the context of Any. However, for URLs which use the\nscheme `http`, `https`, or no scheme, one can optionally set up a type\nserver that maps type URLs to message definitions as follows:\n\n* If no scheme is provided, `https` is assumed.\n* An HTTP GET on the URL must yield a [google.protobuf.Type][]\n  value in binary format, or produce an error.\n* Applications are allowed to cache lookup results based on the\n  URL, or have them precompiled into a binary to avoid any\n  lookup. Therefore, binary compatibility needs to be preserved\n  on changes to types. (Use versioned type names to manage\n  breaking changes.)\n\nNote: this functionality is not currently available in the official\nprotobuf release, and it is not used for type URLs beginning with\ntype.googleapis.com. As of May 2023, there are no widely used type server\nimplementations and no plans to implement one.\n\nSchemes other than `http`, `https` (or the empty scheme) might be\nused with implementation specific semantics."
+        }
+      },
+      "additionalProperties": {},
+      "description": "`Any` contains an arbitrary serialized protocol buffer message along with a\nURL that describes the type of the serialized message.\n\nProtobuf library provides support to pack/unpack Any values in the form\nof utility functions or additional generated methods of the Any type.\n\nExample 1: Pack and unpack a message in C++.\n\n    Foo foo = ...;\n    Any any;\n    any.PackFrom(foo);\n    ...\n    if (any.UnpackTo(\u0026foo)) {\n      ...\n    }\n\nExample 2: Pack and unpack a message in Java.\n\n    Foo foo = ...;\n    Any any = Any.pack(foo);\n    ...\n    if (any.is(Foo.class)) {\n      foo = any.unpack(Foo.class);\n    }\n    // or ...\n    if (any.isSameTypeAs(Foo.getDefaultInstance())) {\n      foo = any.unpack(Foo.getDefaultInstance());\n    }\n\n Example 3: Pack and unpack a message in Python.\n\n    foo = Foo(...)\n    any = Any()\n    any.Pack(foo)\n    ...\n    if any.Is(Foo.DESCRIPTOR):\n      any.Unpack(foo)\n      ...\n\n Example 4: Pack and unpack a message in Go\n\n     foo := \u0026pb.Foo{...}\n     any, err := anypb.New(foo)\n     if err != nil {\n       ...\n     }\n     ...\n     foo := \u0026pb.Foo{}\n     if err := any.UnmarshalTo(foo); err != nil {\n       ...\n     }\n\nThe pack methods provided by protobuf library will by default use\n'type.googleapis.com/full.type.name' as the type URL and the unpack\nmethods only use the fully qualified type name after the last '/'\nin the type URL, for example \"foo.bar.com/x/y.z\" will yield type\nname \"y.z\".\n\nJSON\n====\nThe JSON representation of an `Any` value uses the regular\nrepresentation of the deserialized, embedded message, with an\nadditional field `@type` which contains the type URL. 
Example:\n\n    package google.profile;\n    message Person {\n      string first_name = 1;\n      string last_name = 2;\n    }\n\n    {\n      \"@type\": \"type.googleapis.com/google.profile.Person\",\n      \"firstName\": \u003cstring\u003e,\n      \"lastName\": \u003cstring\u003e\n    }\n\nIf the embedded message type is well-known and has a custom JSON\nrepresentation, that representation will be embedded adding a field\n`value` which holds the custom JSON in addition to the `@type`\nfield. Example (for message [google.protobuf.Duration][]):\n\n    {\n      \"@type\": \"type.googleapis.com/google.protobuf.Duration\",\n      \"value\": \"1.212s\"\n    }"
+    },
+    "rpcStatus": {
+      "type": "object",
+      "properties": {
+        "code": {
+          "type": "integer",
+          "format": "int32"
+        },
+        "message": {
+          "type": "string"
+        },
+        "details": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "$ref": "#/definitions/protobufAny"
+          }
+        }
+      }
+    },
+    "v1alpha1CompactionResult": {
+      "type": "object",
+      "properties": {
+        "forkedThread": {
+          "$ref": "#/definitions/v1alpha1Thread",
+          "title": "The forked thread containing compacted messages"
+        },
+        "strategy": {
+          "$ref": "#/definitions/v1alpha1CompactionStrategy",
+          "title": "The strategy that was used"
+        },
+        "originalMessageCount": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Original message count before compaction"
+        },
+        "compactedMessageCount": {
+          "type": "integer",
+          "format": "int32",
+          "title": "Compacted message count after compaction"
+        },
+        "originalTokenCount": {
+          "type": "string",
+          "format": "int64",
+          "title": "Original total token count"
+        },
+        "compactedTokenCount": {
+          "type": "string",
+          "format": "int64",
+          "title": "Compacted total token count"
+        },
+        "threadContextUid": {
+          "type": "string",
+          "title": "UID of the thread context record"
+        }
+      },
+      "title": "Result of a compaction operation"
+    },
+    "v1alpha1CompactionStrategy": {
+      "type": "string",
+      "enum": [
+        "COMPACTION_STRATEGY_UNSPECIFIED",
+        "COMPACTION_STRATEGY_TRUNCATION",
+        "COMPACTION_STRATEGY_SUMMARIZATION",
+        "COMPACTION_STRATEGY_IMPORTANCE_BASED"
+      ],
+      "default": "COMPACTION_STRATEGY_UNSPECIFIED",
+      "description": "- COMPACTION_STRATEGY_UNSPECIFIED: Default unspecified\n - COMPACTION_STRATEGY_TRUNCATION: Keep first message and last N messages, truncate middle\n - COMPACTION_STRATEGY_SUMMARIZATION: Use LLM to summarize middle section, keep recent messages\n - COMPACTION_STRATEGY_IMPORTANCE_BASED: Keep user messages and tool calls, summarize assistant responses",
+      "title": "CompactionStrategy defines the strategy to use for thread compaction"
+    },
+    "v1alpha1PushCompactionResultResponse": {
+      "type": "object",
+      "properties": {
+        "success": {
+          "type": "boolean",
+          "title": "Acknowledgment that the result was received and processed"
+        }
+      },
+      "title": "PushCompactionResultResponse is the response after pushing compaction result"
+    },
+    "v1alpha1Thread": {
+      "type": "object",
+      "properties": {
+        "path": {
+          "type": "string",
+          "title": "The resource path identifier",
+          "readOnly": true
+        },
+        "displayName": {
+          "type": "string",
+          "title": "User visible display name of the thread, optional\nSystem may auto-generate a display name if not provided"
+        },
+        "parentThreadId": {
+          "$ref": "#/definitions/ThreadParentThreadId",
+          "description": "If this thread was forked from another thread, this is the ID of that thread\nIf not set, this is an original thread. Can only be set at creation time.",
+          "readOnly": true
+        },
+        "personaUid": {
+          "type": "string",
+          "title": "The unique identifier for the persona this thread will utilize"
+        },
+        "environmentUid": {
+          "type": "string",
+          "title": "If this thread is tied to an execution environment, this is the ID of that environment"
+        },
+        "createTime": {
+          "type": "string",
+          "format": "date-time",
+          "title": "Timestamp when the thread was created",
+          "readOnly": true
+        },
+        "updateTime": {
+          "type": "string",
+          "format": "date-time",
+          "title": "Timestamp when the thread was last updated",
+          "readOnly": true
+        },
+        "llmState": {
+          "$ref": "#/definitions/v1alpha1ThreadLLMState",
+          "title": "The current LLM processing state of the thread",
+          "readOnly": true
+        }
+      },
+      "title": "Threads are collections of context as events that represent an llm conversation"
+    },
+    "v1alpha1ThreadLLMState": {
+      "type": "string",
+      "enum": [
+        "THREAD_LLM_STATE_UNSPECIFIED",
+        "THREAD_LLM_STATE_IDLE",
+        "THREAD_LLM_STATE_PROCESSING"
+      ],
+      "default": "THREAD_LLM_STATE_UNSPECIFIED",
+      "description": "- THREAD_LLM_STATE_UNSPECIFIED: Default unspecified\n - THREAD_LLM_STATE_IDLE: LLM is idle, ready for user input\n - THREAD_LLM_STATE_PROCESSING: LLM is actively processing (may iterate with tools)",
+      "title": "The LLM processing state of a thread"
+    }
+  }
+}
diff --git a/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compactionv1alpha1connect/thread_compaction_service.connect.go b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compactionv1alpha1connect/thread_compaction_service.connect.go
new file mode 100644
index 000000000..8dd270e17
--- /dev/null
+++ b/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compactionv1alpha1connect/thread_compaction_service.connect.go
@@ -0,0 +1,115 @@
+// Code generated by protoc-gen-connect-go. DO NOT EDIT.
+//
+// Source: tim/api/thread_compaction/v1alpha1/thread_compaction_service.proto
+
+package thread_compactionv1alpha1connect
+
+import (
+	connect "connectrpc.com/connect"
+	context "context"
+	errors "errors"
+	v1alpha1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1"
+	http "net/http"
+	strings "strings"
+)
+
+// This is a compile-time assertion to ensure that this generated file and the connect package are
+// compatible. If you get a compiler error that this constant is not defined, this code was
+// generated with a version of connect newer than the one compiled into your binary. You can fix the
+// problem by either regenerating this code with an older version of connect or updating the connect
+// version compiled into your binary.
+const _ = connect.IsAtLeastVersion1_13_0
+
+const (
+	// ThreadCompactionServiceName is the fully-qualified name of the ThreadCompactionService service.
+	ThreadCompactionServiceName = "tim.api.thread_compaction.v1alpha1.ThreadCompactionService"
+)
+
+// These constants are the fully-qualified names of the RPCs defined in this package. They're
+// exposed at runtime as Spec.Procedure and as the final two segments of the HTTP route.
+//
+// Note that these are different from the fully-qualified method names used by
+// google.golang.org/protobuf/reflect/protoreflect. To convert from these constants to
+// reflection-formatted method names, remove the leading slash and convert the remaining slash to a
+// period.
+const (
+	// ThreadCompactionServicePushCompactionResultProcedure is the fully-qualified name of the
+	// ThreadCompactionService's PushCompactionResult RPC.
+	ThreadCompactionServicePushCompactionResultProcedure = "/tim.api.thread_compaction.v1alpha1.ThreadCompactionService/PushCompactionResult"
+)
+
+// ThreadCompactionServiceClient is a client for the
+// tim.api.thread_compaction.v1alpha1.ThreadCompactionService service.
+type ThreadCompactionServiceClient interface {
+	// Push compaction result from worker to API server
+	PushCompactionResult(context.Context, *connect.Request[v1alpha1.PushCompactionResultRequest]) (*connect.Response[v1alpha1.PushCompactionResultResponse], error)
+}
+
+// NewThreadCompactionServiceClient constructs a client for the
+// tim.api.thread_compaction.v1alpha1.ThreadCompactionService service. By default, it uses the
+// Connect protocol with the binary Protobuf Codec, asks for gzipped responses, and sends
+// uncompressed requests. To use the gRPC or gRPC-Web protocols, supply the connect.WithGRPC() or
+// connect.WithGRPCWeb() options.
+//
+// The URL supplied here should be the base URL for the Connect or gRPC server (for example,
+// http://api.acme.com or https://acme.com/grpc).
+func NewThreadCompactionServiceClient(httpClient connect.HTTPClient, baseURL string, opts ...connect.ClientOption) ThreadCompactionServiceClient {
+	baseURL = strings.TrimRight(baseURL, "/")
+	threadCompactionServiceMethods := v1alpha1.File_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto.Services().ByName("ThreadCompactionService").Methods()
+	return &threadCompactionServiceClient{
+		pushCompactionResult: connect.NewClient[v1alpha1.PushCompactionResultRequest, v1alpha1.PushCompactionResultResponse](
+			httpClient,
+			baseURL+ThreadCompactionServicePushCompactionResultProcedure,
+			connect.WithSchema(threadCompactionServiceMethods.ByName("PushCompactionResult")),
+			connect.WithClientOptions(opts...),
+		),
+	}
+}
+
+// threadCompactionServiceClient implements ThreadCompactionServiceClient.
+type threadCompactionServiceClient struct {
+	pushCompactionResult *connect.Client[v1alpha1.PushCompactionResultRequest, v1alpha1.PushCompactionResultResponse]
+}
+
+// PushCompactionResult calls
+// tim.api.thread_compaction.v1alpha1.ThreadCompactionService.PushCompactionResult.
+func (c *threadCompactionServiceClient) PushCompactionResult(ctx context.Context, req *connect.Request[v1alpha1.PushCompactionResultRequest]) (*connect.Response[v1alpha1.PushCompactionResultResponse], error) {
+	return c.pushCompactionResult.CallUnary(ctx, req)
+}
+
+// ThreadCompactionServiceHandler is an implementation of the
+// tim.api.thread_compaction.v1alpha1.ThreadCompactionService service.
+type ThreadCompactionServiceHandler interface {
+	// Push compaction result from worker to API server
+	PushCompactionResult(context.Context, *connect.Request[v1alpha1.PushCompactionResultRequest]) (*connect.Response[v1alpha1.PushCompactionResultResponse], error)
+}
+
+// NewThreadCompactionServiceHandler builds an HTTP handler from the service implementation. It
+// returns the path on which to mount the handler and the handler itself.
+//
+// By default, handlers support the Connect, gRPC, and gRPC-Web protocols with the binary Protobuf
+// and JSON codecs. They also support gzip compression.
+func NewThreadCompactionServiceHandler(svc ThreadCompactionServiceHandler, opts ...connect.HandlerOption) (string, http.Handler) {
+	threadCompactionServiceMethods := v1alpha1.File_tim_api_thread_compaction_v1alpha1_thread_compaction_service_proto.Services().ByName("ThreadCompactionService").Methods()
+	threadCompactionServicePushCompactionResultHandler := connect.NewUnaryHandler(
+		ThreadCompactionServicePushCompactionResultProcedure,
+		svc.PushCompactionResult,
+		connect.WithSchema(threadCompactionServiceMethods.ByName("PushCompactionResult")),
+		connect.WithHandlerOptions(opts...),
+	)
+	return "/tim.api.thread_compaction.v1alpha1.ThreadCompactionService/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case ThreadCompactionServicePushCompactionResultProcedure:
+			threadCompactionServicePushCompactionResultHandler.ServeHTTP(w, r)
+		default:
+			http.NotFound(w, r)
+		}
+	})
+}
+
+// UnimplementedThreadCompactionServiceHandler returns CodeUnimplemented from all methods.
+type UnimplementedThreadCompactionServiceHandler struct{}
+
+func (UnimplementedThreadCompactionServiceHandler) PushCompactionResult(context.Context, *connect.Request[v1alpha1.PushCompactionResultRequest]) (*connect.Response[v1alpha1.PushCompactionResultResponse], error) {
+	return nil, connect.NewError(connect.CodeUnimplemented, errors.New("tim.api.thread_compaction.v1alpha1.ThreadCompactionService.PushCompactionResult is not implemented"))
+}
diff --git a/tim-server/internal/llm/provider/anthropic.go b/tim-server/internal/llm/provider/anthropic.go
index 1e24fb8d8..c9df8fe20 100644
--- a/tim-server/internal/llm/provider/anthropic.go
+++ b/tim-server/internal/llm/provider/anthropic.go
@@ -33,12 +33,14 @@ func NewAnthropicProvider(cfg *config.Config) *AnthropicProvider {
 func (p *AnthropicProvider) Models() map[llm.ModelID]llm.Model {
 	return map[llm.ModelID]llm.Model{
 		ModelIDClaude4Sonnet: {
-			ID:       ModelIDClaude4Sonnet,
-			APIModel: string(anthropic.ModelClaude4Sonnet20250514),
+			ID:               ModelIDClaude4Sonnet,
+			APIModel:         string(anthropic.ModelClaude4Sonnet20250514),
+			MaxContextTokens: 200000,
 		},
 		ModelIDClaude4Opus: {
-			ID:       ModelIDClaude4Opus,
-			APIModel: string(anthropic.ModelClaude4Opus20250514),
+			ID:               ModelIDClaude4Opus,
+			APIModel:         string(anthropic.ModelClaude4Opus20250514),
+			MaxContextTokens: 200000,
 		},
 	}
 }
@@ -50,8 +52,9 @@ func (p *AnthropicProvider) IsValidModel(id llm.ModelID) bool {
 
 func (p *AnthropicProvider) SmallModel() llm.Model {
 	return llm.Model{
-		ID:       ModelIDClaude3_5Haiku,
-		APIModel: string(anthropic.ModelClaude3_5Haiku20241022),
+		ID:               ModelIDClaude3_5Haiku,
+		APIModel:         string(anthropic.ModelClaude3_5Haiku20241022),
+		MaxContextTokens: 200000,
 	}
 }
 
diff --git a/tim-server/internal/llm/provider/openai.go b/tim-server/internal/llm/provider/openai.go
index 990a2c595..f86a3d72f 100644
--- a/tim-server/internal/llm/provider/openai.go
+++ b/tim-server/internal/llm/provider/openai.go
@@ -44,24 +44,29 @@ func (p *OpenAIAdapter) Client() *openai.Client {
 func (p *OpenAIAdapter) Models() map[llm.ModelID]llm.Model {
 	return map[llm.ModelID]llm.Model{
 		ModelIDOpenAIGPT5: {
-			ID:       ModelIDOpenAIGPT5,
-			APIModel: openai.ChatModelGPT5,
+			ID:               ModelIDOpenAIGPT5,
+			APIModel:         openai.ChatModelGPT5,
+			MaxContextTokens: 128000,
 		},
 		ModelIDOpenAIGPT5Mini: {
-			ID:       ModelIDOpenAIGPT5Mini,
-			APIModel: openai.ChatModelGPT5Mini,
+			ID:               ModelIDOpenAIGPT5Mini,
+			APIModel:         openai.ChatModelGPT5Mini,
+			MaxContextTokens: 128000,
 		},
 		ModelIDOpenAIGPT5Nano: {
-			ID:       ModelIDOpenAIGPT5Nano,
-			APIModel: openai.ChatModelGPT5Nano,
+			ID:               ModelIDOpenAIGPT5Nano,
+			APIModel:         openai.ChatModelGPT5Nano,
+			MaxContextTokens: 128000,
 		},
 		ModelIDOpenAIO3: {
-			ID:       ModelIDOpenAIO3,
-			APIModel: openai.ChatModelO3,
+			ID:               ModelIDOpenAIO3,
+			APIModel:         openai.ChatModelO3,
+			MaxContextTokens: 200000,
 		},
 		ModelIDOpenAIO4Mini: {
-			ID:       ModelIDOpenAIO4Mini,
-			APIModel: openai.ChatModelO4Mini,
+			ID:               ModelIDOpenAIO4Mini,
+			APIModel:         openai.ChatModelO4Mini,
+			MaxContextTokens: 128000,
 		},
 	}
 }
diff --git a/tim-worker/internal/apiclient/client.go b/tim-worker/internal/apiclient/client.go
index 4bd143fd8..68e661571 100644
--- a/tim-worker/internal/apiclient/client.go
+++ b/tim-worker/internal/apiclient/client.go
@@ -13,6 +13,8 @@ import (
 	"github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/llm_response/v1alpha1/llm_responsev1alpha1connect"
 	thread "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
 	"github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1/threadv1alpha1connect"
+	threadcompactionv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1"
+	"github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_compaction/v1alpha1/thread_compactionv1alpha1connect"
 	threadcontextv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_context/v1alpha1"
 	"github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread_context/v1alpha1/thread_contextv1alpha1connect"
 	todov1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/todo/v1alpha1"
@@ -24,13 +26,14 @@ import (
 
 // Client provides methods to interact with the tim-api internal services
 type Client struct {
-	logger              *logger.Logger
-	baseURL             string
-	threadClient        threadv1alpha1connect.ThreadServiceClient
-	threadContextClient thread_contextv1alpha1connect.ThreadContextServiceClient
-	llmResponseClient   llm_responsev1alpha1connect.LlmResponseServiceClient
-	toolExecutionClient tool_executionv1alpha1connect.ToolExecutionServiceClient
-	todoClient          todov1alpha1connect.TodoServiceClient
+	logger                 *logger.Logger
+	baseURL                string
+	threadClient           threadv1alpha1connect.ThreadServiceClient
+	threadContextClient    thread_contextv1alpha1connect.ThreadContextServiceClient
+	threadCompactionClient thread_compactionv1alpha1connect.ThreadCompactionServiceClient
+	llmResponseClient      llm_responsev1alpha1connect.LlmResponseServiceClient
+	toolExecutionClient    tool_executionv1alpha1connect.ToolExecutionServiceClient
+	todoClient             todov1alpha1connect.TodoServiceClient
 }
 
 // NewClient creates a new internal API client
@@ -48,6 +51,10 @@ func NewClient(baseURL string, logger *logger.Logger) *Client {
 			httpClient,
 			baseURL,
 		),
+		threadCompactionClient: thread_compactionv1alpha1connect.NewThreadCompactionServiceClient(
+			httpClient,
+			baseURL,
+		),
 		llmResponseClient: llm_responsev1alpha1connect.NewLlmResponseServiceClient(
 			httpClient,
 			baseURL,
@@ -413,6 +420,44 @@ func (c *Client) DeleteTodo(ctx context.Context, todoPath string) error {
 	return nil
 }
 
+// ForkThread asks the thread service to fork a thread at messagePath with the given title and returns the resulting thread.
+func (c *Client) ForkThread(ctx context.Context, messagePath string, title string) (*thread.Thread, error) {
+	forkReq := &thread.ForkThreadRequest{
+		Message: messagePath,
+		Title:   &title, // pointer field: always set, so an empty title is sent explicitly
+	}
+
+	forked, err := c.threadClient.ForkThread(ctx, connect.NewRequest(forkReq))
+	if err != nil {
+		// Wrapped with %w so callers can still inspect the underlying error.
+		return nil, fmt.Errorf("failed to fork thread: %w", err)
+	}
+
+	return forked.Msg.Thread, nil
+}
+
+// PushCompactionResult sends result for the thread at threadPath to the API server's ThreadCompactionService and logs a summary on success.
+func (c *Client) PushCompactionResult(ctx context.Context, threadPath string, result *thread.CompactionResult) error {
+	req := connect.NewRequest(&threadcompactionv1.PushCompactionResultRequest{
+		Thread: threadPath,
+		Result: result,
+	})
+
+	// The response message is discarded; only the error is inspected.
+	if _, err := c.threadCompactionClient.PushCompactionResult(ctx, req); err != nil {
+		return fmt.Errorf("failed to push compaction result: %w", err)
+	}
+
+	// Use the generated getters: they are nil-safe, so a nil result cannot panic here after a successful push.
+	c.logger.Infow("pushed compaction result",
+		"thread_path", threadPath,
+		"strategy", result.GetStrategy(),
+		"original_messages", result.GetOriginalMessageCount(),
+		"compacted_messages", result.GetCompactedMessageCount(),
+	)
+	return nil
+}
+
 func (c *Client) CheckHealth(ctx context.Context) error {
 	req, err := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/ping", nil)
 	if err != nil {
diff --git a/tim-worker/internal/llm_provider/anthropic.go b/tim-worker/internal/llm_provider/anthropic.go
index 928a07d7f..e19f33d67 100644
--- a/tim-worker/internal/llm_provider/anthropic.go
+++ b/tim-worker/internal/llm_provider/anthropic.go
@@ -31,16 +31,19 @@ func newAnthropicProvider(cfg *config.LLMConfig) *anthropicProvider {
 func (p *anthropicProvider) Models() map[llm.ModelID]llm.Model {
 	return map[llm.ModelID]llm.Model{
 		llm.ModelIDClaude45Sonnet: {
-			ID:       llm.ModelIDClaude45Sonnet,
-			APIModel: string(anthropic.ModelClaude4Sonnet20250514),
+			ID:               llm.ModelIDClaude45Sonnet,
+			APIModel:         string(anthropic.ModelClaude4Sonnet20250514),
+			MaxContextTokens: 200000,
 		},
 		llm.ModelIDClaude4Opus: {
-			ID:       llm.ModelIDClaude4Opus,
-			APIModel: string(anthropic.ModelClaude4Opus20250514),
+			ID:               llm.ModelIDClaude4Opus,
+			APIModel:         string(anthropic.ModelClaude4Opus20250514),
+			MaxContextTokens: 200000,
 		},
 		llm.ModelIDClaude45Haiku: {
-			ID:       llm.ModelIDClaude45Haiku,
-			APIModel: string(anthropic.ModelClaudeHaiku4_5_20251001),
+			ID:               llm.ModelIDClaude45Haiku,
+			APIModel:         string(anthropic.ModelClaudeHaiku4_5_20251001),
+			MaxContextTokens: 200000,
 		},
 	}
 }
diff --git a/tim-worker/internal/llm_provider/google.go b/tim-worker/internal/llm_provider/google.go
index ea01d9ca8..9351c0fab 100644
--- a/tim-worker/internal/llm_provider/google.go
+++ b/tim-worker/internal/llm_provider/google.go
@@ -100,16 +100,19 @@ func (t *urlRewriteTransport) RoundTrip(req *http.Request) (*http.Response, erro
 func (p *GoogleProvider) Models() map[llm.ModelID]llm.Model {
 	return map[llm.ModelID]llm.Model{
 		llm.ModelIDGemini25Pro: {
-			ID:       llm.ModelIDGemini25Pro,
-			APIModel: string(llm.ModelIDGemini25Pro),
+			ID:               llm.ModelIDGemini25Pro,
+			APIModel:         string(llm.ModelIDGemini25Pro),
+			MaxContextTokens: 1048576, // 1M tokens (Gemini 2.5 Pro input limit; the 2M window belonged to Gemini 1.5 Pro)
 		},
 		llm.ModelIDGemini25Flash: {
-			ID:       llm.ModelIDGemini25Flash,
-			APIModel: string(llm.ModelIDGemini25Flash),
+			ID:               llm.ModelIDGemini25Flash,
+			APIModel:         string(llm.ModelIDGemini25Flash),
+			MaxContextTokens: 1048576, // 1M tokens
 		},
 		llm.ModelIDGemini25FlashLite: {
-			ID:       llm.ModelIDGemini25FlashLite,
-			APIModel: string(llm.ModelIDGemini25FlashLite),
+			ID:               llm.ModelIDGemini25FlashLite,
+			APIModel:         string(llm.ModelIDGemini25FlashLite),
+			MaxContextTokens: 1048576, // 1M tokens
 		},
 	}
 }
diff --git a/tim-worker/internal/llm_provider/openai.go b/tim-worker/internal/llm_provider/openai.go
index e3a8d5570..7a1d9ad2a 100644
--- a/tim-worker/internal/llm_provider/openai.go
+++ b/tim-worker/internal/llm_provider/openai.go
@@ -33,24 +33,29 @@ func newOpenAIProvider(cfg *config.LLMConfig) *openAIProvider {
 func (a *openAIProvider) Models() map[llm.ModelID]llm.Model {
 	return map[llm.ModelID]llm.Model{
 		llm.ModelIDGPT5: {
-			ID:       llm.ModelIDGPT5,
-			APIModel: openai.ChatModelGPT5,
+			ID:               llm.ModelIDGPT5,
+			APIModel:         openai.ChatModelGPT5,
+			MaxContextTokens: 128000,
 		},
 		llm.ModelIDGPT5Mini: {
-			ID:       llm.ModelIDGPT5Mini,
-			APIModel: openai.ChatModelGPT5Mini,
+			ID:               llm.ModelIDGPT5Mini,
+			APIModel:         openai.ChatModelGPT5Mini,
+			MaxContextTokens: 128000,
 		},
 		llm.ModelIDGPT5Nano: {
-			ID:       llm.ModelIDGPT5Nano,
-			APIModel: openai.ChatModelGPT5Nano,
+			ID:               llm.ModelIDGPT5Nano,
+			APIModel:         openai.ChatModelGPT5Nano,
+			MaxContextTokens: 128000,
 		},
 		llm.ModelIDO3: {
-			ID:       llm.ModelIDO3,
-			APIModel: openai.ChatModelO3,
+			ID:               llm.ModelIDO3,
+			APIModel:         openai.ChatModelO3,
+			MaxContextTokens: 200000,
 		},
 		llm.ModelIDO4Mini: {
-			ID:       llm.ModelIDO4Mini,
-			APIModel: openai.ChatModelO4Mini,
+			ID:               llm.ModelIDO4Mini,
+			APIModel:         openai.ChatModelO4Mini,
+			MaxContextTokens: 128000,
 		},
 	}
 }
diff --git a/tim-worker/internal/metrics/metrics.go b/tim-worker/internal/metrics/metrics.go
index febd826c9..2c1f03a98 100644
--- a/tim-worker/internal/metrics/metrics.go
+++ b/tim-worker/internal/metrics/metrics.go
@@ -46,6 +46,10 @@ type Metrics struct {
 	countTokensJobDuration metric.Float64Histogram
 	countTokensJobsTotal   metric.Int64Counter
 
+	// Thread compaction job metrics
+	threadCompactionJobDuration metric.Float64Histogram
+	threadCompactionJobsTotal   metric.Int64Counter
+
 	// Tool execution metrics
 	toolExecutionDuration metric.Float64Histogram
 	toolExecutionsTotal   metric.Int64Counter
@@ -216,6 +220,25 @@ func (m *Metrics) initializeInstruments() error {
 		return fmt.Errorf("failed to create count tokens jobs counter: %w", err)
 	}
 
+	// Thread compaction job duration histogram
+	m.threadCompactionJobDuration, err = m.meter.Float64Histogram(
+		"tim_worker_thread_compaction_job_duration_seconds",
+		metric.WithDescription("Duration of thread compaction job executions in seconds"),
+		metric.WithExplicitBucketBoundaries(0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 30.0, 60.0, 90.0, 120.0, 180.0, 300.0),
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create thread compaction job duration histogram: %w", err)
+	}
+
+	// Thread compaction jobs total counter
+	m.threadCompactionJobsTotal, err = m.meter.Int64Counter(
+		"tim_worker_thread_compaction_jobs_total",
+		metric.WithDescription("Total number of thread compaction job executions"),
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create thread compaction jobs counter: %w", err)
+	}
+
 	// Tool execution duration histogram (for individual tool executions within jobs)
 	// Note: Covers fast system tools (API calls to tim-api) and slow remote tools (external APIs, sub-agents)
 	m.toolExecutionDuration, err = m.meter.Float64Histogram(
@@ -399,6 +422,26 @@ func (m *Metrics) RecordCountTokensJob(ctx context.Context, duration float64, su
 	m.countTokensJobsTotal.Add(ctx, 1, attrs)
 }
 
+// RecordThreadCompactionJob records one finished thread compaction job: duration goes to the duration histogram and the jobs counter is incremented.
+func (m *Metrics) RecordThreadCompactionJob(ctx context.Context, duration float64, success bool, errorType string) {
+	// Both labels are always emitted so every series has the same label set;
+	// successful jobs report error_type "none" regardless of the errorType argument.
+	status := "error"
+	if success {
+		status = "success"
+		errorType = "none"
+	}
+
+	labels := metric.WithAttributes(
+		attribute.String("status", status),
+		attribute.String("error_type", errorType),
+	)
+
+	// The same attribute set is attached to both instruments.
+	m.threadCompactionJobDuration.Record(ctx, duration, labels)
+	m.threadCompactionJobsTotal.Add(ctx, 1, labels)
+}
+
 // RecordToolExecution records the execution of an individual tool
 func (m *Metrics) RecordToolExecution(ctx context.Context, toolName string, toolActor string, duration float64, success bool) {
 	status := "success"
diff --git a/tim-worker/internal/worker/compaction.go b/tim-worker/internal/worker/compaction.go
new file mode 100644
index 000000000..3cb23ea05
--- /dev/null
+++ b/tim-worker/internal/worker/compaction.go
@@ -0,0 +1,404 @@
+package worker
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	thread "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
+)
+
+// CompactionConfig is the worker-side compaction configuration that parseCompactionConfig builds from job arguments.
+type CompactionConfig struct {
+	Strategy            thread.CompactionStrategy // selects which strategy applyCompactionStrategy dispatches to
+	CreateFork          bool                      // parsed from "create_fork"; not read by the strategy code in compaction.go
+	TruncationParams    *TruncationParams         // set only for the TRUNCATION strategy; may be nil
+	SummarizationParams *SummarizationParams      // set only for the SUMMARIZATION strategy; may be nil
+	ImportanceParams    *ImportanceBasedParams    // set only for the IMPORTANCE_BASED strategy; may be nil
+}
+
+// TruncationParams holds the parameters read by applyTruncationStrategy.
+type TruncationParams struct {
+	KeepRecentMessages int32 // trailing messages to keep in addition to the first one; values <= 0 fall back to 10
+}
+
+// SummarizationParams holds the parameters for the summarization strategy.
+type SummarizationParams struct {
+	SummaryLength          int32 // target length handed to generateSummary, which currently ignores it
+	PreserveSystemMessages bool  // parsed from the job config; not consulted by applySummarizationStrategy
+	KeepRecentMessages     int32 // trailing messages kept verbatim; values <= 0 fall back to 4
+}
+
+// ImportanceBasedParams holds the parameters for the importance-based strategy.
+type ImportanceBasedParams struct {
+	PreserveUserMessages     bool  // keep user-role messages from the older portion as-is
+	PreserveToolCalls        bool  // keep assistant messages that contain a tool call as-is
+	SummarizeAssistantBlocks bool  // collapse runs of other assistant messages into summaries; when false those runs are omitted from the result
+	KeepRecentMessages       int32 // trailing messages kept verbatim; values <= 0 fall back to 5
+}
+
+// parseCompactionConfig converts the decoded compaction-config job argument into a CompactionConfig.
+//
+// configMap must hold a string "strategy" naming one of the three supported strategies; otherwise an error is
+// returned. "create_fork" and the per-strategy object ("truncation", "summarization", "importance_based") are
+// optional: keys that are absent or of an unexpected type are ignored and leave the zero value. Numeric fields
+// are only accepted as float64, the type encoding/json produces for numbers decoded into an interface{}.
+func parseCompactionConfig(configMap map[string]interface{}) (*CompactionConfig, error) {
+	config := &CompactionConfig{}
+
+	strategyStr, ok := configMap["strategy"].(string)
+	if !ok {
+		return nil, fmt.Errorf("strategy is required")
+	}
+
+	switch strategyStr {
+	case "COMPACTION_STRATEGY_TRUNCATION":
+		config.Strategy = thread.CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION
+	case "COMPACTION_STRATEGY_SUMMARIZATION":
+		config.Strategy = thread.CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION
+	case "COMPACTION_STRATEGY_IMPORTANCE_BASED":
+		config.Strategy = thread.CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED
+	default:
+		return nil, fmt.Errorf("unknown strategy: %s", strategyStr)
+	}
+
+	if createFork, ok := configMap["create_fork"].(bool); ok {
+		config.CreateFork = createFork
+	}
+
+	// Always allocate the selected strategy's params so a strategy is never handed a nil pointer; a missing object yields a nil map, which is safe to index.
+	switch config.Strategy {
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION:
+		truncation, _ := configMap["truncation"].(map[string]interface{})
+		config.TruncationParams = &TruncationParams{}
+		if keepRecent, ok := truncation["keep_recent_messages"].(float64); ok {
+			config.TruncationParams.KeepRecentMessages = int32(keepRecent)
+		}
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION:
+		summarization, _ := configMap["summarization"].(map[string]interface{})
+		config.SummarizationParams = &SummarizationParams{}
+		if summaryLength, ok := summarization["summary_length"].(float64); ok {
+			config.SummarizationParams.SummaryLength = int32(summaryLength)
+		}
+		if preserveSystem, ok := summarization["preserve_system_messages"].(bool); ok {
+			config.SummarizationParams.PreserveSystemMessages = preserveSystem
+		}
+		if keepRecent, ok := summarization["keep_recent_messages"].(float64); ok {
+			config.SummarizationParams.KeepRecentMessages = int32(keepRecent)
+		}
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED:
+		importance, _ := configMap["importance_based"].(map[string]interface{})
+		config.ImportanceParams = &ImportanceBasedParams{}
+		if preserveUser, ok := importance["preserve_user_messages"].(bool); ok {
+			config.ImportanceParams.PreserveUserMessages = preserveUser
+		}
+		if preserveTools, ok := importance["preserve_tool_calls"].(bool); ok {
+			config.ImportanceParams.PreserveToolCalls = preserveTools
+		}
+		if summarizeAssistant, ok := importance["summarize_assistant_blocks"].(bool); ok {
+			config.ImportanceParams.SummarizeAssistantBlocks = summarizeAssistant
+		}
+		if keepRecent, ok := importance["keep_recent_messages"].(float64); ok {
+			config.ImportanceParams.KeepRecentMessages = int32(keepRecent)
+		}
+	}
+
+	return config, nil
+}
+
+// applyCompactionStrategy dispatches messages to the strategy selected by config.Strategy, passing that strategy's params.
+func (w *Worker) applyCompactionStrategy(ctx context.Context, messages []*thread.LlmMessage, config *CompactionConfig) ([]*thread.LlmMessage, error) {
+	switch strategy := config.Strategy; strategy {
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_SUMMARIZATION:
+		return w.applySummarizationStrategy(ctx, messages, config.SummarizationParams)
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_IMPORTANCE_BASED:
+		return w.applyImportanceBasedStrategy(ctx, messages, config.ImportanceParams)
+	case thread.CompactionStrategy_COMPACTION_STRATEGY_TRUNCATION:
+		return w.applyTruncationStrategy(messages, config.TruncationParams)
+	default:
+		return nil, fmt.Errorf("unknown compaction strategy: %v", strategy)
+	}
+}
+
+// applyTruncationStrategy keeps the first message plus the last N messages and drops everything in between.
+//
+// N is params.KeepRecentMessages, or 10 when params is nil or the value is not positive. When there are at
+// most N+1 messages the input slice is returned unchanged; otherwise a new slice is returned.
+func (w *Worker) applyTruncationStrategy(messages []*thread.LlmMessage, params *TruncationParams) ([]*thread.LlmMessage, error) {
+	if len(messages) == 0 {
+		return messages, nil
+	}
+
+	// Tolerate a nil params by falling back to the default instead of dereferencing it.
+	keepRecent := 10
+	if params != nil && params.KeepRecentMessages > 0 {
+		keepRecent = int(params.KeepRecentMessages)
+	}
+
+	// Nothing would be dropped: the first message and the tail already cover the whole slice.
+	if len(messages) <= keepRecent+1 {
+		return messages, nil
+	}
+
+	compacted := make([]*thread.LlmMessage, 0, keepRecent+1)
+	compacted = append(compacted, messages[0])
+	compacted = append(compacted, messages[len(messages)-keepRecent:]...)
+
+	w.logger.Infow("applied truncation strategy",
+		"original_count", len(messages),
+		"compacted_count", len(compacted),
+		"kept_recent", keepRecent,
+	)
+
+	return compacted, nil
+}
+
+// applySummarizationStrategy replaces all but the last N messages with one assistant-role summary message.
+// N is params.KeepRecentMessages, or 4 when params is nil or the value is not positive. If there are at most
+// N messages the input is returned unchanged. If generateSummary fails, the error is logged and the result of
+// applyTruncationStrategy with the same N is returned instead. params.PreserveSystemMessages is not consulted.
+func (w *Worker) applySummarizationStrategy(ctx context.Context, messages []*thread.LlmMessage, params *SummarizationParams) ([]*thread.LlmMessage, error) {
+	if len(messages) == 0 {
+		return messages, nil
+	}
+
+	// Tolerate a nil params by using the defaults instead of dereferencing it.
+	keepRecent, summaryLength := 4, int32(0)
+	if params != nil {
+		summaryLength = params.SummaryLength
+		if params.KeepRecentMessages > 0 {
+			keepRecent = int(params.KeepRecentMessages)
+		}
+	}
+
+	if len(messages) <= keepRecent {
+		return messages, nil
+	}
+
+	splitIdx := len(messages) - keepRecent
+	toSummarize, toKeep := messages[:splitIdx], messages[splitIdx:]
+
+	summary, err := w.generateSummary(ctx, toSummarize, summaryLength)
+	if err != nil {
+		w.logger.Errorw("failed to generate summary", "error", err)
+		return w.applyTruncationStrategy(messages, &TruncationParams{KeepRecentMessages: int32(keepRecent)})
+	}
+
+	summaryMessage := &thread.LlmMessage{
+		Role:  thread.LlmMessageRole_LLM_MESSAGE_ROLE_ASSISTANT,
+		Model: messages[0].Model, // reuse the first message's model
+		Contents: []*thread.LlmMessageContent{
+			{
+				Data: &thread.LlmMessageContent_Text{
+					Text: fmt.Sprintf("[Summary of %d previous messages]\n\n%s", len(toSummarize), summary),
+				},
+			},
+		},
+	}
+
+	compacted := make([]*thread.LlmMessage, 0, 1+len(toKeep))
+	compacted = append(compacted, summaryMessage)
+	compacted = append(compacted, toKeep...)
+
+	w.logger.Infow("applied summarization strategy",
+		"original_count", len(messages),
+		"summarized_count", len(toSummarize),
+		"kept_recent", len(toKeep),
+		"compacted_count", len(compacted),
+	)
+
+	return compacted, nil
+}
+
+// applyImportanceBasedStrategy compacts everything except the last N messages according to message importance.
+//
+// N is params.KeepRecentMessages, or 5 when it is not positive; a nil params is treated as the zero value
+// (no flags set). The last N messages are always kept verbatim. Among the older messages:
+//   - user messages are kept when params.PreserveUserMessages is set;
+//   - assistant messages containing a tool call are kept when params.PreserveToolCalls is set;
+//   - other assistant messages are collected into runs which, when params.SummarizeAssistantBlocks is set,
+//     are replaced by summarizeAssistantBlock's output (or kept as-is if it yields no summary);
+//   - everything else, including assistant runs when SummarizeAssistantBlocks is unset, is dropped.
+func (w *Worker) applyImportanceBasedStrategy(ctx context.Context, messages []*thread.LlmMessage, params *ImportanceBasedParams) ([]*thread.LlmMessage, error) {
+	if len(messages) == 0 {
+		return messages, nil
+	}
+
+	// Tolerate a nil params by treating it as the zero value instead of dereferencing it.
+	if params == nil {
+		params = &ImportanceBasedParams{}
+	}
+
+	keepRecent := int(params.KeepRecentMessages)
+	if keepRecent <= 0 {
+		keepRecent = 5 // default
+	}
+
+	// With at most keepRecent messages there is no older portion to compact.
+	splitIdx := len(messages) - keepRecent
+	if splitIdx <= 0 {
+		return messages, nil
+	}
+	toProcess, toKeep := messages[:splitIdx], messages[splitIdx:]
+
+	compacted := make([]*thread.LlmMessage, 0, len(messages))
+	var assistantBlock []*thread.LlmMessage
+
+	// flushAssistantBlock emits the pending run of assistant messages (summarized when possible) and resets it.
+	flushAssistantBlock := func() {
+		if len(assistantBlock) == 0 || !params.SummarizeAssistantBlocks {
+			return
+		}
+		summary, err := w.summarizeAssistantBlock(ctx, assistantBlock)
+		if err == nil && summary != nil {
+			compacted = append(compacted, summary)
+		} else {
+			// No usable summary: keep the original messages rather than lose them.
+			compacted = append(compacted, assistantBlock...)
+		}
+		assistantBlock = nil
+	}
+
+	for _, msg := range toProcess {
+		isImportant := params.PreserveUserMessages && msg.Role == thread.LlmMessageRole_LLM_MESSAGE_ROLE_USER
+
+		if params.PreserveToolCalls && msg.Role == thread.LlmMessageRole_LLM_MESSAGE_ROLE_ASSISTANT {
+			for _, content := range msg.Contents {
+				if content.GetToolCall() != nil {
+					isImportant = true
+					break
+				}
+			}
+		}
+
+		switch {
+		case isImportant:
+			// Emit the pending assistant run first so the original ordering is preserved.
+			flushAssistantBlock()
+			compacted = append(compacted, msg)
+		case msg.Role == thread.LlmMessageRole_LLM_MESSAGE_ROLE_ASSISTANT:
+			assistantBlock = append(assistantBlock, msg)
+		}
+	}
+	flushAssistantBlock()
+
+	compacted = append(compacted, toKeep...)
+
+	w.logger.Infow("applied importance-based strategy",
+		"original_count", len(messages),
+		"processed_count", len(toProcess),
+		"kept_recent", len(toKeep),
+		"compacted_count", len(compacted),
+	)
+
+	return compacted, nil
+}
+
+// generateSummary builds a plain-text transcript of messages and returns a summary string derived from it.
+//
+// NOTE: this is a placeholder. No LLM is called, ctx and targetLength are unused, the returned error is
+// always nil, and the "summary" is a fixed sentence followed by the transcript cut via truncateText(..., 200).
+func (w *Worker) generateSummary(ctx context.Context, messages []*thread.LlmMessage, targetLength int32) (string, error) {
+	var transcript strings.Builder
+	for idx, msg := range messages {
+		// Every role other than user is labelled "Assistant".
+		speaker := "Assistant"
+		if msg.Role == thread.LlmMessageRole_LLM_MESSAGE_ROLE_USER {
+			speaker = "User"
+		}
+		fmt.Fprintf(&transcript, "\n[Message %d - %s]\n", idx+1, speaker)
+
+		for _, part := range msg.Contents {
+			// The first matching representation wins, checked in this order.
+			text, thinking, toolCall, toolResult := part.GetText(), part.GetThinking(), part.GetToolCall(), part.GetToolResult()
+			switch {
+			case text != "":
+				transcript.WriteString(text)
+				transcript.WriteString("\n")
+			case thinking != nil:
+				fmt.Fprintf(&transcript, "[Thinking: %s]\n", thinking.Thinking)
+			case toolCall != nil:
+				fmt.Fprintf(&transcript, "[Tool Call: %s]\n", toolCall.Name)
+			case toolResult != nil:
+				fmt.Fprintf(&transcript, "[Tool Result: %s]\n", toolResult.Result)
+			}
+		}
+	}
+
+	_ = targetLength // reserved for a real LLM prompt such as "... concise summary under {targetLength} tokens ..."
+	summary := fmt.Sprintf("Summary of %d messages covering the conversation history up to this point. Key topics discussed: %s",
+		len(messages),
+		truncateText(transcript.String(), 200))
+
+	return summary, nil
+}
+
+// summarizeAssistantBlock collapses messages into a single assistant-role message containing a short text digest.
+//
+// It returns (nil, nil) when messages is empty or holds no text content. Otherwise the digest is the
+// space-joined text contents cut via truncateText(..., 100), and Model is copied from the first message.
+// No LLM is called; ctx is unused and the returned error is always nil.
+func (w *Worker) summarizeAssistantBlock(ctx context.Context, messages []*thread.LlmMessage) (*thread.LlmMessage, error) {
+	if len(messages) == 0 {
+		return nil, nil
+	}
+
+	// Only text contents contribute; thinking, tool calls and tool results are ignored.
+	var joined strings.Builder
+	for _, msg := range messages {
+		for _, part := range msg.Contents {
+			if t := part.GetText(); t != "" {
+				joined.WriteString(t)
+				joined.WriteString(" ")
+			}
+		}
+	}
+
+	digest := strings.TrimSpace(joined.String())
+	if digest == "" {
+		return nil, nil
+	}
+
+	// Wrap the digest in a single text content.
+	summaryContent := &thread.LlmMessageContent{
+		Data: &thread.LlmMessageContent_Text{
+			Text: fmt.Sprintf("[Summary of %d assistant messages: %s...]", len(messages), truncateText(digest, 100)),
+		},
+	}
+
+	return &thread.LlmMessage{
+		Role:     thread.LlmMessageRole_LLM_MESSAGE_ROLE_ASSISTANT,
+		Model:    messages[0].Model,
+		Contents: []*thread.LlmMessageContent{summaryContent},
+	}, nil
+}
+
+// truncateText cuts text to at most maxLen bytes (maxLen must be >= 0); invalid UTF-8 left by the byte cut (e.g. a split rune) is dropped.
+func truncateText(text string, maxLen int) string {
+	if len(text) > maxLen {
+		text = strings.ToValidUTF8(text[:maxLen], "")
+	}
+	return text
+}
+
+// calculateTokenCount estimates the token count of messages as total content bytes divided by 4 (integer division).
+// Per content the first match counts: non-empty text, thinking text, tool call (name plus a flat 100 for its input), tool result.
+func calculateTokenCount(messages []*thread.LlmMessage) int64 {
+	var charCount int64
+	for _, msg := range messages {
+		for _, part := range msg.Contents {
+			text, thinking, toolCall, toolResult := part.GetText(), part.GetThinking(), part.GetToolCall(), part.GetToolResult()
+			switch {
+			case text != "":
+				charCount += int64(len(text))
+			case thinking != nil:
+				charCount += int64(len(thinking.Thinking))
+			case toolCall != nil:
+				charCount += int64(len(toolCall.Name)) + 100 // the structured input is not measured, only estimated
+			case toolResult != nil:
+				charCount += int64(len(toolResult.Result))
+			}
+		}
+	}
+	return charCount / 4 // rough chars-per-token heuristic for English text
+}
diff --git a/tim-worker/internal/worker/handle_llm_relay.go b/tim-worker/internal/worker/handle_llm_relay.go
index 3cc376dae..73c221a77 100644
--- a/tim-worker/internal/worker/handle_llm_relay.go
+++ b/tim-worker/internal/worker/handle_llm_relay.go
@@ -160,8 +160,12 @@ func (w *Worker) handleLLMRelay(ctx context.Context, args ...interface{}) error
 	}()
 
 	// Call provider with streaming (blocks until LLM completes)
+	// Set a reasonable timeout for LLM API calls to prevent indefinite hangs
+	llmCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
+	defer cancel()
+
 	llmCallStart := time.Now()
-	usage, err := prov.CallStream(ctx, *params, eventChan)
+	usage, err := prov.CallStream(llmCtx, *params, eventChan)
 	llmCallDuration := time.Since(llmCallStart).Seconds()
 	close(eventChan) // Signal to forwarding goroutine that no more events coming
 
diff --git a/tim-worker/internal/worker/handle_thread_compaction.go b/tim-worker/internal/worker/handle_thread_compaction.go
new file mode 100644
index 000000000..1364a6f20
--- /dev/null
+++ b/tim-worker/internal/worker/handle_thread_compaction.go
@@ -0,0 +1,214 @@
+package worker
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/Greybox-Labs/tim/shared/tools"
+	thread "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/thread/v1alpha1"
+	toolv1 "github.com/Greybox-Labs/tim/tim-proto/gen/tim/api/tool/v1alpha1"
+)
+
+// handleThreadCompaction processes a single "thread_compaction" job.
+//
+// args[0] must JSON-encode to an object carrying a "thread_path" string and a
+// "compaction_config" object. The handler lists the thread's messages through
+// the API client, converts them to proto form, applies the configured
+// compaction strategy, and pushes a CompactionResult holding the message and
+// approximate token counts before and after compaction back to the API.
+//
+// The concurrent-job gauge and the job duration/outcome metric are recorded
+// for every invocation, including failed ones. errorType is set just before
+// each error return so the deferred metrics call can report the failure
+// category.
+func (w *Worker) handleThreadCompaction(ctx context.Context, args ...interface{}) error {
+	jobStart := time.Now()
+	var jobSuccess bool
+	var errorType string
+
+	// Track concurrent job execution
+	w.metrics.IncrementConcurrentJobs(ctx, "thread_compaction")
+	defer func() {
+		w.metrics.DecrementConcurrentJobs(ctx, "thread_compaction")
+		jobDuration := time.Since(jobStart).Seconds()
+		w.metrics.RecordThreadCompactionJob(ctx, jobDuration, jobSuccess, errorType)
+	}()
+
+	w.logger.Infow("Processing thread compaction job", "args", args)
+
+	// Parse job arguments
+	if len(args) == 0 {
+		errorType = "argument_error"
+		return fmt.Errorf("no arguments provided for thread compaction job")
+	}
+
+	// Round-trip the first argument through JSON to decode it into a typed struct.
+	argBytes, err := json.Marshal(args[0])
+	if err != nil {
+		errorType = "argument_error"
+		return fmt.Errorf("failed to marshal job arguments: %w", err)
+	}
+
+	var jobArgs struct {
+		ThreadPath       string                 `json:"thread_path"`
+		CompactionConfig map[string]interface{} `json:"compaction_config"`
+	}
+	if err := json.Unmarshal(argBytes, &jobArgs); err != nil {
+		errorType = "argument_error"
+		return fmt.Errorf("failed to unmarshal job arguments: %w", err)
+	}
+
+	if jobArgs.ThreadPath == "" {
+		errorType = "argument_error"
+		return fmt.Errorf("thread_path is required")
+	}
+
+	w.logger.Infow("Fetching thread messages", "thread_path", jobArgs.ThreadPath)
+
+	// Fetch thread messages from API
+	messages, err := w.apiClient.ListMessages(ctx, jobArgs.ThreadPath)
+	if err != nil {
+		errorType = "api_error"
+		return fmt.Errorf("failed to list thread messages: %w", err)
+	}
+
+	// An empty thread is treated as a failed job rather than a no-op.
+	if len(messages) == 0 {
+		errorType = "validation_error"
+		return fmt.Errorf("thread has no messages to compact")
+	}
+
+	w.logger.Infow("fetched thread messages", "message_count", len(messages))
+
+	// Convert tools.Message to proto messages for compaction
+	protoMessages, err := w.convertToolsMessagesToProto(messages, jobArgs.ThreadPath)
+	if err != nil {
+		errorType = "conversion_error"
+		return fmt.Errorf("failed to convert messages: %w", err)
+	}
+
+	// Parse compaction config
+	config, err := parseCompactionConfig(jobArgs.CompactionConfig)
+	if err != nil {
+		errorType = "config_error"
+		return fmt.Errorf("failed to parse compaction config: %w", err)
+	}
+
+	w.logger.Infow("applying compaction strategy",
+		"strategy", config.Strategy,
+		"original_message_count", len(protoMessages),
+	)
+
+	// Calculate original token count
+	originalTokenCount := calculateTokenCount(protoMessages)
+
+	// Apply compaction strategy
+	compactedMessages, err := w.applyCompactionStrategy(ctx, protoMessages, config)
+	if err != nil {
+		errorType = "compaction_error"
+		return fmt.Errorf("failed to apply compaction strategy: %w", err)
+	}
+
+	// Calculate compacted token count
+	compactedTokenCount := calculateTokenCount(compactedMessages)
+
+	w.logger.Infow("compaction applied",
+		"original_messages", len(protoMessages),
+		"compacted_messages", len(compactedMessages),
+		"original_tokens", originalTokenCount,
+		"compacted_tokens", compactedTokenCount,
+	)
+
+	// Create fork with compacted messages if requested.
+	// For compaction with fork, the API handler will create the actual fork
+	// when we push the compaction result. We just need to indicate that a fork
+	// should be created by including a minimal thread representation.
+	// The message list is known to be non-empty here (checked above), so no
+	// additional length guard is needed.
+	var forkedThread *thread.Thread
+	if config.CreateFork {
+		forkedThread = &thread.Thread{
+			DisplayName: fmt.Sprintf("Compacted: %s", jobArgs.ThreadPath),
+		}
+	}
+
+	// Build compaction result.
+	// NOTE(review): only counts are placed in the result; the compacted
+	// messages themselves are not sent from here — confirm this is intended.
+	result := &thread.CompactionResult{
+		ForkedThread:          forkedThread,
+		Strategy:              config.Strategy,
+		OriginalMessageCount:  int32(len(protoMessages)),
+		CompactedMessageCount: int32(len(compactedMessages)),
+		OriginalTokenCount:    originalTokenCount,
+		CompactedTokenCount:   compactedTokenCount,
+	}
+
+	// Push compaction result to API
+	if err := w.apiClient.PushCompactionResult(ctx, jobArgs.ThreadPath, result); err != nil {
+		errorType = "api_error"
+		return fmt.Errorf("failed to push compaction result: %w", err)
+	}
+
+	jobSuccess = true
+	w.logger.Infow("Thread compaction job completed successfully",
+		"thread_path", jobArgs.ThreadPath,
+		"strategy", config.Strategy,
+		"original_messages", len(protoMessages),
+		"compacted_messages", len(compactedMessages),
+	)
+
+	return nil
+}
+
+// convertToolsMessagesToProto maps each tools.Message onto a thread.LlmMessage.
+//
+// Every output message gets its position in the input slice as Index and the
+// fixed model name "claude-sonnet-4". A role of "user" maps to the user role;
+// any other role maps to the assistant role. Content parts of type "text",
+// "thinking", "tool_use"/"tool_call" and "tool_result" are converted; parts of
+// any other type are dropped. Tool calls carry only the tool name, not the
+// input. threadPath is currently unused and the returned error is always nil.
+func (w *Worker) convertToolsMessagesToProto(messages []tools.Message, threadPath string) ([]*thread.LlmMessage, error) {
+	converted := make([]*thread.LlmMessage, 0, len(messages))
+
+	for idx, src := range messages {
+		// Anything that is not explicitly a user message is treated as assistant output.
+		role := thread.LlmMessageRole_LLM_MESSAGE_ROLE_ASSISTANT
+		if src.Role == "user" {
+			role = thread.LlmMessageRole_LLM_MESSAGE_ROLE_USER
+		}
+
+		parts := make([]*thread.LlmMessageContent, 0, len(src.Contents))
+		for _, item := range src.Contents {
+			switch item.Type {
+			case "text":
+				parts = append(parts, &thread.LlmMessageContent{
+					Data: &thread.LlmMessageContent_Text{Text: item.Text},
+				})
+			case "thinking":
+				parts = append(parts, &thread.LlmMessageContent{
+					Data: &thread.LlmMessageContent_Thinking{
+						Thinking: &thread.Thinking{Thinking: item.Text},
+					},
+				})
+			case "tool_use", "tool_call":
+				// Only the tool name is carried over; the raw tool input
+				// would have to be decoded into a structpb.Struct first.
+				parts = append(parts, &thread.LlmMessageContent{
+					Data: &thread.LlmMessageContent_ToolCall{
+						ToolCall: &toolv1.ToolCall{Name: item.ToolName},
+					},
+				})
+			case "tool_result":
+				parts = append(parts, &thread.LlmMessageContent{
+					Data: &thread.LlmMessageContent_ToolResult{
+						ToolResult: &toolv1.ToolResult{Result: item.ToolResult},
+					},
+				})
+			}
+		}
+
+		converted = append(converted, &thread.LlmMessage{
+			Index:    int32(idx),
+			Model:    "claude-sonnet-4", // Default model
+			Role:     role,
+			Contents: parts,
+		})
+	}
+
+	return converted, nil
+}
diff --git a/tim-worker/internal/worker/worker.go b/tim-worker/internal/worker/worker.go
index cc95e8534..17476c0b9 100644
--- a/tim-worker/internal/worker/worker.go
+++ b/tim-worker/internal/worker/worker.go
@@ -95,6 +95,7 @@ func NewWorker(cfg *config.Config, logger *logger.Logger, ctx context.Context) (
 	worker.jobQueueManager.Register("llm_relay", worker.handleLLMRelay)
 	worker.jobQueueManager.Register("count_tokens", worker.handleCountTokens)
 	worker.jobQueueManager.Register("tool_runner", worker.handleToolRunner)
+	worker.jobQueueManager.Register("thread_compaction", worker.handleThreadCompaction)
 
 	return worker, nil
 }