From 85eba5f38e0de09d3c723b392fdc9485c80db362 Mon Sep 17 00:00:00 2001
From: Daniel Demmel <hello@danieldemmel.me>
Date: Thu, 7 May 2026 13:24:28 +0100
Subject: [PATCH 1/2] Use `--dist=worksteal` to speed up tests + move `-n auto`
 to config to make it default

---
 justfile       | 35 +++++++++++++++++------------------
 pyproject.toml |  5 +++++
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/justfile b/justfile
index 2f7e464a..3a36289c 100644
--- a/justfile
+++ b/justfile
@@ -7,29 +7,28 @@ cli *ARGS:
 
 # Run only unit tests (fast, no external dependencies)
 test:
-    uv run pytest -n auto -m "not (tui or browser or benchmark)" -v
-
-# Run benchmark tests (outputs to GITHUB_STEP_SUMMARY in CI)
+    uv run pytest -m "not (tui or browser or benchmark)" -v
 
+# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI).
 # DEBUG_TIMING enables coverage of renderer_timings.py
 test-benchmark:
-    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark -v
+    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v
 
 # Update snapshot tests (runs serially for deterministic file ordering)
 update-snapshot:
-    uv run pytest -m snapshot --snapshot-update -v
+    uv run pytest -n0 -m snapshot --snapshot-update -v
 
 # Run TUI tests (requires isolated event loop)
 test-tui:
-    uv run pytest -n auto -m tui -v
+    uv run pytest -m tui -v
 
 # Run browser tests (requires Chromium)
 test-browser:
-    uv run pytest -n auto -m browser -v
+    uv run pytest -m browser -v
 
 # Run integration tests with realistic JSONL data
 test-integration:
-    uv run pytest -n auto -m integration -v
+    uv run pytest -m integration -v
 
 # Run all tests in sequence (separated to avoid event loop conflicts)
 test-all:
@@ -37,15 +36,15 @@ test-all:
     set -e  # Exit on first failure
     echo "🧪 Running all tests in sequence..."
     echo "📦 Running unit tests..."
-    uv run pytest -n auto -m "not (tui or browser or integration or benchmark)" -v
+    uv run pytest -m "not (tui or browser or integration or benchmark)" -v
     echo "🖥️  Running TUI tests..."
-    uv run pytest -n auto -m tui -v
+    uv run pytest -m tui -v
     echo "🌐 Running browser tests..."
-    uv run pytest -n auto -m browser -v
+    uv run pytest -m browser -v
     echo "🔄 Running integration tests..."
-    uv run pytest -n auto -m integration -v
+    uv run pytest -m integration -v
     echo "📊 Running benchmark tests..."
-    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark -v
+    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v
     echo "✅ All tests completed!"
 
 # Run tests with coverage (all categories)
@@ -54,15 +53,15 @@ test-cov:
     set -e  # Exit on first failure
     echo "📊 Running all tests with coverage..."
     echo "📦 Running unit tests with coverage..."
-    uv run pytest -n auto -m "not (tui or browser or integration or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term -v
+    uv run pytest -m "not (tui or browser or integration or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term -v
     echo "🖥️  Running TUI tests with coverage append..."
-    uv run pytest -n auto -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
+    uv run pytest -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
     echo "🌐 Running browser tests with coverage append..."
-    uv run pytest -n auto -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
+    uv run pytest -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
     echo "🔄 Running integration tests with coverage append..."
-    uv run pytest -n auto -m integration --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
+    uv run pytest -m integration --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
     echo "📊 Running benchmark tests with coverage append..."
-    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
+    CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v
     echo "✅ All tests with coverage completed!"
 
 format:
diff --git a/pyproject.toml b/pyproject.toml
index 224e6d3d..498639fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,11 @@ exclude = ["/docs", "/test", "/scripts"]
 testpaths = ["test"]
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
+# Parallelise across all cores; worksteal rebalances heavyweight integration
+# tests across workers (default `load` left workers idle while one slogged
+# through the queue). Override with `-n0` for serial runs (snapshot updates,
+# benchmarks).
+addopts = "-n auto --dist=worksteal"
 markers = [
     "tui: TUI tests using Textual framework (requires isolated event loop)",
     "browser: Browser integration tests using Playwright (requires Chromium)",

From 163f06cfb566571b4088ef468d1691d499a90615 Mon Sep 17 00:00:00 2001
From: Daniel Demmel <hello@danieldemmel.me>
Date: Fri, 8 May 2026 11:23:29 +0100
Subject: [PATCH 2/2] PR feedback + further cleanup

---
 .claude/skills/tool-renderer/SKILL.md |  2 +-
 .github/workflows/ci.yml              |  6 +--
 .github/workflows/claude.yml          | 67 ---------------------------
 CLAUDE.md                             | 10 ++--
 CONTRIBUTING.md                       | 17 +++----
 justfile                              |  5 +-
 pyproject.toml                        |  4 +-
 test/README.md                        | 20 ++++----
 8 files changed, 30 insertions(+), 101 deletions(-)
 delete mode 100644 .github/workflows/claude.yml

diff --git a/.claude/skills/tool-renderer/SKILL.md b/.claude/skills/tool-renderer/SKILL.md
index d5b43153..11172376 100644
--- a/.claude/skills/tool-renderer/SKILL.md
+++ b/.claude/skills/tool-renderer/SKILL.md
@@ -369,7 +369,7 @@ class Test{ToolName}OutputFormatting:
 uv run pytest test/test_{toolname}_rendering.py -v
 
 # Run full test suite to check for regressions
-uv run pytest -n auto -m "not (tui or browser)" -v
+uv run pytest -m "not (tui or browser)" -v
 ```
 
 ## Checklist
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f12f6b9a..8ea88393 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,13 +33,13 @@ jobs:
       run: uv sync --all-extras --dev && uv run playwright install chromium
 
     - name: Run unit tests with coverage
-      run: uv run pytest -n auto -m "not (tui or browser or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term
+      run: uv run pytest -m "not (tui or browser or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term
 
     - name: Run TUI tests with coverage append
-      run: uv run pytest -n auto -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term
+      run: uv run pytest -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term
 
     - name: Run browser tests with coverage append
-      run: uv run pytest -n auto -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term
+      run: uv run pytest -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term
 
     - name: Run benchmark tests with coverage append (primary only)
       if: matrix.is-primary
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
deleted file mode 100644
index e624750b..00000000
--- a/.github/workflows/claude.yml
+++ /dev/null
@@ -1,67 +0,0 @@
-name: Claude Code
-
-on:
-  issue_comment:
-    types: [created]
-  pull_request_review_comment:
-    types: [created]
-  issues:
-    types: [opened, assigned]
-  pull_request_review:
-    types: [submitted]
-
-jobs:
-  claude:
-    if: |
-      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
-      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
-      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
-      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      pull-requests: read
-      issues: read
-      id-token: write
-      actions: read # Required for Claude to read CI results on PRs
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Run Claude Code
-        id: claude
-        uses: anthropics/claude-code-action@beta
-        env:
-          ALLOWED_TOOLS: "Bash,Edit,MultiEdit,View,GlobTool,Glob,GrepTool,Grep,BatchTool,Batch,LS,Read,Write,Replace,NotebookEditCell,mcp__github_file_ops__commit_files,mcp__github_file_ops__delete_files,mcp__github_file_ops__update_claude_comment,WebSearch,WebFetch"
-        with:
-          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
-
-          # This is an optional setting that allows Claude to read CI results on PRs
-          additional_permissions: |
-            actions: read
-          
-          # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1)
-          # model: "claude-opus-4-1-20250805"
-          
-          # Optional: Customize the trigger phrase (default: @claude)
-          # trigger_phrase: "/claude"
-          
-          # Optional: Trigger when specific user is assigned to an issue
-          # assignee_trigger: "claude-bot"
-          
-          timeout_minutes: "120"
-          allowed_tools: "Bash,Edit,MultiEdit,View,GlobTool,Glob,GrepTool,Grep,BatchTool,Batch,LS,Read,Write,Replace,NotebookEditCell,mcp__github_file_ops__commit_files,mcp__github_file_ops__delete_files,mcp__github_file_ops__update_claude_comment,WebSearch,WebFetch"
-          max_turns: "500"
-          
-          # Optional: Add custom instructions for Claude to customize its behavior for your project
-          # custom_instructions: |
-          #   Follow our coding standards
-          #   Ensure all new code has tests
-          #   Use TypeScript for new files
-          
-          # Optional: Custom environment variables for Claude
-          # claude_env: |
-          #   NODE_ENV: test
-
diff --git a/CLAUDE.md b/CLAUDE.md
index d4b32612..4c6bdd92 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -51,26 +51,26 @@ See @CONTRIBUTING.md for detailed development setup, testing, architecture, and
 
 ### Claude-Specific Testing Tips
 
-**Always use `-n auto` for parallel test execution:**
+**`pyproject.toml` sets `-n auto --dist=worksteal` by default, so tests run in parallel; pass `-n0 --dist=no` to run serially (e.g. when debugging with `--pdb`).**
 
 ```bash
 # Unit tests (fast, recommended for development)
 just test
-# or: uv run pytest -n auto -m "not (tui or browser)" -v
+# or: uv run pytest -m "not (tui or browser)" -v
 
 # TUI tests
 just test-tui
-# or: uv run pytest -n auto -m tui
+# or: uv run pytest -m tui
 
 # Browser tests
 just test-browser
-# or: uv run pytest -n auto -m browser
+# or: uv run pytest -m browser
 
 # All tests
 just test-all
 ```
 
-**Tip:** Add `-x` to stop on first failure (e.g., `uv run pytest -n auto -m "not (tui or browser)" -v -x`).
+**Tip:** Add `-x` to stop on first failure (e.g., `uv run pytest -m "not (tui or browser)" -v -x`).
 
 ### Code Quality
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2a009bf2..219c846b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -96,7 +96,7 @@ The project uses a categorized test system to avoid async event loop conflicts.
 ```bash
 # Unit tests only (fast, recommended for development)
 just test
-# or: uv run pytest -n auto -m "not (tui or browser)" -v
+# or: uv run pytest -m "not (tui or browser)" -v
 
 # TUI tests (isolated event loop)
 just test-tui
@@ -117,21 +117,18 @@ Snapshot tests detect unintended HTML output changes using [syrupy](https://gith
 
 ```bash
 # Run snapshot tests (parallel mode is fine for read-only runs)
-uv run pytest -n auto test/test_snapshot_html.py -v
+uv run pytest test/test_snapshot_html.py -v
 
 # Update snapshots after intentional HTML changes
-# IMPORTANT: run --snapshot-update WITHOUT -n auto (see warning below)
-uv run pytest test/test_snapshot_html.py --snapshot-update
+# IMPORTANT: run --snapshot-update with -n0 (see warning below)
+uv run pytest test/test_snapshot_html.py -n0 --snapshot-update
 ```
 
-> **Warning — don't combine `--snapshot-update` with `-n auto`.** Syrupy
+> **Warning — don't let `--snapshot-update` run with `-n auto`.** Syrupy
 > and pytest-xdist race when writing snapshot files in parallel: the
 > `.ambr` file ends up truncated (observed: ~6000 lines silently
 > deleted on a single run, leaving the file structurally broken but
-> still passing on next read). Run `--snapshot-update` serially. This
-> is also why pytest is **not** configured with a default `-n auto`
-> in `pyproject.toml`; the `just test` recipes opt in for read-only
-> runs where the race doesn't apply.
+> still passing on next read). Run `--snapshot-update` serially by passing `-n0`, because `pyproject.toml` enables `-n auto` by default.
 
 When snapshot tests fail:
 1. Review the diff to verify changes are intentional
@@ -163,7 +160,7 @@ Running all tests together can cause "RuntimeError: This event loop is already r
 just test-cov
 
 # Or manually:
-uv run pytest -n auto --cov=claude_code_log --cov-report=html --cov-report=term
+uv run pytest --cov=claude_code_log --cov-report=html --cov-report=term
 ```
 
 HTML coverage reports are generated in `htmlcov/index.html`.
diff --git a/justfile b/justfile
index 3a36289c..210ea933 100644
--- a/justfile
+++ b/justfile
@@ -5,12 +5,11 @@ default:
 cli *ARGS:
     uv run claude-code-log {{ ARGS }}
 
-# Run only unit tests (fast, no external dependencies)
+# Run unit + integration tests (excludes TUI, browser, and benchmark)
 test:
     uv run pytest -m "not (tui or browser or benchmark)" -v
 
-# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI).
-# DEBUG_TIMING enables coverage of renderer_timings.py
+# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI). DEBUG_TIMING enables coverage of renderer_timings.py
 test-benchmark:
     CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v
 
diff --git a/pyproject.toml b/pyproject.toml
index 498639fa..aee19aa5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,8 +48,8 @@ asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
 # Parallelise across all cores; worksteal rebalances heavyweight integration
 # tests across workers (default `load` left workers idle while one slogged
-# through the queue). Override with `-n0` for serial runs (snapshot updates,
-# benchmarks).
+# through the queue). Override with `-n0 --dist=no` for serial runs, or with
+# `--dist=no --pdb` for debugging (xdist refuses `--pdb` while distributing).
 addopts = "-n auto --dist=worksteal"
 markers = [
     "tui: TUI tests using Textual framework (requires isolated event loop)",
diff --git a/test/README.md b/test/README.md
index 6a307f78..96d8126d 100644
--- a/test/README.md
+++ b/test/README.md
@@ -148,10 +148,10 @@ Snapshot tests capture the full HTML output and detect unintended regressions. T
 
 ```bash
 # Run snapshot tests
-uv run pytest -n auto test/test_snapshot_html.py -v
+uv run pytest test/test_snapshot_html.py -v
 
 # Update snapshots after intentional HTML changes
-uv run pytest -n auto test/test_snapshot_html.py --snapshot-update
+uv run pytest -n0 test/test_snapshot_html.py --snapshot-update
 
 # Review changes before committing
 git diff test/__snapshots__/
@@ -170,28 +170,28 @@ git diff test/__snapshots__/
 ```bash
 # Run only unit tests (fast, recommended for development)
 just test
-# or: uv run pytest -n auto -m "not (tui or browser or integration)" -v
+# or: uv run pytest -m "not (tui or browser or integration)" -v
 
 # Run TUI tests (isolated event loop)
 just test-tui
-# or: uv run pytest -n auto -m tui -v
+# or: uv run pytest -m tui -v
 
 # Run browser tests (requires Chromium)
 just test-browser
-# or: uv run pytest -n auto -m browser -v
+# or: uv run pytest -m browser -v
 
 # Run integration tests with realistic data
 just test-integration
-# or: uv run pytest -n auto -m integration -v
+# or: uv run pytest -m integration -v
 
 # Run all tests in sequence (separated to avoid conflicts)
 just test-all
 
 # Run specific test file
-uv run pytest -n auto test/test_template_rendering.py -v
+uv run pytest test/test_template_rendering.py -v
 
 # Run specific test method
-uv run pytest -n auto test/test_template_rendering.py::TestTemplateRendering::test_representative_messages_render -v
+uv run pytest test/test_template_rendering.py::TestTemplateRendering::test_representative_messages_render -v
 
 # Run tests with coverage
 just test-cov
@@ -214,10 +214,10 @@ Generate detailed coverage reports:
 
 ```bash
 # Run tests with coverage and HTML report
-uv run pytest -n auto --cov=claude_code_log --cov-report=html --cov-report=term
+uv run pytest --cov=claude_code_log --cov-report=html --cov-report=term
 
 # View coverage by module
-uv run pytest -n auto --cov=claude_code_log --cov-report=term-missing
+uv run pytest --cov=claude_code_log --cov-report=term-missing
 
 # Open HTML coverage report
 open htmlcov/index.html