From 85eba5f38e0de09d3c723b392fdc9485c80db362 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Thu, 7 May 2026 13:24:28 +0100 Subject: [PATCH 1/2] Use `--dist=worksteal` to speed up tests + move `-n auto` to config to make it default --- justfile | 35 +++++++++++++++++------------------ pyproject.toml | 5 +++++ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/justfile b/justfile index 2f7e464a..3a36289c 100644 --- a/justfile +++ b/justfile @@ -7,29 +7,28 @@ cli *ARGS: # Run only unit tests (fast, no external dependencies) test: - uv run pytest -n auto -m "not (tui or browser or benchmark)" -v - -# Run benchmark tests (outputs to GITHUB_STEP_SUMMARY in CI) + uv run pytest -m "not (tui or browser or benchmark)" -v +# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI). # DEBUG_TIMING enables coverage of renderer_timings.py test-benchmark: - CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark -v + CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v # Update snapshot tests (runs serially for deterministic file ordering) update-snapshot: - uv run pytest -m snapshot --snapshot-update -v + uv run pytest -n0 -m snapshot --snapshot-update -v # Run TUI tests (requires isolated event loop) test-tui: - uv run pytest -n auto -m tui -v + uv run pytest -m tui -v # Run browser tests (requires Chromium) test-browser: - uv run pytest -n auto -m browser -v + uv run pytest -m browser -v # Run integration tests with realistic JSONL data test-integration: - uv run pytest -n auto -m integration -v + uv run pytest -m integration -v # Run all tests in sequence (separated to avoid event loop conflicts) test-all: @@ -37,15 +36,15 @@ test-all: set -e # Exit on first failure echo "🧪 Running all tests in sequence..." echo "📦 Running unit tests..." 
- uv run pytest -n auto -m "not (tui or browser or integration or benchmark)" -v + uv run pytest -m "not (tui or browser or integration or benchmark)" -v echo "🖥️ Running TUI tests..." - uv run pytest -n auto -m tui -v + uv run pytest -m tui -v echo "🌐 Running browser tests..." - uv run pytest -n auto -m browser -v + uv run pytest -m browser -v echo "🔄 Running integration tests..." - uv run pytest -n auto -m integration -v + uv run pytest -m integration -v echo "📊 Running benchmark tests..." - CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark -v + CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v echo "✅ All tests completed!" # Run tests with coverage (all categories) @@ -54,15 +53,15 @@ test-cov: set -e # Exit on first failure echo "📊 Running all tests with coverage..." echo "📦 Running unit tests with coverage..." - uv run pytest -n auto -m "not (tui or browser or integration or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term -v + uv run pytest -m "not (tui or browser or integration or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term -v echo "🖥️ Running TUI tests with coverage append..." - uv run pytest -n auto -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v + uv run pytest -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v echo "🌐 Running browser tests with coverage append..." - uv run pytest -n auto -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v + uv run pytest -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v echo "🔄 Running integration tests with coverage append..." 
- uv run pytest -n auto -m integration --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v + uv run pytest -m integration --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v echo "📊 Running benchmark tests with coverage append..." - CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -m benchmark --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v + CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term -v echo "✅ All tests with coverage completed!" format: diff --git a/pyproject.toml b/pyproject.toml index 224e6d3d..498639fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,11 @@ exclude = ["/docs", "/test", "/scripts"] testpaths = ["test"] asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" +# Parallelise across all cores; worksteal rebalances heavyweight integration +# tests across workers (default `load` left workers idle while one slogged +# through the queue). Override with `-n0` for serial runs (snapshot updates, +# benchmarks). 
+addopts = "-n auto --dist=worksteal" markers = [ "tui: TUI tests using Textual framework (requires isolated event loop)", "browser: Browser integration tests using Playwright (requires Chromium)", From 163f06cfb566571b4088ef468d1691d499a90615 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Fri, 8 May 2026 11:23:29 +0100 Subject: [PATCH 2/2] PR feedback + further cleanup --- .claude/skills/tool-renderer/SKILL.md | 2 +- .github/workflows/ci.yml | 6 +-- .github/workflows/claude.yml | 67 --------------------------- CLAUDE.md | 10 ++-- CONTRIBUTING.md | 17 +++---- justfile | 5 +- pyproject.toml | 4 +- test/README.md | 20 ++++---- 8 files changed, 30 insertions(+), 101 deletions(-) delete mode 100644 .github/workflows/claude.yml diff --git a/.claude/skills/tool-renderer/SKILL.md b/.claude/skills/tool-renderer/SKILL.md index d5b43153..11172376 100644 --- a/.claude/skills/tool-renderer/SKILL.md +++ b/.claude/skills/tool-renderer/SKILL.md @@ -369,7 +369,7 @@ class Test{ToolName}OutputFormatting: uv run pytest test/test_{toolname}_rendering.py -v # Run full test suite to check for regressions -uv run pytest -n auto -m "not (tui or browser)" -v +uv run pytest -m "not (tui or browser)" -v ``` ## Checklist diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f12f6b9a..8ea88393 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,13 +33,13 @@ jobs: run: uv sync --all-extras --dev && uv run playwright install chromium - name: Run unit tests with coverage - run: uv run pytest -n auto -m "not (tui or browser or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term + run: uv run pytest -m "not (tui or browser or benchmark)" --cov=claude_code_log --cov-report=xml --cov-report=html --cov-report=term - name: Run TUI tests with coverage append - run: uv run pytest -n auto -m tui --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term + run: uv run pytest -m tui 
--cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term - name: Run browser tests with coverage append - run: uv run pytest -n auto -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term + run: uv run pytest -m browser --cov=claude_code_log --cov-append --cov-report=xml --cov-report=html --cov-report=term - name: Run benchmark tests with coverage append (primary only) if: matrix.is-primary diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml deleted file mode 100644 index e624750b..00000000 --- a/.github/workflows/claude.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Claude Code - -on: - issue_comment: - types: [created] - pull_request_review_comment: - types: [created] - issues: - types: [opened, assigned] - pull_request_review: - types: [submitted] - -jobs: - claude: - if: | - (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || - (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || - (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || - (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read - issues: read - id-token: write - actions: read # Required for Claude to read CI results on PRs - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Run Claude Code - id: claude - uses: anthropics/claude-code-action@beta - env: - ALLOWED_TOOLS: "Bash,Edit,MultiEdit,View,GlobTool,Glob,GrepTool,Grep,BatchTool,Batch,LS,Read,Write,Replace,NotebookEditCell,mcp__github_file_ops__commit_files,mcp__github_file_ops__delete_files,mcp__github_file_ops__update_claude_comment,WebSearch,WebFetch" - with: - claude_code_oauth_token: ${{ 
secrets.CLAUDE_CODE_OAUTH_TOKEN }} - - # This is an optional setting that allows Claude to read CI results on PRs - additional_permissions: | - actions: read - - # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) - # model: "claude-opus-4-1-20250805" - - # Optional: Customize the trigger phrase (default: @claude) - # trigger_phrase: "/claude" - - # Optional: Trigger when specific user is assigned to an issue - # assignee_trigger: "claude-bot" - - timeout_minutes: "120" - allowed_tools: "Bash,Edit,MultiEdit,View,GlobTool,Glob,GrepTool,Grep,BatchTool,Batch,LS,Read,Write,Replace,NotebookEditCell,mcp__github_file_ops__commit_files,mcp__github_file_ops__delete_files,mcp__github_file_ops__update_claude_comment,WebSearch,WebFetch" - max_turns: "500" - - # Optional: Add custom instructions for Claude to customize its behavior for your project - # custom_instructions: | - # Follow our coding standards - # Ensure all new code has tests - # Use TypeScript for new files - - # Optional: Custom environment variables for Claude - # claude_env: | - # NODE_ENV: test - diff --git a/CLAUDE.md b/CLAUDE.md index d4b32612..4c6bdd92 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -51,26 +51,26 @@ See @CONTRIBUTING.md for detailed development setup, testing, architecture, and ### Claude-Specific Testing Tips -**Always use `-n auto` for parallel test execution:** +**Config in `pyproject.toml` sets `-n auto --dist=worksteal` so you might need to unset for pdb, etc** ```bash # Unit tests (fast, recommended for development) just test -# or: uv run pytest -n auto -m "not (tui or browser)" -v +# or: uv run pytest -m "not (tui or browser)" -v # TUI tests just test-tui -# or: uv run pytest -n auto -m tui +# or: uv run pytest -m tui # Browser tests just test-browser -# or: uv run pytest -n auto -m browser +# or: uv run pytest -m browser # All tests just test-all ``` -**Tip:** Add `-x` to stop on first failure (e.g., `uv run pytest -n auto -m "not (tui or browser)" -v 
-x`). +**Tip:** Add `-x` to stop on first failure (e.g., `uv run pytest -m "not (tui or browser)" -v -x`). ### Code Quality diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2a009bf2..219c846b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -96,7 +96,7 @@ The project uses a categorized test system to avoid async event loop conflicts. ```bash # Unit tests only (fast, recommended for development) just test -# or: uv run pytest -n auto -m "not (tui or browser)" -v +# or: uv run pytest -m "not (tui or browser)" -v # TUI tests (isolated event loop) just test-tui @@ -117,21 +117,18 @@ Snapshot tests detect unintended HTML output changes using [syrupy](https://gith ```bash # Run snapshot tests (parallel mode is fine for read-only runs) -uv run pytest -n auto test/test_snapshot_html.py -v +uv run pytest test/test_snapshot_html.py -v # Update snapshots after intentional HTML changes -# IMPORTANT: run --snapshot-update WITHOUT -n auto (see warning below) -uv run pytest test/test_snapshot_html.py --snapshot-update +# IMPORTANT: run --snapshot-update with -n0 (see warning below) +uv run pytest test/test_snapshot_html.py -n0 --snapshot-update ``` -> **Warning — don't combine `--snapshot-update` with `-n auto`.** Syrupy > and pytest-xdist race when writing snapshot files in parallel: the > `.ambr` file ends up truncated (observed: ~6000 lines silently > deleted on a single run, leaving the file structurally broken but -> still passing on next read). Run `--snapshot-update` serially. This -> is also why pytest is **not** configured with a default `-n auto` -> in `pyproject.toml`; the `just test` recipes opt in for read-only -> runs where the race doesn't apply. +> **Warning — don't let `--snapshot-update` run with `-n auto`.** Syrupy > and pytest-xdist race when writing snapshot files in parallel: the > `.ambr` file ends up truncated (observed: ~6000 lines silently > deleted on a single run, leaving the file structurally broken but > still passing on next read). 
Run `--snapshot-update` serially. When snapshot tests fail: 1. 
Review the diff to verify changes are intentional @@ -163,7 +160,7 @@ Running all tests together can cause "RuntimeError: This event loop is already r just test-cov # Or manually: -uv run pytest -n auto --cov=claude_code_log --cov-report=html --cov-report=term +uv run pytest --cov=claude_code_log --cov-report=html --cov-report=term ``` HTML coverage reports are generated in `htmlcov/index.html`. diff --git a/justfile b/justfile index 3a36289c..210ea933 100644 --- a/justfile +++ b/justfile @@ -5,12 +5,11 @@ default: cli *ARGS: uv run claude-code-log {{ ARGS }} -# Run only unit tests (fast, no external dependencies) +# Run unit + integration tests (excludes TUI, browser, and benchmark) test: uv run pytest -m "not (tui or browser or benchmark)" -v -# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI). -# DEBUG_TIMING enables coverage of renderer_timings.py +# Run benchmark tests serially for stable measurements (outputs to GITHUB_STEP_SUMMARY in CI). DEBUG_TIMING enables coverage of renderer_timings.py test-benchmark: CLAUDE_CODE_LOG_DEBUG_TIMING=1 uv run pytest -n0 -m benchmark -v diff --git a/pyproject.toml b/pyproject.toml index 498639fa..aee19aa5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,8 +48,8 @@ asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" # Parallelise across all cores; worksteal rebalances heavyweight integration # tests across workers (default `load` left workers idle while one slogged -# through the queue). Override with `-n0` for serial runs (snapshot updates, -# benchmarks). +# through the queue). Override with `-n0 --dist=no` for serial runs, or with +# `--dist=no --pdb` for debugging (worksteal distribution must be disabled). 
addopts = "-n auto --dist=worksteal" markers = [ "tui: TUI tests using Textual framework (requires isolated event loop)", diff --git a/test/README.md b/test/README.md index 6a307f78..96d8126d 100644 --- a/test/README.md +++ b/test/README.md @@ -148,10 +148,10 @@ Snapshot tests capture the full HTML output and detect unintended regressions. T ```bash # Run snapshot tests -uv run pytest -n auto test/test_snapshot_html.py -v +uv run pytest test/test_snapshot_html.py -v # Update snapshots after intentional HTML changes -uv run pytest -n auto test/test_snapshot_html.py --snapshot-update +uv run pytest -n0 test/test_snapshot_html.py --snapshot-update # Review changes before committing git diff test/__snapshots__/ @@ -170,28 +170,28 @@ git diff test/__snapshots__/ ```bash # Run only unit tests (fast, recommended for development) just test -# or: uv run pytest -n auto -m "not (tui or browser or integration)" -v +# or: uv run pytest -m "not (tui or browser or integration)" -v # Run TUI tests (isolated event loop) just test-tui -# or: uv run pytest -n auto -m tui -v +# or: uv run pytest -m tui -v # Run browser tests (requires Chromium) just test-browser -# or: uv run pytest -n auto -m browser -v +# or: uv run pytest -m browser -v # Run integration tests with realistic data just test-integration -# or: uv run pytest -n auto -m integration -v +# or: uv run pytest -m integration -v # Run all tests in sequence (separated to avoid conflicts) just test-all # Run specific test file -uv run pytest -n auto test/test_template_rendering.py -v +uv run pytest test/test_template_rendering.py -v # Run specific test method -uv run pytest -n auto test/test_template_rendering.py::TestTemplateRendering::test_representative_messages_render -v +uv run pytest test/test_template_rendering.py::TestTemplateRendering::test_representative_messages_render -v # Run tests with coverage just test-cov @@ -214,10 +214,10 @@ Generate detailed coverage reports: ```bash # Run tests with coverage and HTML 
report -uv run pytest -n auto --cov=claude_code_log --cov-report=html --cov-report=term +uv run pytest --cov=claude_code_log --cov-report=html --cov-report=term # View coverage by module -uv run pytest -n auto --cov=claude_code_log --cov-report=term-missing +uv run pytest --cov=claude_code_log --cov-report=term-missing # Open HTML coverage report open htmlcov/index.html