From f3879a5d6f7f855ed70ac50fc004ac89043b878b Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Tue, 28 Apr 2026 23:44:47 -0700 Subject: [PATCH 001/103] remove completed migrations --- .../inplace-artifact-boundary-hardening.md | 34 -------- .../manifest.json | 53 ------------ .../phase-1-artifact-boundary-baseline.md | 32 -------- .../phase-2-artifacts-boundary-contract.md | 35 -------- .../phase-3-boundary-wrappers-at-callsites.md | 44 ---------- ...hase-4-loop-and-cli-boundary-resilience.md | 41 ---------- ...-shippable-regression-and-contract-lock.md | 55 ------------- .../plan.md | 80 ------------------- .../approaches/git-backed-tracked-files.md | 56 ------------- .../approaches/in-place-target-resolution.md | 63 --------------- .../approaches/split-targeting-domain.md | 66 --------------- .../manifest.json | 45 ----------- .../phase-1-targeting-parse-foundation.md | 37 --------- .../phase-2-loop-delegates-targeting-parse.md | 47 ----------- ...hase-3-targeting-git-boundary-hardening.md | 38 --------- ...ase-4-targeting-surface-regression-lock.md | 46 ----------- .../plan.md | 68 ---------------- 17 files changed, 840 deletions(-) delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md delete mode 100644 
migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md delete mode 100644 migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md delete mode 100644 migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md deleted file mode 100644 index 3397292..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md +++ /dev/null @@ -1,34 +0,0 @@ -# In-place artifact boundary hardening - -## Strategy - -Keep module boundaries intact and make failure contracts explicit where side effects cross module seams. - -1. 
Baseline current behavior with regression tests that assert boundary failures carry `__cause__` where this migration intends to improve context. -2. Tighten `artifacts.py` with private boundary helpers for event writes, summary serialization, and atomic persistence, then apply them to existing callsites with no public API change. -3. Extend callsite-level wrappers in `agent.py`, `git.py`, `phases.py`, and `migration_tick.py` so boundary failures bubble with preserved causes while preserving current control flow. -4. Update orchestration and CLI surfaces in `loop.py`, `config.py`, and `cli.py` to keep recovery/abort semantics unchanged while preserving richer causal context. -5. Close with a migration-wide contract lock, duplicate-symbol safety checks, and full-suite verification. - -## Tradeoffs - -Pros: -- No module splitting or symbol churn. -- Localized change surface anchored to observed co-change boundaries. -- Minimal stack distortion because boundaries stay aligned with existing module seams. - -Cons: -- Additional wrapper indirection in hot paths can lengthen tracebacks. -- Requires coordinated test updates across adjacent modules before the final lock step. - -## Compatibility stance - -No canary/cutover rollout in this repo. The migration is a straight in-place refinement with stronger boundary contracts and stable behavior defaults. - -## Phase intent - -- `phase-1` records a stable baseline and ensures the suite will catch causal-regression mistakes. -- `phase-2` introduces the module-level helpers in `artifacts.py` and validates their persistence contract. -- `phase-3` applies adjacent boundary wrappers at seams, including migration-tick reporting. -- `phase-4` propagates the contract safely through loop/CLI/config orchestration points. -- `phase-5` freezes contracts and runs full validation for shipping confidence. 
diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json deleted file mode 100644 index 16817fa..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "awaiting_human_review": false, - "cooldown_until": null, - "created_at": "2026-04-27T21:59:42.893-07:00", - "current_phase": "", - "human_review_reason": null, - "last_touch": "2026-04-28T16:06:15.347-07:00", - "name": "src-continuous-refactoring-artifacts-py-20260427T215942", - "phases": [ - { - "done": true, - "effort_reason": null, - "file": "phase-1-artifact-boundary-baseline.md", - "name": "artifact-boundary-baseline", - "precondition": "- No production files in the migration scope have been modified yet. - The target migration scope is unchanged in production modules.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-2-artifacts-boundary-contract.md", - "name": "artifacts-boundary-contract", - "precondition": "- Phase 1 is marked complete in the migration manifest. - `src/continuous_refactoring/artifacts.py` has not been edited yet in this migration.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-3-boundary-wrappers-at-callsites.md", - "name": "boundary-wrappers-at-callsites", - "precondition": "- Phase 2 is marked complete in the migration manifest. - Phase-2 boundary contracts are present in `artifacts.py` and their tests. - No edits are made in `config.py`, `loop.py`, or `cli.py` during this phase.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-4-loop-and-cli-boundary-resilience.md", - "name": "loop-and-cli-boundary-resilience", - "precondition": "- Phase 3 is marked complete in the migration manifest. - Boundary wrappers from phase 3 are present at the intended module seams. 
- No edits in `__init__.py` in this phase yet.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-5-shippable-regression-and-contract-lock.md", - "name": "shippable-regression-and-contract-lock", - "precondition": "- Phase 4 is marked complete in the migration manifest. - All intended phase edits are present in working tree. - All phase documents in this migration directory match their intended scope (especially phase names referenced in `manifest.json`).", - "required_effort": null - } - ], - "status": "done", - "wake_up_on": null -} diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md deleted file mode 100644 index 0f2ea41..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md +++ /dev/null @@ -1,32 +0,0 @@ -# Phase 1: Baseline failure-cause behavior on artifact and phase boundaries - -## Objective -Create a failing-ready baseline for cause-preserving failure behavior without changing production code. - -## Scope -- `tests/test_continuous_refactoring.py` -- `tests/test_phases.py` -- `tests/test_loop_migration_tick.py` - -## Instructions -1. Add baseline tests in `tests/test_continuous_refactoring.py` for artifact persistence paths: - - fail-fast behavior on malformed payload flows - - boundary failures now asserting `__cause__` expectations only where behavior already depends on translation -2. Add baseline tests in `tests/test_phases.py` for readiness/phase parsing failure paths that already route through `ContinuousRefactorError`. -3. Add focused checks in `tests/test_loop_migration_tick.py` for artifact summary/failure text preservation and non-masked root causes. -4. Keep all production files untouched in this phase. 
- -## Precondition -- No production files in the migration scope have been modified yet. -- The target migration scope is unchanged in production modules. - -## Definition of Done -- New tests explicitly exercise baseline boundary-failure expectations for artifact and phase orchestration. -- No production files are edited. -- All phase-1 scope tests pass. -- The tree remains shippable with only baseline test changes. - -## Validation steps -- `uv run pytest tests/test_continuous_refactoring.py` -- `uv run pytest tests/test_phases.py` -- `uv run pytest tests/test_loop_migration_tick.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md deleted file mode 100644 index 4295b78..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md +++ /dev/null @@ -1,35 +0,0 @@ -# Phase 2: Artifacts module in-place boundary hardening - -## Objective -Create a strict, low-churn boundary contract in `artifacts.py` around artifact writes and serialization paths, preserving underlying exceptions through nested causes. - -## Scope -- `src/continuous_refactoring/artifacts.py` -- `tests/test_continuous_refactoring.py` - -## Instructions -1. Add private helpers in `artifacts.py` to isolate unsafe effects: - - event append helper that captures and wraps effect-level `OSError` exceptions with `ContinuousRefactorError` using `from`. - - summary serialization helper that wraps serialization/value-shape failures with `ContinuousRefactorError` using `from` when boundary context adds signal. - - atomic write helper that wraps parent-dir/temp-file/write/replace failures with `ContinuousRefactorError` using `from`. -2. 
Use those helpers in: - - `RunArtifacts.log()` event emission, - - `RunArtifacts.write_summary()`, - - `create_run_artifacts()` initialization where the first summary write establishes the run boundary state. -3. Add boundary-level context only where it changes caller signal (what failed and where), but do not replace clearer native errors from pure bookkeeping branches. -4. Keep existing module surface (`__all__`, class names, public functions) unchanged. -5. Update tests to pin that nested causes are preserved for event-write, summary-serialization, and atomic-write boundary failures. - -## Precondition -- Phase 1 is marked complete in the migration manifest. -- `src/continuous_refactoring/artifacts.py` has not been edited yet in this migration. - -## Definition of Done -- Boundary helper functions exist in `artifacts.py` and are wired into event-write and summary-write/serialize flows. -- Wrapped boundary failures from `artifacts.py` include original exceptions as `__cause__`. -- Paths that are semantically better left unwrapped remain unwrapped. -- `tests/test_continuous_refactoring.py` contains explicit cause assertions for artifact boundary failures. -- Public module contracts (`__all__`, types, API names, and `ContinuousRefactorError`) remain unchanged. 
- -## Validation steps -- `uv run pytest tests/test_continuous_refactoring.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md deleted file mode 100644 index 920a8c3..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md +++ /dev/null @@ -1,44 +0,0 @@ -# Phase 3: Boundary wrappers at module seams - -## Objective -Apply boundary wrappers at adjacent module seams, preserving causes while keeping existing callsite semantics stable across routing and migration-tick flows. - -## Scope -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `tests/test_run.py` -- `tests/test_routing.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` - -## Instructions -1. In `agent.py`, wrap subprocess/process-launch failures with `ContinuousRefactorError` when a module boundary message improves troubleshooting, and preserve the original exception via `from`. -2. In `git.py`, keep `GitCommandError` as a boundary type and add nested causes consistently where subprocess launch/runtime failures are converted into module boundary errors. -3. In `phases.py`, preserve verdict flow while making readiness and phase-result errors boundary-safe at decision points. -4. In `migration_tick.py`, preserve defer/blocked/abandon decision flow while keeping ready-check and phase-result failures tied to meaningful summaries and original causes. -5. Keep semantics that callers depend on: - - stable control flow - - stable exception class behavior - - stable user-visible strings unless a wrapped-context test justifies a targeted delta. -6. 
Add/adjust tests: - - `tests/test_run.py` for module-seam command-boundary cause retention. - - `tests/test_routing.py` for routing/decision stability under wrapped failures. - - `tests/test_loop_migration_tick.py` to ensure migration-tick summaries still include meaningful root-cause context. - -## Precondition -- Phase 2 is marked complete in the migration manifest. -- Phase-2 boundary contracts are present in `artifacts.py` and their tests. -- No edits are made in `config.py`, `loop.py`, or `cli.py` during this phase. - -## Definition of Done -- `agent.py`, `git.py`, `phases.py`, and `migration_tick.py` boundary wrappers preserve `__cause__` and keep current call patterns intact. -- No new external API is introduced. -- Behavior for run/routing/migration tick remains unchanged in flow and decision results while asserting cause retention where wrapped. -- No test in phase scope is left failing. - -## Validation steps -- `uv run pytest tests/test_run.py` -- `uv run pytest tests/test_routing.py` -- `uv run pytest tests/test_loop_migration_tick.py tests/test_phases.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md deleted file mode 100644 index fef9cce..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md +++ /dev/null @@ -1,41 +0,0 @@ -# Phase 4: Loop and CLI resilience under boundary changes - -## Objective -Ensure loop and CLI behavior remains shippable when artifact/config/git boundaries fail, with truthful user messaging and unchanged control-flow defaults. 
- -## Scope -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/cli.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_config.py` -- `tests/test_phases.py` - -## Instructions -1. Update boundary catch/relay points in `loop.py` so config/artifacts/git failures are wrapped only at decision points and keep current fallback logic when safe (`load_taste` defaults, non-fatal taste path failures, validation path continuity). -2. Tighten `config.py` helpers (`continuous_refactoring.config.load_manifest`, `_load_manifest_payload`, and related config loaders) only where needed to align with consistent cause-chaining semantics with artifacts and keep missing-manifest behavior unchanged. -3. Update CLI taste/upgrade/init paths to preserve exact user-facing behavior on boundary failures while adding richer cause-linked debug context internally. -4. Add regression tests for malformed/unreadable manifest and log-write failures in config/CLI/loop surfaces that must not crash into less useful errors. -5. Keep command output and exit status stable where existing tests assert exact semantics. -6. Ensure `tests/test_phases.py` still validates unchanged high-level phase outcomes under loop/cli boundary stress paths. - -## Precondition -- Phase 3 is marked complete in the migration manifest. -- Boundary wrappers from phase 3 are present at the intended module seams. -- No edits in `__init__.py` in this phase yet. - -## Definition of Done -- Loop/CLI/cfg paths remain robust under boundary failures and recover/abort in the same control plane as before. -- Boundary errors are wrapped with preserved causes only where callsite semantics improve context. -- Regressions for taste, run-once, and config load paths are covered by new/updated tests. 
-- No observable behavior changes outside error-cause channels unless explicitly documented by tests. -- No new direct API behavior changes in this phase outside boundary resilience scope. -- All phase-4 scope tests pass. - -## Validation steps -- `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py` -- `uv run pytest tests/test_run_once.py tests/test_run_once_regression.py` -- `uv run pytest tests/test_config.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md deleted file mode 100644 index e8f15c7..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md +++ /dev/null @@ -1,55 +0,0 @@ -# Phase 5: Cross-module contract lock and migration finalization - -## Objective -Lock export/runtime contracts after boundary hardening and complete migration-wide regression validation while keeping the repository shippable after each intermediate step. - -## Scope -- `src/continuous_refactoring/__init__.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/artifacts.py` -- All tests touched in earlier phases -- `tests/test_continuous_refactoring.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` -- `tests/test_routing.py` -- `tests/test_run.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_config.py` - -## Instructions -1. 
Verify `__init__.py` still enforces duplicate-export safety after any added/retained symbols and update no public symbol lists unless required by the migration. -2. Re-run phase-level and integration checks to ensure no behavior drift: - - phase readiness/validation retry semantics, - - artifact summary/event content, - - CLI taste/init messages, - - run/run-once loop outcomes. -3. Confirm migration docs and this plan match scope edits and that no phase introduced behavior outside migration intent, including the current `migration_tick.py` seam covered by `tests/test_loop_migration_tick.py`. -4. If any CLI exit path changed only in wording for more context, add/adjust exact-string assertions in dedicated CLI tests and call this out explicitly in DoD. -5. Verify `migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json` phase graph and metadata remain consistent with this plan (no missing or renamed phase files). - -## Precondition -- Phase 4 is marked complete in the migration manifest. -- All intended phase edits are present in working tree. -- All phase documents in this migration directory match their intended scope (especially phase names referenced in `manifest.json`). - -## Definition of Done -- Package export checks are clean for touched modules. -- All phase-level target validations and full suite are green. -- Boundary-cause semantics are consistent across `artifacts`, `agent`, `git`, `loop`, `migration_tick`, `phases`, `config`, and CLI callsites. -- Migration scope and docs are aligned to the final code shape. -- `manifest.json` and `plan.md` are coherent with delivered phase files and scope. -- No unresolved documentation/process debt introduced by this migration. 
- -## Validation steps -- `uv run pytest tests/test_config.py tests/test_continuous_refactoring.py tests/test_loop_migration_tick.py tests/test_phases.py tests/test_routing.py` -- `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py tests/test_run_once.py tests/test_run_once_regression.py` -- `uv run pytest tests/test_run.py` -- `uv run pytest` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md deleted file mode 100644 index 51813fa..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md +++ /dev/null @@ -1,80 +0,0 @@ -# Migration: src-continuous-refactoring-artifacts-py-20260427T215942 - -## Goal -Harden artifact persistence and adjacent command-boundary behavior around `continuous_refactoring.artifacts` in place so that failures at module boundaries preserve root-cause context, callsites remain stable, and execution behavior stays shippable between phases. - -## Chosen approach -[`inplace-artifact-boundary-hardening`](approaches/inplace-artifact-boundary-hardening.md) - -## Scope -- `src/continuous_refactoring/artifacts.py` -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `src/continuous_refactoring/cli.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/__init__.py` -- `tests/test_continuous_refactoring.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` -- `tests/test_run.py` -- `tests/test_routing.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_config.py` - -## Non-goals -- No module splitting or package-boundary redesign. -- No rollout flags or canary mechanics in this migration. 
-- No API-level renames. -- No deliberate changes to prompt text where tests assert exact output, except where required to preserve boundary context. - -## Scope policy -Only files listed above and existing migration documents in this directory may be edited for this migration. - -## Phases -1. `phase-1-artifact-boundary-baseline` -2. `phase-2-artifacts-boundary-contract` -3. `phase-3-boundary-wrappers-at-callsites` -4. `phase-4-loop-and-cli-boundary-resilience` -5. `phase-5-shippable-regression-and-contract-lock` - -```mermaid -flowchart TD - P1[phase-1-artifact-boundary-baseline] - P2[phase-2-artifacts-boundary-contract] - P3[phase-3-boundary-wrappers-at-callsites] - P4[phase-4-loop-and-cli-boundary-resilience] - P5[phase-5-shippable-regression-and-contract-lock] - - P1 --> P2 - P2 --> P3 - P3 --> P4 - P4 --> P5 -``` - -## Dependency summary -- Phase 1 creates a test baseline and verifies current behavior before production edits. -- Phase 2 introduces helper contracts in `artifacts.py` for summary/event persistence; all other production modules consume these contracts later. -- Phase 3 applies direct boundary wrappers in adjacent modules and migration-tick reporting seams, and must run only after Phase 2 is green. -- Phase 4 applies boundary resilience in orchestration and CLI surfaces and must run only after callsite behavior in Phase 3 is locked. -- Phase 5 performs final contract lock validation across the scope and must run only after Phase 4 is green. - -## Validation strategy -Taste version: `taste-scoping-version: 1` - -Phase gates must remain independently verifiable and each phase must leave a shippable tree (at least targeted tests green and no behavioral break outside migration intent). 
- -### Phase gates -- `phase-1-artifact-boundary-baseline.md`: `uv run pytest tests/test_continuous_refactoring.py tests/test_phases.py tests/test_loop_migration_tick.py` -- `phase-2-artifacts-boundary-contract.md`: `uv run pytest tests/test_continuous_refactoring.py` -- `phase-3-boundary-wrappers-at-callsites.md`: `uv run pytest tests/test_run.py tests/test_routing.py tests/test_loop_migration_tick.py tests/test_phases.py` -- `phase-4-loop-and-cli-boundary-resilience.md`: `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py tests/test_run_once.py tests/test_run_once_regression.py tests/test_config.py` -- `phase-5-shippable-regression-and-contract-lock.md`: `uv run pytest` - -## Verification rule -Each phase must satisfy its local Definition of Done, including full boundary error-cause visibility for behavior changes it introduces, and pass its gate before the next phase starts. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md deleted file mode 100644 index 07349fe..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md +++ /dev/null @@ -1,56 +0,0 @@ -# Move Tracked-File Enumeration to `git.py` - -## Strategy - -Extract low-level repo access from `targeting.py` into `git.py` so file enumeration is centralized and boundary-faithful. - -Proposed changes: -- Add `list_tracked_files(repo_root: Path) -> list[str]` to `src/continuous_refactoring/git.py` using existing `run_command`. -- Replace direct `subprocess.run([... "git", "ls-files", "-z"])` in `targeting.py` with `continuous_refactoring.git.list_tracked_files`. -- Keep `select_random_files` in `targeting.py` as policy (`count`, tuple return, ordering behavior) and use `git.py` only for repository access. 
-- Preserve warning/error behavior by preserving command output messages and wrapping failures with nested `ContinuousRefactorError` in one place. -- Add regression tests in both modules: - - low-level git command edge cases (`git.py`) and - - target resolution behavior under non-ASCII and empty-repo conditions (`test_targeting.py`). - -This is explicitly non-speculative: there is real duplication pressure across modules that already depend on git command semantics. - -## Tradeoffs - -Pros: -- Stronger domain split around repository transport. -- Easier to test and mock repository behavior in one place. -- Improves consistency if other modules later need reliable tracked-file access. - -Cons: -- Requires modifying `git.py`, which increases blast radius into `loop.py`, `artifacts.py`, and related tests through callsite imports. -- Need to keep error messages stable for existing tests that assert on command failure paths. -- Not as immediate a cleanup as pure in-place refactor. - -## Estimated Phases - -1. Git utility extraction -- Add `list_tracked_files` to `git.py` and test it with fixtures already used by `tests/test_git.py`. -- Keep interface narrow and stdlib-only. - -2. Targeting integration -- Replace in-module git listing with the new utility. -- Ensure `select_random_files` and `expand_patterns_to_files` remain deterministic and deduplicated. - -3. Scope and loop checks -- Update any callsites that need direct visibility of tracked-file listing behavior. -- Keep `targeting.py` API and `Target` contract unchanged. - -4. Full behavioral pass -- Focused tests: `uv run pytest tests/test_git.py tests/test_targeting.py` -- Broader: `uv run pytest tests/test_run_once_regression.py tests/test_cli_init_taste.py`. - -## Risk Profile - -Medium. - -Watch-outs: -- Avoid introducing temporary migration flags in CLI or runtime flow. -- Do not change fallback semantics: no random target shape change, no precedence change. 
-- Keep exception boundaries clear: `run_command` wraps low-level process issues, `targeting.py` wraps domain-level failures only. - diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md deleted file mode 100644 index ce2d348..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md +++ /dev/null @@ -1,63 +0,0 @@ -# In-Place Targeting Resolution Tightening - -## Strategy - -Keep `src/continuous_refactoring/targeting.py` as the ownership point for all target semantics, but tighten the module into explicit, small pipeline helpers. - -Core move: -- Move CLI-facing target parsing to a first-class helper in `targeting.py`: - - `parse_paths_arg(raw: str | None) -> tuple[str, ...] | None` - - validate/truncate-empty in one place, not ad hoc in `loop.py`. -- Introduce a tiny selector abstraction in `targeting.py`: - - `select_target_files(patterns: tuple[str, ...], repo_root: Path) -> tuple[str, ...]` - - `resolve_target_sources(...) -> tuple[list[Target], list[str]]` is still not a second data structure, just returns an ordered `list[Target]`. -- Keep `loop.py` orchestration thin: - - `_resolve_targets_from_args()` delegates parsing and resolution; it only passes parser outputs. -- Preserve existing output contracts: - - fallback provenance strings (`targets`, `globs`, `extensions`, `paths`, `random`), - - random fallback behavior and warning text patterns, - - `Target` dataclass shape and public imports. -- Normalize warnings/errors at module boundaries: - - keep current behavior (`ContinuousRefactorError` on fatal git enumeration failures), - - attach `__cause__` where wrapping adds context. - -## Tradeoffs - -Pros: -- Lowest churn across `loop.py`, `scope_expansion.py`, and tests. 
-- No module boundary churn, no migration of symbol ownership. -- Fastest path to measurable cleanup and easy review. - -Cons: -- Retains a broader `targeting.py` surface than a full split. -- Less architectural separation than module extraction options. -- Any later boundary extraction will be easier from this cleaner baseline, not zero-cost. - -## Estimated Phases - -1. Baseline lock -- Add regression tests for `_parse_paths_arg` behavior and edge-case warnings in `tests/test_targeting.py`. -- Add one small `run-loop` integration assertion proving `loop.py` delegates to new parser behavior. - -2. Internal pipeline cleanup -- Extract parsing and selection helpers inside `targeting.py`. -- Update `_resolve_targets_from_args()` in `loop.py` to call the new helper functions. -- Keep prompt composition unchanged; only target resolution shape changes through same contract. - -3. Error-boundary hardening -- Wrap failed git enumeration paths with nested `ContinuousRefactorError`. -- Preserve user-facing strings where tests assert them; update only if intentional and justified. - -4. Validation -- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_prompts.py` -- Then focused `uv run pytest tests/test_cli_*.py` for any touched CLI path. - -## Risk Profile - -Low to medium. - -Watch-outs: -- Keep warning wording stable to avoid brittle regression in stderr-capture tests. -- Keep first-match targeting semantics intact (`targets > globs > extensions > paths > random`). -- No new temporary flags, names, or compatibility indirection. 
- diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md deleted file mode 100644 index b5bb4b8..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md +++ /dev/null @@ -1,66 +0,0 @@ -# Split Targeting by Domain Ownership - -## Strategy - -Introduce explicit domain modules and keep `targeting.py` as a facade: -- `src/continuous_refactoring/targeting_io.py` - - JSONL parsing/validation (`load_targets_jsonl`, `validate_target_line`) - - `_optional_str` and field mapping (`effort-override`, `model-override`) -- `src/continuous_refactoring/targeting_match.py` - - pattern compilation (`_compile_glob`) - - `parse_extensions`, `parse_globs`, `expand_patterns_to_files` -- `src/continuous_refactoring/targeting_resolution.py` - - `resolve_targets` policy and source precedence -- `src/continuous_refactoring/targeting.py` - - stable façade, `Target`, `TargetSource`, `select_random_files` - - re-exports and orchestration glue only. - -This aligns with taste-scoping by making domain boundaries meaningful: -parsing, matching, and policy are separate and testable without CLI, agent, or loop context. - -## Tradeoffs - -Pros: -- Clearer code ownership and fewer long-distance responsibilities in one file. -- Easier targeted tests for each boundary (pure parsing/matching/resolution). -- Reduced pressure on `targeting.py` as behavior keeps growing. - -Cons: -- Medium-high import churn (`loop.py`, tests, `__init__.py` surface, maybe `prompts.py` type imports). -- Higher chance of symbol/export conflict with package uniqueness checks. -- More files to keep in sync while maintaining deterministic behavior and warning wording. - -## Estimated Phases - -1. 
Test and contract capture -- Split existing tests into focused ownership buckets: - - keep `tests/test_targeting.py` for top-level orchestration and cross-boundary behavior, - - add `tests/test_targeting_match.py` for glob semantics, - - add `tests/test_targeting_resolution.py` for precedence and fallback. - -2. Extract parsing module -- Move validation and JSONL loading into `targeting_io.py`. -- Update direct imports where `validate_target_line` and `load_targets_jsonl` are used. - -3. Extract matching module -- Move glob and extension parsing to `targeting_match.py`. -- Ensure dedupe/sort/range behavior stays identical; expand tests using existing randomized generator case. - -4. Extract resolution module -- Move precedence/fallback policy into `targeting_resolution.py`. -- Keep return-order semantics stable and deterministic. - -5. Facade and package integration -- Keep stable imports from `targeting.py` where external callers expect it. -- Update `src/continuous_refactoring/__init__.py` if new public symbols are intentionally exported. -- Final smoke tests. - -## Risk Profile - -Medium. - -Watch-outs: -- Avoid speculative new API: no extra adapters, no temporary compatibility aliases. -- Do not rename the "truthy" precedence order; any change must be explicit and covered by tests. -- Ensure package-level uniqueness passes after each phase; no duplicate exports allowed. 
- diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json deleted file mode 100644 index cf8094e..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "awaiting_human_review": false, - "cooldown_until": null, - "created_at": "2026-04-27T22:06:24.348-07:00", - "current_phase": "", - "human_review_reason": null, - "last_touch": "2026-04-28T15:43:53.161-07:00", - "name": "src-continuous-refactoring-targeting-py-20260427T220624", - "phases": [ - { - "done": true, - "effort_reason": null, - "file": "phase-1-targeting-parse-foundation.md", - "name": "targeting-parse-foundation", - "precondition": "- `rg -n \\\"def parse_paths_arg\\\\(\\\" src/continuous_refactoring/targeting.py` returns no matches before edits. - Existing `loop.py` path parsing may still be present; phase 2 owns loop delegation and removal.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-2-loop-delegates-targeting-parse.md", - "name": "loop-delegates-targeting-parse", - "precondition": "- `phase-1-targeting-parse-foundation.md` is marked complete in the migration manifest. - `rg -n \\\"def parse_paths_arg\\\\(\\\" src/continuous_refactoring/targeting.py` finds the parser in `targeting.py`. - `rg -n \\\"def _parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` finds the local parser that this phase removes. 
- `rg -n \\\"_resolve_targets_from_args\\\\(\\\" src/continuous_refactoring/loop.py` finds the shared helper definition plus the existing `run_once()` and `run_loop()` callsites, confirming both entrypoints already route through one resolver before this phase delegates path parsing.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-3-targeting-git-boundary-hardening.md", - "name": "targeting-git-boundary-hardening", - "precondition": "- Phase 2 is marked complete in the migration manifest. - `rg -n \\\"def _parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` returns no matches, confirming Phase 2 removed loop-local path parsing. - `rg -n \\\"parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` reports only delegated usage from the shared target resolver. - `rg -n \\\"def list_tracked_files\\\\(\\\" src/continuous_refactoring/targeting.py` finds the tracked-file enumeration implementation that this phase hardens.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-4-targeting-surface-regression-lock.md", - "name": "targeting-surface-regression-lock", - "precondition": "- Phases 1, 2, and 3 are marked complete in the migration manifest. - `rg -n \\\"parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` shows only callsite usage from `targeting`. - `rg -n \\\"_parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` returns no matches. - `rg -n \\\"run_command\\\\(\\\" src/continuous_refactoring/targeting.py` finds tracked-file enumeration flowing through the repository git boundary. 
- `rg -n \\\"subprocess\\\\.run\\\\(\\\" src/continuous_refactoring/targeting.py` returns no matches.", - "required_effort": null - } - ], - "status": "done", - "wake_up_on": null -} diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md deleted file mode 100644 index 3f79a46..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md +++ /dev/null @@ -1,37 +0,0 @@ -# Phase 1: targeting parse foundation - -## Objective -Move path and selection parsing into `targeting.py` without changing public targeting output. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `tests/test_targeting.py` - -## Instructions -1. In `targeting.py`, add a helper that owns CLI path parsing: - - `parse_paths_arg(raw_paths: str | None) -> tuple[str, ...] | None` -2. Keep `resolve_targets()` as the public entrypoint and add the helper without changing current loop callsites: - - normalize and drop empty segments in one place. - - avoid changing output ordering or `Target.provenance` behavior. -3. Add/adjust tests in `tests/test_targeting.py` for: - - trimming and dropping empty path segments (e.g. `"src/foo.py: src/bar.py"`) - - `None`/blank path raw values produce `None` - - precedence expectations preserved in `resolve_targets` when path input is present -4. Keep warning text and exception behavior stable unless a test in this phase requires a deliberate, documented assertion. - -## Precondition -- `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` returns no matches before edits. -- Existing `loop.py` path parsing may still be present; phase 2 owns loop delegation and removal. - -## Definition of Done -- `targeting.py` has a first-class path parser ready for loop delegation. 
-- `tests/test_targeting.py` contains targeted regression coverage for path parsing semantics and precedence at the unit level. -- `uv run pytest tests/test_targeting.py` passes with no skipped assertions specific to this migration. -- No external API/CLI contract changes outside `targeting.py` behavior. -- No edits are made in `loop.py` during this phase. - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py` -- Validate ownership by inspection and signature checks: - - `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` - - `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md deleted file mode 100644 index 83cbbac..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md +++ /dev/null @@ -1,47 +0,0 @@ -# Phase 2: loop delegates target parsing and resolution - -## Objective -Make `loop.py` a thin orchestration layer by delegating all target argument parsing to `targeting.py`. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `src/continuous_refactoring/loop.py` -- `tests/test_run_once_regression.py` -- `tests/test_run.py` - -## Instructions -1. Remove local `_parse_paths_arg` path parsing logic from `loop.py`. -2. Update `_resolve_targets_from_args()` to call `targeting.parse_paths_arg(...)` and pass the parsed value directly into `resolve_targets(...)`. -3. Keep existing precedence behavior identical: `targets` > `globs` > `extensions` > `paths` > random. -4. Ensure no behavioral coupling is introduced in loop entrypoints: - - `run_once()` - - `run_loop()` -5. 
Add/adjust regression checks covering: - - trimmed path handling in the run-once path (`args.paths` with whitespace) - - path-driven target prompt shape in one-shot flow - - non-empty target list behavior in normal loop mode. -6. Ensure parse helper ownership is visible at callsite by importing from `continuous_refactoring.targeting` rather than local path parsing implementations. - -## Precondition -- `phase-1-targeting-parse-foundation.md` is marked complete in the migration manifest. -- `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` finds the parser in `targeting.py`. -- `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` finds the local parser that this phase removes. -- `rg -n \"_resolve_targets_from_args\\(\" src/continuous_refactoring/loop.py` finds the shared helper definition plus the existing `run_once()` and `run_loop()` callsites, confirming both entrypoints already route through one resolver before this phase delegates path parsing. - -## Definition of Done -- `loop.py` contains no `_parse_paths_arg` implementation and does not parse `args.paths` directly. -- `_resolve_targets_from_args()` passes parsed `paths` and raw non-path selectors to `resolve_targets(...)` in one place. -- `run_once` and `run_loop` behavior stays unchanged for all targeting modes. -- Focused regression scope passes: - - path trimming in `args.paths` on one-shot path - - run-loop target prompt shape with non-empty target set - - precedence still resolves to `targets` > `globs` > `extensions` > `paths` > random. -- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` passes. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` reports only delegated usage to `targeting.parse_paths_arg`. 
- -## Validation steps -- Run: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` -- Verify delegation ownership by inspection: - - `rg -n \"_parse_paths_arg\\(\" src/continuous_refactoring/loop.py` - - `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` -- Keep `tests/test_run_once_regression.py` and `tests/test_run.py` green before phase transition. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md deleted file mode 100644 index b485fbb..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md +++ /dev/null @@ -1,38 +0,0 @@ -# Phase 3: targeting git enumeration boundary hardening - -## Objective -Standardize failure handling for tracked-file enumeration so git subprocess failures are wrapped at the targeting boundary with preserved causes. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `src/continuous_refactoring/git.py` -- `tests/test_targeting.py` - -## Instructions -1. In `targeting.py`, replace direct subprocess-based tracked-file reads inside `list_tracked_files()` with the repository git boundary (`continuous_refactoring.git.run_command`, imported or module-qualified). -2. Add module-local context when git enumeration fails and preserve the original exception via `from` (`ContinuousRefactorError` nesting). -3. Keep non-fatal semantics for missing matches: - - no patterns -> empty tuple - - zero tracked files in matching mode -> empty tuple -4. Add/extend tests in `tests/test_targeting.py` for nested-cause behavior and message preservation when git enumeration fails. -5. Keep `list_tracked_files` return value and shape stable when git succeeds. - -## Precondition -- Phase 2 is marked complete in the migration manifest. 
-- `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` returns no matches, confirming Phase 2 removed loop-local path parsing. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` reports only delegated usage from the shared target resolver. -- `rg -n \"def list_tracked_files\\(\" src/continuous_refactoring/targeting.py` finds the tracked-file enumeration implementation that this phase hardens. - -## Definition of Done -- `list_tracked_files()` uses the git command boundary and wraps failures with nested context at the targeting boundary. -- No behavioral changes in successful pattern matching paths. -- `uv run pytest tests/test_targeting.py` passes. -- `rg -n \"subprocess\\.run\\(\" src/continuous_refactoring/targeting.py` returns no matches for tracked-file reads. -- `tests/test_targeting.py` has explicit assertions for: - - `ContinuousRefactorError` raised on git command failures - - original failure attached as `__cause__` - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py` -- Confirm that a failing git enumeration path raises `ContinuousRefactorError` with the original cause (`GitCommandError` from the git command boundary) where applicable. -- Confirm by inspection that tracked-file reads now flow through the repository git command boundary and no duplicate subprocess paths exist in `targeting.py`. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md deleted file mode 100644 index 63f07d3..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md +++ /dev/null @@ -1,46 +0,0 @@ -# Phase 4: targeting surface regression lock - -## Objective -Lock in regression coverage across CLI, loop, and planning surfaces after in-place targeting refactor. 
- -## Scope -- `tests/test_targeting.py` -- `tests/test_run.py` -- `tests/test_run_once_regression.py` -- `tests/test_scope_loop_integration.py` -- `tests/test_focus_on_live_migrations.py` -- `tests/test_e2e.py` - -## Instructions -1. Run focused integration/reuse tests that depend on targeting contracts (prompt construction, CLI handling, loop flow). -2. Confirm no implicit behavior shift for these invariants: - - precedence remains `targets > globs > extensions > paths > random` - - `--paths` whitespace is ignored after parsing - - random fallback to `general refactoring` remains unchanged when no tracked matches exist - - no regression in live-migration routing where target files are forwarded unchanged. -3. If new regression failures appear, contain them in a minimal additional test under the targeting module or affected loop integration test in the same phase. -4. Keep user-facing output and validation contracts intact: - - prompt text/contents used by `compose_full_prompt` flows - - scope-fallback and max-target behavior semantics. - -## Precondition -- Phases 1, 2, and 3 are marked complete in the migration manifest. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` shows only callsite usage from `targeting`. -- `rg -n \"_parse_paths_arg\\(\" src/continuous_refactoring/loop.py` returns no matches. -- `rg -n \"run_command\\(\" src/continuous_refactoring/targeting.py` finds tracked-file enumeration flowing through the repository git boundary. -- `rg -n \"subprocess\\.run\\(\" src/continuous_refactoring/targeting.py` returns no matches. - -## Definition of Done -- Focused cross-surface targeting regression suite for this phase passes. -- Targeting behavior is stable for both one-shot and loop runs, including live-migration integration points. -- No behavior contract changes are introduced by module boundary refactoring. 
-- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` passes. -- `uv run pytest` passes (final migration-wide regression gate). -- In `prompt` and `cli` surfaces, precedence and fallback invariants are still observable: - - `targets` > `globs` > `extensions` > `paths` > `random` - - `--paths` whitespace is ignored - - random fallback still uses existing fallback prompt behavior. - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` -- Run: `uv run pytest` diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md deleted file mode 100644 index 6e25aee..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md +++ /dev/null @@ -1,68 +0,0 @@ -# Migration: src-continuous-refactoring-targeting-py-20260427T220624 - -## Goal -Refactor target resolution so CLI targeting semantics are owned by `src/continuous_refactoring/targeting.py`, while keeping all runtime behavior unchanged and preserving precedence order (`targets` > `globs` > `extensions` > `paths` > random fallback). - -## Chosen approach -`in-place-target-resolution` - -## Scope -- `src/continuous_refactoring/targeting.py` -- `tests/test_targeting.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/prompts.py` -- `src/continuous_refactoring/cli.py` -- `src/continuous_refactoring/artifacts.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/scope_expansion.py` - -## Non-goals -- No API or data-shape migration. 
-- No architectural split of `loop.py`/`scope_expansion.py`. -- No rollout/temporary naming or compatibility shims. -- No global project-wide behavior changes outside target resolution and tracking failure boundaries. - -## Phases -1. `phase-1-targeting-parse-foundation.md` -2. `phase-2-loop-delegates-targeting-parse.md` -3. `phase-3-targeting-git-boundary-hardening.md` -4. `phase-4-targeting-surface-regression-lock.md` - -```mermaid -flowchart TD - P1[phase-1-targeting-parse-foundation] - P2[phase-2-loop-delegates-targeting-parse] - P3[phase-3-targeting-git-boundary-hardening] - P4[phase-4-targeting-surface-regression-lock] - - P1 --> P2 - P2 --> P3 - P2 --> P4 - P3 --> P4 -``` - -## Dependencies -1. `phase-1` must establish parsing ownership in `targeting.py` before `loop.py` can delegate. -2. `phase-2` must complete before `phase-3` because error-hardening depends on the same argument flow. -3. `phase-4` must wait for both `phase-2` and `phase-3` so loop behavior and git-failure boundaries are stabilized. - -## Dependency summary -- `phase-1` must establish parser/selector abstractions and coverage in `test_targeting.py` before `loop.py` and callers can shift responsibility. -- `phase-2` depends on phase-1 because all loop delegation points route through the new targeting helper signatures. -- `phase-3` depends on phase-2 so any git-tracking failure edge case is validated through the same call shape used by loop + tests. -- `phase-4` is integration + regression lock and must only run after all prior phase DoD are satisfied. - -## Validation strategy -Each phase is independently verifiable and includes a narrow command that should be green before proceeding. 
- -- `phase-1` gate: `uv run pytest tests/test_targeting.py` -- `phase-2` gate: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` -- `phase-3` gate: `uv run pytest tests/test_targeting.py` -- `phase-4` gate: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` - -Final migration gate (after all phases): -- `uv run pytest` - -## Validation notes -The migration stays shippable after every phase by enforcing behavior-specific gates that include the changed surface. -Phase order minimizes coupling: parsing is isolated first, delegation second, failure boundary hardening third, and only then full-surface verification. From 267f77719789dd9ff16e3ab192d562a5a44b548e Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Tue, 28 Apr 2026 23:52:37 -0700 Subject: [PATCH 002/103] continuous refactor: src/continuous_refactoring/scope_expansion.py Why: Removes duplicated bypass log formatting across modules and protects the scope-expansion artifact contract with focused coverage. 
Validation: uv run pytest --- .../routing_pipeline.py | 10 ++-- src/continuous_refactoring/scope_expansion.py | 4 +- tests/test_scope_expansion.py | 49 +++++++++++++++++++ 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/continuous_refactoring/routing_pipeline.py b/src/continuous_refactoring/routing_pipeline.py index 79670a9..f2df5c7 100644 --- a/src/continuous_refactoring/routing_pipeline.py +++ b/src/continuous_refactoring/routing_pipeline.py @@ -35,9 +35,11 @@ from continuous_refactoring.prompts import describe_scope_candidate from continuous_refactoring.routing import classify_target from continuous_refactoring.scope_expansion import ( + ScopeSelection, scope_candidate_to_target, scope_expansion_bypass_reason, select_scope_candidate, + write_scope_selection_logs, write_scope_expansion_artifacts, ) from continuous_refactoring.scope_candidates import build_scope_candidates @@ -90,11 +92,9 @@ def expand_target_for_classification( (), bypass_reason=bypass_reason, ) - bypass_line = f"selected-candidate: seed — {bypass_reason}\n" - (scope_dir / "selection.stdout.log").write_text(bypass_line, encoding="utf-8") - (scope_dir / "selection-last-message.md").write_text( - bypass_line, - encoding="utf-8", + write_scope_selection_logs( + scope_dir, + ScopeSelection(kind="seed", reason=bypass_reason), ) return target, _scope_bypass_context(target, bypass_reason) diff --git a/src/continuous_refactoring/scope_expansion.py b/src/continuous_refactoring/scope_expansion.py index 02e4b05..83b42e4 100644 --- a/src/continuous_refactoring/scope_expansion.py +++ b/src/continuous_refactoring/scope_expansion.py @@ -41,7 +41,7 @@ def _scope_selection_line(selection: ScopeSelection) -> str: return f"selected-candidate: {selection.kind} — {selection.reason}\n" -def _write_selection_logs(selection_dir: Path, selection: ScopeSelection) -> None: +def write_scope_selection_logs(selection_dir: Path, selection: ScopeSelection) -> None: line = _scope_selection_line(selection) 
(selection_dir / "selection.stdout.log").write_text(line, encoding="utf-8") (selection_dir / "selection-last-message.md").write_text(line, encoding="utf-8") @@ -111,7 +111,7 @@ def select_scope_candidate( kind=candidates[0].kind, reason="only viable candidate", ) - _write_selection_logs(selection_dir, selection) + write_scope_selection_logs(selection_dir, selection) return selection call_role = "scope-expansion" diff --git a/tests/test_scope_expansion.py b/tests/test_scope_expansion.py index 1849288..cce912c 100644 --- a/tests/test_scope_expansion.py +++ b/tests/test_scope_expansion.py @@ -6,6 +6,7 @@ import pytest +import continuous_refactoring.routing_pipeline as routing_pipeline import continuous_refactoring.scope_expansion as scope_expansion from continuous_refactoring.artifacts import ( CommandCapture, @@ -154,6 +155,54 @@ def test_write_scope_expansion_artifacts_records_payload(tmp_path: Path) -> None assert payload["selection"] == {"kind": "local-cluster", "reason": "clustered evidence"} +def test_expand_target_bypass_writes_scope_artifacts_and_logs( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + target = Target( + description="explicit paths", + files=("src/foo.py", "src/bar.py"), + provenance="paths", + ) + artifacts = _make_artifacts(tmp_path, monkeypatch) + + selected_target, planning_context = routing_pipeline.expand_target_for_classification( + target, + "taste", + tmp_path, + artifacts, + agent="codex", + model="gpt-5.5", + effort="low", + timeout=None, + ) + + scope_dir = artifacts.root / "scope-expansion" + payload = json.loads((scope_dir / "variants.json").read_text(encoding="utf-8")) + + assert selected_target == target + assert planning_context == ( + "Scope expansion bypassed: scope expansion bypassed for explicit multi-file target\n" + "Files:\n" + "- src/foo.py\n" + "- src/bar.py" + ) + assert payload == { + "bypass_reason": "scope expansion bypassed for explicit multi-file target", + "candidates": [], + "target": { + 
"description": "explicit paths", + "files": ["src/foo.py", "src/bar.py"], + "provenance": "paths", + }, + } + expected = ( + "selected-candidate: seed — scope expansion bypassed for explicit multi-file target\n" + ) + assert (scope_dir / "selection.stdout.log").read_text(encoding="utf-8") == expected + assert (scope_dir / "selection-last-message.md").read_text(encoding="utf-8") == expected + + def test_select_scope_candidate_surfaces_parser_boundary_errors( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, From 160cc522b1c5d036f708c39c895c7acc95478634 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Tue, 28 Apr 2026 23:58:00 -0700 Subject: [PATCH 003/103] continuous refactor: tests/test_scope_selection.py Why: Removes regex-heavy duplicate grammar handling in a boundary parser while preserving behavior with tighter edge-case tests. Validation: uv run pytest --- src/continuous_refactoring/scope_expansion.py | 36 +++++++++++++------ tests/test_scope_selection.py | 20 +++++++++++ 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/continuous_refactoring/scope_expansion.py b/src/continuous_refactoring/scope_expansion.py index 83b42e4..348bb7e 100644 --- a/src/continuous_refactoring/scope_expansion.py +++ b/src/continuous_refactoring/scope_expansion.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -import re from dataclasses import asdict, dataclass, replace from pathlib import Path from typing import TYPE_CHECKING @@ -24,10 +23,11 @@ from continuous_refactoring.prompts import compose_scope_selection_prompt from continuous_refactoring.scope_candidates import ScopeCandidate, ScopeCandidateKind -_SELECTION_RE = re.compile( - r"^selected-candidate:\s*(seed|local-cluster|cross-cluster)" - r"(?:\s*[—-]\s*(.+))?$", - re.IGNORECASE, +_SCOPE_SELECTION_PREFIX = "selected-candidate:" +_KNOWN_SCOPE_SELECTION_KINDS: tuple[ScopeCandidateKind, ...] 
= ( + "local-cluster", + "cross-cluster", + "seed", ) @@ -47,6 +47,23 @@ def write_scope_selection_logs(selection_dir: Path, selection: ScopeSelection) - (selection_dir / "selection-last-message.md").write_text(line, encoding="utf-8") +def _parse_selection_line(line: str) -> tuple[ScopeCandidateKind, str] | None: + if not line[: len(_SCOPE_SELECTION_PREFIX)].lower() == _SCOPE_SELECTION_PREFIX: + return None + body = line[len(_SCOPE_SELECTION_PREFIX):].strip() + for kind in _KNOWN_SCOPE_SELECTION_KINDS: + if not body.lower().startswith(kind): + continue + reason = body[len(kind):].strip() + if not reason: + return kind, kind + if reason[0] not in {"—", "-"}: + return None + reason = reason[1:].strip() + return kind, reason or kind + return None + + def scope_expansion_bypass_reason(target: Target) -> str | None: if len(target.files) == 0: return "scope expansion requires a seed file" @@ -65,17 +82,14 @@ def parse_scope_selection( if not non_blank: raise ContinuousRefactorError("Scope selection produced no output") for stripped in reversed(non_blank): - match = _SELECTION_RE.match(stripped) - if not match: + parsed = _parse_selection_line(stripped) + if parsed is None: continue - kind = match.group(1).lower() + kind, reason = parsed if kind not in candidate_kinds: raise ContinuousRefactorError( f"Selection chose unavailable candidate: {kind!r}" ) - reason = match.group(2).strip() if match.group(2) else "" - if not reason: - reason = kind return ScopeSelection(kind=kind, reason=reason) raise ContinuousRefactorError( f"Scope selection produced unrecognised output: {non_blank[-1]!r}" diff --git a/tests/test_scope_selection.py b/tests/test_scope_selection.py index 101cf03..65f5dff 100644 --- a/tests/test_scope_selection.py +++ b/tests/test_scope_selection.py @@ -18,6 +18,16 @@ def test_selection_parser_accepts_valid_output() -> None: assert selection.reason == "paired test and helper" +def test_selection_parser_accepts_hyphen_reason_separator() -> None: + selection = 
parse_scope_selection( + "selected-candidate: cross-cluster - shared validation surface\n", + _KINDS, + ) + + assert selection.kind == "cross-cluster" + assert selection.reason == "shared validation surface" + + def test_selection_parser_defaults_reason_to_kind_when_missing() -> None: selection = parse_scope_selection("selected-candidate: seed\n", _KINDS) @@ -40,6 +50,16 @@ def test_selection_parser_rejects_empty_output() -> None: parse_scope_selection(" \n\n", _KINDS) +def test_selection_parser_uses_last_valid_selection_line() -> None: + selection = parse_scope_selection( + "selected-candidate: seed\nnotes\nselected-candidate: local-cluster — paired test and helper\n", + _KINDS, + ) + + assert selection.kind == "local-cluster" + assert selection.reason == "paired test and helper" + + def test_selection_parser_rejects_kind_outside_available_candidates() -> None: with pytest.raises(ContinuousRefactorError, match="unavailable candidate"): parse_scope_selection( From ad11bffc336b3b11e5c497771a098f4f75f92291 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:03:11 -0700 Subject: [PATCH 004/103] continuous refactor: src/continuous_refactoring/planning.py Why: Removes repeated load-bearing parsing logic in planning.py while preserving phase discovery behavior and locking section-over-legacy precedence with tests. 
Validation: uv run pytest --- src/continuous_refactoring/planning.py | 98 ++++++++++++++++++-------- tests/test_planning.py | 27 +++++++ 2 files changed, 94 insertions(+), 31 deletions(-) diff --git a/src/continuous_refactoring/planning.py b/src/continuous_refactoring/planning.py index 82be2e5..4ac44e0 100644 --- a/src/continuous_refactoring/planning.py +++ b/src/continuous_refactoring/planning.py @@ -47,6 +47,13 @@ class PlanningOutcome: reason: str +@dataclass(frozen=True) +class _PhaseMetadata: + precondition: str + required_effort: str | None + effort_reason: str | None + + @dataclass(frozen=True) class _PlanningStageSpec: prompt_stage: PlanningStage @@ -119,36 +126,75 @@ def _phase_section_text(content: str, heading: str) -> str | None: return normalized or None -def _phase_precondition(content: str, phase_file: str) -> str: - section = _phase_section_text(content, "Precondition") +def _phase_field( + content: str, + *, + heading: str, + line_re: re.Pattern[str], +) -> str | None: + section = _phase_section_text(content, heading) if section is not None: return section - match = _PRECONDITION_LINE_RE.search(content) + match = line_re.search(content) if match: return match.group(1).strip() - return f"prerequisites in {phase_file} are met" + return None + + +def _parse_phase_metadata(content: str, phase_file: str) -> _PhaseMetadata: + precondition = _phase_field( + content, + heading="Precondition", + line_re=_PRECONDITION_LINE_RE, + ) + raw_required_effort = _phase_field( + content, + heading="Required Effort", + line_re=_REQUIRED_EFFORT_LINE_RE, + ) + effort_reason = _phase_field( + content, + heading="Effort Reason", + line_re=_EFFORT_REASON_LINE_RE, + ) + required_effort = None + if raw_required_effort is not None: + candidate = raw_required_effort.strip().strip("`").split()[0].strip("`.,;:") + required_effort = require_effort_tier( + candidate, + field=f"{phase_file} required_effort", + ) + return _PhaseMetadata( + precondition=precondition or 
f"prerequisites in {phase_file} are met", + required_effort=required_effort, + effort_reason=effort_reason, + ) + + +def _phase_precondition(content: str, phase_file: str) -> str: + return _parse_phase_metadata(content, phase_file).precondition def _phase_required_effort(content: str, phase_file: str) -> str | None: - raw = _phase_section_text(content, "Required Effort") - if raw is None: - match = _REQUIRED_EFFORT_LINE_RE.search(content) - raw = match.group(1).strip() if match else None - if raw is None: - return None - candidate = raw.strip().strip("`").split()[0].strip("`.,;:") - return require_effort_tier(candidate, field=f"{phase_file} required_effort") + return _parse_phase_metadata(content, phase_file).required_effort def _phase_effort_reason(content: str) -> str | None: - section = _phase_section_text(content, "Effort Reason") - if section is not None: - return section - match = _EFFORT_REASON_LINE_RE.search(content) - if match: - return match.group(1).strip() - return None - + return _parse_phase_metadata(content, "").effort_reason + + +def _phase_spec_from_file(phase_file: Path) -> PhaseSpec: + content = phase_file.read_text(encoding="utf-8") + metadata = _parse_phase_metadata(content, phase_file.name) + name = phase_file.stem.split("-", 2)[2] + return PhaseSpec( + name=name, + file=phase_file.name, + done=False, + precondition=metadata.precondition, + required_effort=metadata.required_effort, + effort_reason=metadata.effort_reason, + ) def _discover_phase_files(mig_root: Path) -> tuple[PhaseSpec, ...]: phase_files: list[tuple[int, Path]] = [] @@ -169,17 +215,7 @@ def _discover_phase_files(mig_root: Path) -> tuple[PhaseSpec, ...]: f"Duplicate phase names are not allowed in {mig_root.name}: {name}" ) seen_names.add(name) - content = pf.read_text(encoding="utf-8") - phases.append( - PhaseSpec( - name=name, - file=pf.name, - done=False, - precondition=_phase_precondition(content, pf.name), - required_effort=_phase_required_effort(content, pf.name), - 
effort_reason=_phase_effort_reason(content), - ) - ) + phases.append(_phase_spec_from_file(pf)) return tuple(phases) diff --git a/tests/test_planning.py b/tests/test_planning.py index 145856a..df1cfdf 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -546,6 +546,33 @@ def test_discover_phase_files_reads_optional_effort_metadata(tmp_path: Path) -> assert phases[0].effort_reason == "touches routing and planning" +def test_discover_phase_files_prefers_section_metadata_over_legacy_lines( + tmp_path: Path, +) -> None: + mig_root = tmp_path / "live" / "section-precedence" + mig_root.mkdir(parents=True) + + (mig_root / "phase-1-risky.md").write_text( + ( + "precondition: legacy precondition\n" + "required_effort: low\n" + "effort_reason: legacy reason\n\n" + "## Precondition\n\nsection precondition\n\n" + "## Required Effort\n\nhigh with extra context\n\n" + "## Effort Reason\n\nsection reason wins\n\n" + "## Definition of Done\n\nDone.\n" + ), + encoding="utf-8", + ) + + phases = _discover_phase_files(mig_root) + + assert len(phases) == 1 + assert phases[0].precondition == "section precondition" + assert phases[0].required_effort == "high" + assert phases[0].effort_reason == "section reason wins" + + def test_discover_phase_files_rejects_invalid_required_effort(tmp_path: Path) -> None: mig_root = tmp_path / "live" / "bad-effort" mig_root.mkdir(parents=True) From 0f7854374219fb8d9db82bd61ed8d2ac78eb6538 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:11:19 -0700 Subject: [PATCH 005/103] continuous refactor: src/continuous_refactoring/refactor_attempts.py Why: This makes a load-bearing retry state machine smaller and easier to change while preserving rollback and validation semantics under direct test. 
Validation: uv run pytest --- .../refactor_attempts.py | 104 +++++++++-- tests/test_refactor_attempts.py | 166 ++++++++++++++++++ 2 files changed, 252 insertions(+), 18 deletions(-) create mode 100644 tests/test_refactor_attempts.py diff --git a/src/continuous_refactoring/refactor_attempts.py b/src/continuous_refactoring/refactor_attempts.py index 6fbbb3b..fb4e428 100644 --- a/src/continuous_refactoring/refactor_attempts.py +++ b/src/continuous_refactoring/refactor_attempts.py @@ -103,6 +103,74 @@ def _retry_context(record: DecisionRecord) -> str: return "\n".join(lines) +def _decision_record( + *, + decision: str, + retry_recommendation: str, + target: str, + call_role: str, + phase_reached: str, + failure_kind: str, + summary: str, + next_retry_focus: str | None = None, + agent_last_message_path: Path | None = None, + agent_stdout_path: Path | None = None, + agent_stderr_path: Path | None = None, + tests_stdout_path: Path | None = None, + tests_stderr_path: Path | None = None, +) -> DecisionRecord: + return DecisionRecord( + decision=decision, + retry_recommendation=retry_recommendation, + target=target, + call_role=call_role, + phase_reached=phase_reached, + failure_kind=failure_kind, + summary=summary, + next_retry_focus=next_retry_focus, + agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + agent_stderr_path=agent_stderr_path, + tests_stdout_path=tests_stdout_path, + tests_stderr_path=tests_stderr_path, + ) + + +def _restore_and_retry( + *, + repo_root: Path, + head_before: str, + preserved_workspace: _PreservedWorkspaceTree | None, + target: str, + call_role: str, + phase_reached: str, + failure_kind: str, + summary: str, + next_retry_focus: str | None, + agent_last_message_path: Path | None, + agent_stdout_path: Path | None, + agent_stderr_path: Path | None, + tests_stdout_path: Path | None = None, + tests_stderr_path: Path | None = None, +) -> DecisionRecord: + _reset_to_source_baseline(repo_root, head_before, 
preserved_workspace) + return _decision_record( + decision="retry", + retry_recommendation="same-target", + target=target, + call_role=call_role, + phase_reached=phase_reached, + failure_kind=failure_kind, + summary=summary, + next_retry_focus=next_retry_focus, + agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + agent_stderr_path=agent_stderr_path, + tests_stdout_path=tests_stdout_path, + tests_stderr_path=tests_stderr_path, + ) + + def _finalize_commit( repo_root: Path, head_before: str, @@ -194,7 +262,6 @@ def _run_refactor_attempt( summary=str(error), effort=effort_metadata, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) agent_status = read_status( agent, last_message_path=last_message_path, @@ -205,9 +272,10 @@ def _run_refactor_attempt( fallback=sanitize_text(str(error), repo_root) or str(error), repo_root=repo_root, ) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=call_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -241,10 +309,10 @@ def _run_refactor_attempt( summary=summary, effort=effort_metadata, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=call_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -293,15 +361,15 @@ def _run_refactor_attempt( level="WARN", summary=str(error), ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) summary, focus = status_summary( agent_status, fallback=sanitize_text(str(error), repo_root) or str(error), repo_root=repo_root, ) - 
return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=validation_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -332,10 +400,10 @@ def _run_refactor_attempt( returncode=validation_result.returncode, summary=summary, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=validation_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -371,7 +439,7 @@ def _run_refactor_attempt( agent_status.retry_recommendation or default_retry_recommendation(decision) ) - return DecisionRecord( + return _decision_record( decision=decision, retry_recommendation=retry_recommendation, target=target.description, @@ -408,7 +476,7 @@ def _run_refactor_attempt( phase="refactor", ) - return DecisionRecord( + return _decision_record( decision="commit", retry_recommendation="none", target=target.description, diff --git a/tests/test_refactor_attempts.py b/tests/test_refactor_attempts.py new file mode 100644 index 0000000..fa56bf8 --- /dev/null +++ b/tests/test_refactor_attempts.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from continuous_refactoring import run_command +from continuous_refactoring.artifacts import ( + CommandCapture, + ContinuousRefactorError, + RunArtifacts, + create_run_artifacts, +) +from continuous_refactoring.refactor_attempts import _run_refactor_attempt +from continuous_refactoring.targeting import Target + +from conftest import init_repo + + +@pytest.fixture(autouse=True) +def _isolate_tmpdir( + tmp_path: Path, 
monkeypatch: pytest.MonkeyPatch, +) -> None: + (tmp_path / "tmpdir").mkdir() + monkeypatch.setenv("TMPDIR", str(tmp_path / "tmpdir")) + + +def _make_artifacts(repo_root: Path) -> RunArtifacts: + return create_run_artifacts( + repo_root=repo_root, + agent="codex", + model="fake", + effort="low", + test_command="uv run pytest", + ) + + +def _target() -> Target: + return Target( + description="src/demo.py", + files=("src/demo.py",), + provenance="paths", + ) + + +def _capture(path: Path, *, returncode: int = 0) -> CommandCapture: + path.parent.mkdir(parents=True, exist_ok=True) + stderr_path = path.with_name("stderr.log") + path.write_text("", encoding="utf-8") + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=returncode, + stdout="", + stderr="", + stdout_path=path, + stderr_path=stderr_path, + ) + + +def test_run_refactor_attempt_agent_infra_failure_restores_baseline( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root = tmp_path / "repo" + init_repo(repo_root) + artifacts = _make_artifacts(repo_root) + head_before = run_command(["git", "rev-parse", "HEAD"], cwd=repo_root).stdout.strip() + + def fail_agent(**kwargs: object) -> CommandCapture: + rr = Path(str(kwargs["repo_root"])) + (rr / "bad_change.txt").write_text("bad\n", encoding="utf-8") + raise ContinuousRefactorError("Command timed out after 1s: fake") + + monkeypatch.setattr( + "continuous_refactoring.refactor_attempts.maybe_run_agent", + fail_agent, + ) + + record = _run_refactor_attempt( + repo_root=repo_root, + artifacts=artifacts, + target=_target(), + attempt=1, + retry=1, + agent="codex", + model="fake", + effort="low", + prompt="prompt", + timeout=1, + validation_command="uv run pytest", + show_agent_logs=False, + show_command_logs=False, + commit_message_prefix="continuous refactor", + ) + + assert record.decision == "retry" + assert record.retry_recommendation == "same-target" + assert record.call_role == "refactor" + 
assert record.failure_kind == "timeout" + assert not (repo_root / "bad_change.txt").exists() + head_after = run_command(["git", "rev-parse", "HEAD"], cwd=repo_root).stdout.strip() + assert head_after == head_before + + +def test_run_refactor_attempt_validation_infra_failure_records_test_logs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root = tmp_path / "repo" + init_repo(repo_root) + artifacts = _make_artifacts(repo_root) + + def ok_agent(**kwargs: object) -> CommandCapture: + rr = Path(str(kwargs["repo_root"])) + (rr / "bad_change.txt").write_text("bad\n", encoding="utf-8") + stdout_path = Path(str(kwargs["stdout_path"])) + return _capture(stdout_path) + + def fail_validation( + test_command: str, + repo_root: Path, + stdout_path: Path, + stderr_path: Path, + **kwargs: object, + ) -> CommandCapture: + raise ContinuousRefactorError("pytest executable missing") + + monkeypatch.setattr( + "continuous_refactoring.refactor_attempts.maybe_run_agent", + ok_agent, + ) + monkeypatch.setattr( + "continuous_refactoring.refactor_attempts.run_tests", + fail_validation, + ) + + record = _run_refactor_attempt( + repo_root=repo_root, + artifacts=artifacts, + target=_target(), + attempt=1, + retry=1, + agent="codex", + model="fake", + effort="low", + prompt="prompt", + timeout=None, + validation_command="uv run pytest", + show_agent_logs=False, + show_command_logs=False, + commit_message_prefix="continuous refactor", + ) + + assert record.decision == "retry" + assert record.call_role == "validation" + assert record.failure_kind == "validation-infra-failure" + assert ( + record.tests_stdout_path + == artifacts.attempt_dir(1) / "refactor" / "tests.stdout.log" + ) + assert ( + record.tests_stderr_path + == artifacts.attempt_dir(1) / "refactor" / "tests.stderr.log" + ) + assert not (repo_root / "bad_change.txt").exists() From e55ee1519ff82830da16da0089d0ff65de648a78 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:16:14 -0700 
Subject: [PATCH 006/103] continuous refactor: tests/test_no_driver_branching.py Why: Preserves a real safety invariant by fixing a test helper that could miss branching calls in direct run_command imports and still pass. Validation: uv run pytest --- tests/conftest.py | 19 +++++++++++++++++++ tests/test_no_driver_branching.py | 23 +++++------------------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 372f0e8..4eb478f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,8 @@ import continuous_refactoring import continuous_refactoring.loop +import continuous_refactoring.refactor_attempts +import continuous_refactoring.targeting from continuous_refactoring.artifacts import CommandCapture from continuous_refactoring.config import ( ProjectEntry, @@ -341,6 +343,23 @@ def failing_tests( ) +def install_run_command_spy( + monkeypatch: pytest.MonkeyPatch, +) -> list[tuple[str, ...]]: + captured: list[tuple[str, ...]] = [] + real_run_command = continuous_refactoring.run_command + + def spy(command, cwd, *args, **kwargs): # type: ignore[no-untyped-def] + captured.append(tuple(command)) + return real_run_command(command, cwd, *args, **kwargs) + + monkeypatch.setattr("continuous_refactoring.git.run_command", spy) + monkeypatch.setattr("continuous_refactoring.loop.run_command", spy) + monkeypatch.setattr("continuous_refactoring.refactor_attempts.run_command", spy) + monkeypatch.setattr("continuous_refactoring.targeting.run_command", spy) + return captured + + def _default_validation_command(repo_root: Path) -> str: test_script = repo_root.parent / "check_tests.py" if not test_script.exists(): diff --git a/tests/test_no_driver_branching.py b/tests/test_no_driver_branching.py index ae4dcfe..b92a71d 100644 --- a/tests/test_no_driver_branching.py +++ b/tests/test_no_driver_branching.py @@ -15,6 +15,7 @@ ) from conftest import ( + install_run_command_spy, make_run_loop_args, make_run_once_args, noop_agent, @@ 
-139,21 +140,6 @@ def test_run_arg_helpers_match_cli_effort_defaults(run_once_env: Path) -> None: assert run_loop_args.max_allowed_effort == "xhigh" -def _install_argv_spy(monkeypatch: pytest.MonkeyPatch) -> list[tuple[str, ...]]: - """Record every argv passed to git.run_command across the driver.""" - captured: list[tuple[str, ...]] = [] - real_run_command = continuous_refactoring.git.run_command - - def spy(command, cwd, *args, **kwargs): # type: ignore[no-untyped-def] - captured.append(tuple(command)) - return real_run_command(command, cwd, *args, **kwargs) - - # The driver imports run_command into multiple modules; patch each binding. - monkeypatch.setattr("continuous_refactoring.git.run_command", spy) - monkeypatch.setattr("continuous_refactoring.loop.run_command", spy) - return captured - - def _assert_no_branching(captured: list[tuple[str, ...]]) -> None: branching = [argv for argv in captured if _is_branching_argv(argv)] assert not branching, ( @@ -192,11 +178,12 @@ def test_run_once_makes_no_branching_calls( monkeypatch.setattr("continuous_refactoring.loop.maybe_run_agent", noop_agent) monkeypatch.setattr("continuous_refactoring.loop.run_tests", noop_tests) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_once(make_run_once_args(run_once_env)) assert exit_code == 0 + assert ("git", "ls-files", "-z") in captured _assert_no_branching(captured) @@ -224,7 +211,7 @@ def touching_agent(**kwargs: object) -> object: encoding="utf-8", ) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_loop( make_run_loop_args( @@ -289,7 +276,7 @@ def fake_execute_phase( "continuous_refactoring.migration_tick.execute_phase", fake_execute_phase, ) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_migrations_focused_loop( 
make_run_loop_args( From 13443a1f59373b9e1cc547e80e730003f3fdaa5f Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:20:40 -0700 Subject: [PATCH 007/103] continuous refactor: tests/test_effort.py Why: This locks down migration-facing effort semantics and removes duplicated result-building logic that could drift. Validation: uv run pytest --- src/continuous_refactoring/effort.py | 28 +++++++++---- tests/test_effort.py | 61 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/src/continuous_refactoring/effort.py b/src/continuous_refactoring/effort.py index 823bccf..5c6d616 100644 --- a/src/continuous_refactoring/effort.py +++ b/src/continuous_refactoring/effort.py @@ -90,6 +90,24 @@ def cap_effort(requested: EffortTier, max_allowed: EffortTier) -> EffortTier: return requested +def _build_resolution( + *, + source: str, + requested_effort: EffortTier, + max_allowed_effort: EffortTier, + reason: str, +) -> EffortResolution: + effective_effort = cap_effort(requested_effort, max_allowed_effort) + return EffortResolution( + source=source, + requested_effort=requested_effort, + effective_effort=effective_effort, + max_allowed_effort=max_allowed_effort, + capped=effective_effort != requested_effort, + reason=reason, + ) + + def resolve_effort_budget( default_effort: object | None, max_allowed_effort: object | None, @@ -123,13 +141,10 @@ def resolve_requested_effort( if requested_effort is None else require_effort_tier(requested_effort, field=f"{source} effort") ) - effective = cap_effort(requested, budget.max_allowed_effort) - return EffortResolution( + return _build_resolution( source=source, requested_effort=requested, - effective_effort=effective, max_allowed_effort=budget.max_allowed_effort, - capped=effective != requested, reason=reason, ) @@ -146,13 +161,10 @@ def resolve_phase_effort( else max_effort(budget.default_effort, required_effort) ) source = "phase-required" if required_effort is not None else 
"default" - effective = cap_effort(requested, budget.max_allowed_effort) - return EffortResolution( + return _build_resolution( source=source, requested_effort=requested, - effective_effort=effective, max_allowed_effort=budget.max_allowed_effort, - capped=effective != requested, reason=reason or ( "phase required effort" if required_effort is not None else "default effort" ), diff --git a/tests/test_effort.py b/tests/test_effort.py index d714175..4904b9e 100644 --- a/tests/test_effort.py +++ b/tests/test_effort.py @@ -8,6 +8,7 @@ cap_effort, effort_exceeds, resolve_effort_budget, + resolve_phase_effort, resolve_requested_effort, ) @@ -52,3 +53,63 @@ def test_target_override_requests_default_then_caps_to_max() -> None: assert resolution.effective_effort == "medium" assert resolution.max_allowed_effort == "medium" assert resolution.capped is True + + +def test_phase_effort_uses_default_when_no_requirement() -> None: + budget = resolve_effort_budget("medium", "xhigh") + + resolution = resolve_phase_effort(budget, None) + + assert resolution.source == "default" + assert resolution.requested_effort == "medium" + assert resolution.effective_effort == "medium" + assert resolution.capped is False + assert resolution.reason == "default effort" + + +def test_phase_effort_does_not_drop_below_default() -> None: + budget = resolve_effort_budget("high", "xhigh") + + resolution = resolve_phase_effort(budget, "medium") + + assert resolution.source == "phase-required" + assert resolution.requested_effort == "high" + assert resolution.effective_effort == "high" + assert resolution.capped is False + assert resolution.reason == "phase required effort" + + +def test_phase_effort_promotes_then_caps_to_max() -> None: + budget = resolve_effort_budget("medium", "high") + + resolution = resolve_phase_effort( + budget, + "xhigh", + reason="migration phase override", + ) + + assert resolution.source == "phase-required" + assert resolution.requested_effort == "xhigh" + assert 
resolution.effective_effort == "high" + assert resolution.max_allowed_effort == "high" + assert resolution.capped is True + assert resolution.reason == "migration phase override" + + +def test_resolution_event_fields_match_resolution() -> None: + budget = resolve_effort_budget("low", "medium") + resolution = resolve_requested_effort( + budget, + "xhigh", + source="target-override", + reason="test override", + ) + + assert resolution.event_fields() == { + "effort_source": "target-override", + "requested_effort": "xhigh", + "effective_effort": "medium", + "max_allowed_effort": "medium", + "effort_capped": True, + "effort_reason": "test override", + } From 0b9eaa73fa68cd75d9972eac6d7730a10fee7b2a Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:25:42 -0700 Subject: [PATCH 008/103] continuous refactor: src/continuous_refactoring/migration_tick.py Why: removes repeated manifest discovery logic in a load-bearing module while preserving human-review and effort-budget scheduling semantics Validation: uv run pytest --- src/continuous_refactoring/migration_tick.py | 58 +++++++++++--------- tests/test_loop_migration_tick.py | 41 ++++++++++++++ 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/continuous_refactoring/migration_tick.py b/src/continuous_refactoring/migration_tick.py index a5f6c3e..ad5d756 100644 --- a/src/continuous_refactoring/migration_tick.py +++ b/src/continuous_refactoring/migration_tick.py @@ -78,25 +78,10 @@ def enumerate_eligible_manifests( now: datetime, effort_budget: EffortBudget | None = None, ) -> list[tuple[MigrationManifest, Path]]: - if not live_dir.is_dir(): - return [] candidates: list[tuple[MigrationManifest, Path]] = [] - for entry in sorted(live_dir.iterdir()): - if not entry.is_dir() or entry.name.startswith("__"): - continue - manifest_path = entry / "manifest.json" - if not manifest_path.exists(): - continue - manifest = load_manifest(manifest_path) - if manifest.status not in ("ready", 
"in-progress"): - continue - if manifest.awaiting_human_review: - continue - if not has_executable_phase(manifest): - continue - if not eligible_now(manifest, now): - continue - candidates.append((manifest, manifest_path)) + for manifest, manifest_path in _iter_candidate_manifests(live_dir): + if _is_normally_eligible(manifest, now): + candidates.append((manifest, manifest_path)) if effort_budget is not None: seen_paths = {path for _, path in candidates} for manifest, manifest_path in _cooling_effort_candidates( @@ -112,6 +97,17 @@ def _cooling_effort_candidates( live_dir: Path, now: datetime, budget: EffortBudget, +) -> list[tuple[MigrationManifest, Path]]: + candidates: list[tuple[MigrationManifest, Path]] = [] + for manifest, manifest_path in _iter_candidate_manifests(live_dir): + if not _can_ignore_effort_cooldown(manifest, now, budget): + continue + candidates.append((manifest, manifest_path)) + return candidates + + +def _iter_candidate_manifests( + live_dir: Path, ) -> list[tuple[MigrationManifest, Path]]: if not live_dir.is_dir(): return [] @@ -122,21 +118,25 @@ def _cooling_effort_candidates( manifest_path = entry / "manifest.json" if not manifest_path.exists(): continue - manifest = load_manifest(manifest_path) - if not _can_ignore_effort_cooldown(manifest, now, budget): - continue - candidates.append((manifest, manifest_path)) + candidates.append((load_manifest(manifest_path), manifest_path)) return candidates +def _is_normally_eligible(manifest: MigrationManifest, now: datetime) -> bool: + return ( + manifest.status in ("ready", "in-progress") + and not manifest.awaiting_human_review + and has_executable_phase(manifest) + and eligible_now(manifest, now) + ) + + def _can_ignore_effort_cooldown( manifest: MigrationManifest, now: datetime, budget: EffortBudget, ) -> bool: - if manifest.status not in ("ready", "in-progress"): - return False - if manifest.awaiting_human_review or not has_executable_phase(manifest): + if not _is_phase_candidate(manifest): 
return False if manifest.cooldown_until is None: return False @@ -149,6 +149,14 @@ def _can_ignore_effort_cooldown( ) +def _is_phase_candidate(manifest: MigrationManifest) -> bool: + return ( + manifest.status in ("ready", "in-progress") + and not manifest.awaiting_human_review + and has_executable_phase(manifest) + ) + + def try_migration_tick( live_dir: Path, taste: str, diff --git a/tests/test_loop_migration_tick.py b/tests/test_loop_migration_tick.py index 1f83d1c..c46efa6 100644 --- a/tests/test_loop_migration_tick.py +++ b/tests/test_loop_migration_tick.py @@ -324,6 +324,47 @@ def test_enumerate_eligible_manifests_ignores_noise_and_sorts_by_created_at( assert [path.parent.name for _, path in candidates] == ["older", "newer"] +def test_enumerate_eligible_manifests_includes_cooling_effort_candidate_once( + tmp_path: Path, +) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + now = _utc_now() + over_budget_phase = replace(_PHASE_0, required_effort="xhigh") + + _save( + replace( + _make_manifest( + "cooling-over-budget", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + phases=(over_budget_phase, _PHASE_1), + ), + cooldown_until=(now + timedelta(hours=1)).isoformat(timespec="milliseconds"), + ), + live_dir, + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + + candidates = enumerate_eligible_manifests( + live_dir, + now, + EffortBudget(default_effort="high", max_allowed_effort="xhigh"), + ) + + assert [manifest.name for manifest, _ in candidates] == [ + "cooling-over-budget", + "ready-now", + ] + + def test_try_migration_tick_skips_migrations_awaiting_human_review( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: From bbab5bc75a6487828f7c616ac5af8c33dda859bf Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:30:51 -0700 Subject: [PATCH 009/103] continuous refactor: tests/test_cli_init_taste.py 
Why: Removes duplicated boundary checks in init, making repo-relative path rules easier to maintain while preserving the tested contract for both flags. Validation: uv run pytest --- src/continuous_refactoring/cli.py | 71 ++++++++++++++++---------- tests/test_cli_init_taste.py | 85 +++++++++++++++++-------------- 2 files changed, 91 insertions(+), 65 deletions(-) diff --git a/src/continuous_refactoring/cli.py b/src/continuous_refactoring/cli.py index 89ba5c8..f7bcbdd 100644 --- a/src/continuous_refactoring/cli.py +++ b/src/continuous_refactoring/cli.py @@ -7,6 +7,7 @@ from collections.abc import Callable from importlib.metadata import version as metadata_version from pathlib import Path +from typing import Literal __all__ = [ "build_parser", @@ -318,36 +319,20 @@ def _handle_init(args: argparse.Namespace) -> None: try: if in_repo_taste_arg is not None: - repo_taste_resolved = (path / in_repo_taste_arg).resolve() - if not repo_taste_resolved.is_relative_to(path): - print( - f"Error: --in-repo-taste must be inside the repo: {in_repo_taste_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - if repo_taste_resolved.exists() and not repo_taste_resolved.is_file(): - print( - f"Error: --in-repo-taste must point to a file: {in_repo_taste_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - repo_taste_relative = str(repo_taste_resolved.relative_to(path)) + repo_taste_resolved, repo_taste_relative = _resolve_repo_relative_arg( + repo_root=path, + value=in_repo_taste_arg, + flag="--in-repo-taste", + expected_kind="file", + ) if live_dir_arg is not None: - resolved_live = (path / live_dir_arg).resolve() - if not resolved_live.is_relative_to(path): - print( - f"Error: --live-migrations-dir must be inside the repo: {live_dir_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - if resolved_live.exists() and not resolved_live.is_dir(): - print( - f"Error: --live-migrations-dir must point to a directory: {live_dir_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - 
live_dir_relative = str(resolved_live.relative_to(path)) + resolved_live, live_dir_relative = _resolve_repo_relative_arg( + repo_root=path, + value=live_dir_arg, + flag="--live-migrations-dir", + expected_kind="directory", + ) project = register_project(path) if repo_taste_relative is not None: @@ -386,6 +371,36 @@ def _handle_init(args: argparse.Namespace) -> None: print(f"Live migrations dir: {resolved_live}") +def _resolve_repo_relative_arg( + *, + repo_root: Path, + value: Path, + flag: str, + expected_kind: Literal["file", "directory"], +) -> tuple[Path, str]: + resolved = (repo_root / value).resolve() + if not resolved.is_relative_to(repo_root): + print( + f"Error: {flag} must be inside the repo: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + if resolved.exists(): + if expected_kind == "file" and not resolved.is_file(): + print( + f"Error: {flag} must point to a file: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + if expected_kind == "directory" and not resolved.is_dir(): + print( + f"Error: {flag} must point to a directory: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + return resolved, str(resolved.relative_to(repo_root)) + + def _configure_repo_taste( *, current: Path, diff --git a/tests/test_cli_init_taste.py b/tests/test_cli_init_taste.py index 6f2cc02..c3bdbbf 100644 --- a/tests/test_cli_init_taste.py +++ b/tests/test_cli_init_taste.py @@ -282,51 +282,78 @@ def test_init_in_repo_taste_conflict_force_replaces_with_old_taste( assert not source.exists() -def test_init_in_repo_taste_rejects_outside_repo( +@pytest.mark.parametrize( + ("args", "expected_message"), + [ + ( + make_init_args(Path("unused"), in_repo_taste=Path("../taste.md")), + "--in-repo-taste must be inside the repo", + ), + ( + make_init_args(Path("unused"), live_migrations_dir=Path("../outside")), + "--live-migrations-dir must be inside the repo", + ), + ], + ids=["in-repo-taste", "live-migrations-dir"], +) +def test_init_rejects_outside_repo( tmp_path: Path, 
monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + args: argparse.Namespace, + expected_message: str, ) -> None: repo = init_repo_with_temp_home(tmp_path, monkeypatch) - - args = argparse.Namespace( - path=repo, - in_repo_taste=Path("../taste.md"), - live_migrations_dir=None, - ) + args.path = repo with pytest.raises(SystemExit) as exc_info: _handle_init(args) assert exc_info.value.code == 2 err = capsys.readouterr().err - assert "--in-repo-taste must be inside the repo" in err + assert expected_message in err @pytest.mark.parametrize( - "taste_arg", - [Path("."), Path("existing-dir")], - ids=["repo-root", "existing-dir"], + ("args", "setup_name", "expected_message"), + [ + ( + make_init_args(Path("unused"), in_repo_taste=Path(".")), + "existing-dir", + "--in-repo-taste must point to a file", + ), + ( + make_init_args(Path("unused"), in_repo_taste=Path("existing-dir")), + "existing-dir", + "--in-repo-taste must point to a file", + ), + ( + make_init_args(Path("unused"), live_migrations_dir=Path("existing-file")), + "existing-file", + "--live-migrations-dir must point to a directory", + ), + ], + ids=["taste-repo-root", "taste-existing-dir", "live-migrations-existing-file"], ) -def test_init_in_repo_taste_rejects_directories( +def test_init_rejects_wrong_existing_path_kind( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], - taste_arg: Path, + args: argparse.Namespace, + setup_name: str, + expected_message: str, ) -> None: repo = init_repo_with_temp_home(tmp_path, monkeypatch) - (repo / "existing-dir").mkdir() - - args = argparse.Namespace( - path=repo, - in_repo_taste=taste_arg, - live_migrations_dir=None, - ) + args.path = repo + if setup_name == "existing-dir": + (repo / setup_name).mkdir() + else: + (repo / setup_name).write_text("not a directory\n", encoding="utf-8") with pytest.raises(SystemExit) as exc_info: _handle_init(args) assert exc_info.value.code == 2 err = capsys.readouterr().err - assert 
"--in-repo-taste must point to a file" in err + assert expected_message in err def test_init_idempotent( @@ -528,22 +555,6 @@ def test_init_live_migrations_dir_conflict_force_replaces_destination( assert not (repo / ".migrations").exists() -def test_init_live_migrations_dir_rejects_outside_repo( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - repo = init_repo_with_temp_home(tmp_path, monkeypatch) - - args = argparse.Namespace(path=repo, live_migrations_dir=Path("../outside")) - with pytest.raises(SystemExit) as exc_info: - _handle_init(args) - - assert exc_info.value.code == 2 - err = capsys.readouterr().err - assert "must be inside the repo" in err - - def test_init_exits_cleanly_on_malformed_manifest( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, From 28cc67b16e175dffee1693e787bd7dbe5a0be011 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:35:53 -0700 Subject: [PATCH 010/103] continuous refactor: tests/test_scope_candidates.py Why: This preserves the real scope-candidate contract while deleting test-coupled internals that made small cleanups harder than necessary. 
Validation: uv run pytest --- .../scope_candidates.py | 36 ++++++++++--------- tests/test_scope_candidates.py | 28 +++++++++------ 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/continuous_refactoring/scope_candidates.py b/src/continuous_refactoring/scope_candidates.py index 93f2f3f..6b7087e 100644 --- a/src/continuous_refactoring/scope_candidates.py +++ b/src/continuous_refactoring/scope_candidates.py @@ -357,20 +357,6 @@ def _rank_paths( return [path for _score, path in ranked] -def _include_local(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: - has_pairing = "source-test-pairing" in support_kinds - has_non_cochange = any(kind != "git-cochange" for kind in support_kinds) - return has_pairing or (same_dir and has_non_cochange) - - -def _include_cross(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: - return not ( - same_dir - and support_kinds - and all(kind == "git-cochange" for kind in support_kinds) - ) - - def build_scope_candidates( target: Target, repo_root: Path, @@ -394,8 +380,23 @@ def build_scope_candidates( ) candidates = [_build_seed_candidate(seed_file)] + def include_local(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: + return "source-test-pairing" in support_kinds or ( + same_dir and any(kind != "git-cochange" for kind in support_kinds) + ) + + def include_cross(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: + return not ( + same_dir + and support_kinds + and all(kind == "git-cochange" for kind in support_kinds) + ) + local_ranked = _rank_paths( - support.scores, support.support_kinds, seed_file, _include_local, + support.scores, + support.support_kinds, + seed_file, + include_local, ) local_extras = tuple(local_ranked[: max_files - 1]) if local_extras: @@ -407,7 +408,10 @@ def build_scope_candidates( ) cross_ranked = _rank_paths( - support.scores, support.support_kinds, seed_file, _include_cross, + support.scores, + support.support_kinds, + seed_file, + 
include_cross, ) cross_extras = tuple(cross_ranked[: max_files - 1]) if cross_extras: diff --git a/tests/test_scope_candidates.py b/tests/test_scope_candidates.py index e9f8c59..02e3b28 100644 --- a/tests/test_scope_candidates.py +++ b/tests/test_scope_candidates.py @@ -6,8 +6,6 @@ from conftest import init_repo from continuous_refactoring.scope_candidates import ( - _include_cross, - _include_local, build_scope_candidates, ) from continuous_refactoring.targeting import Target @@ -112,16 +110,26 @@ def test_local_git_cochange_alone_does_not_add_noisy_local_sibling( assert [candidate.kind for candidate in candidates] == ["seed"] -def test_local_scope_inclusion_uses_structured_support_kinds() -> None: - assert _include_local(True, ("direct-reference",)) - assert _include_local(False, ("source-test-pairing",)) - assert not _include_local(True, ("git-cochange",)) +def test_cross_cluster_excludes_same_dir_git_only_noise( + tmp_path: Path, +) -> None: + init_repo(tmp_path) + _write(tmp_path, "src/foo.py", "VALUE = 0\n") + _write(tmp_path, "src/helpers.py", "VALUE = 0\n") + _write(tmp_path, "cross/a.py", "VALUE = 0\n") + _commit_all(tmp_path, "seed files") + _write(tmp_path, "src/foo.py", "VALUE = 1\n") + _write(tmp_path, "src/helpers.py", "VALUE = 1\n") + _write(tmp_path, "cross/a.py", "VALUE = 1\n") + _commit_all(tmp_path, "mixed cochange") + + candidates = build_scope_candidates(_seed_target("src/foo.py"), tmp_path) -def test_cross_scope_inclusion_filters_same_dir_git_only_support() -> None: - assert not _include_cross(True, ("git-cochange",)) - assert _include_cross(True, ("git-cochange", "reverse-reference")) - assert _include_cross(False, ("git-cochange",)) + cross_cluster = next( + candidate for candidate in candidates if candidate.kind == "cross-cluster" + ) + assert cross_cluster.files == ("src/foo.py", "cross/a.py") def test_git_cochange_neighbors_are_capped_and_deterministic(tmp_path: Path) -> None: From d9dd3cb7c50c31e717593304b2329e7e10526e4e Mon Sep 17 
00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:43:07 -0700 Subject: [PATCH 011/103] continuous refactor: src/continuous_refactoring/decisions.py Why: Removes repeated summary/rationale fallback branches and locks the boundary-text contract behind one helper plus direct tests. Validation: uv run pytest --- src/continuous_refactoring/commit_messages.py | 8 ++++++-- src/continuous_refactoring/decisions.py | 6 +++++- src/continuous_refactoring/migration_tick.py | 19 ++++++++++++------- src/continuous_refactoring/phases.py | 5 +++-- tests/test_commit_messages.py | 10 ++++++++++ tests/test_decisions.py | 17 +++++++++++++++++ 6 files changed, 53 insertions(+), 12 deletions(-) diff --git a/src/continuous_refactoring/commit_messages.py b/src/continuous_refactoring/commit_messages.py index 07ac258..d3a2ce5 100644 --- a/src/continuous_refactoring/commit_messages.py +++ b/src/continuous_refactoring/commit_messages.py @@ -2,7 +2,11 @@ from pathlib import Path -from continuous_refactoring.decisions import AgentStatus, sanitize_text +from continuous_refactoring.decisions import ( + AgentStatus, + sanitize_text, + sanitized_text_or, +) __all__ = [ "build_commit_message", @@ -49,7 +53,7 @@ def commit_rationale( ): return summary - fallback_text = _present_text(sanitize_text(fallback, repo_root)) + fallback_text = _present_text(sanitized_text_or(fallback, repo_root, fallback)) if fallback_text is not None: return fallback_text return "Validated cleanup completed." 
diff --git a/src/continuous_refactoring/decisions.py b/src/continuous_refactoring/decisions.py index 0377225..6f08489 100644 --- a/src/continuous_refactoring/decisions.py +++ b/src/continuous_refactoring/decisions.py @@ -159,13 +159,17 @@ def sanitize_text(text: str | None, repo_root: Path) -> str | None: return " ".join(lines)[:240] +def sanitized_text_or(text: str | None, repo_root: Path, fallback: str) -> str: + return sanitize_text(text, repo_root) or fallback + + def status_summary( status: AgentStatus | None, *, fallback: str, repo_root: Path, ) -> tuple[str, str | None]: - summary = sanitize_text(status.summary if status else None, repo_root) or fallback + summary = sanitized_text_or(status.summary if status else None, repo_root, fallback) focus = sanitize_text(status.next_retry_focus if status else None, repo_root) return summary, focus diff --git a/src/continuous_refactoring/migration_tick.py b/src/continuous_refactoring/migration_tick.py index ad5d756..ff403e2 100644 --- a/src/continuous_refactoring/migration_tick.py +++ b/src/continuous_refactoring/migration_tick.py @@ -24,6 +24,7 @@ RouteOutcome, error_failure_kind, sanitize_text, + sanitized_text_or, ) from continuous_refactoring.effort import ( EffortBudget, @@ -269,7 +270,7 @@ def try_migration_tick( build_commit_message( f"{commit_message_prefix}: migration/{manifest.name}" f"/{phase_file_reference(phase)}", - why=sanitize_text(outcome.reason, repo_root) or outcome.reason, + why=sanitized_text_or(outcome.reason, repo_root, outcome.reason), validation=validation_command, ), artifacts=artifacts, @@ -369,7 +370,7 @@ def _log_phase_effort_deferred( def _ready_check_failure_record( error: ContinuousRefactorError, repo_root: Path, target_label: str, ) -> DecisionRecord: - summary = sanitize_text(str(error), repo_root) or str(error) + summary = sanitized_text_or(str(error), repo_root, str(error)) return DecisionRecord( decision="abandon", retry_recommendation="new-target", @@ -391,7 +392,7 @@ def 
_phase_failure_record( call_role=outcome.call_role or "phase.execute", phase_reached=outcome.phase_reached or "phase.execute", failure_kind=outcome.failure_kind or "phase-failed", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, + summary=sanitized_text_or(outcome.reason, repo_root, outcome.reason), retry_used=outcome.retry, ) @@ -408,7 +409,7 @@ def _phase_commit_record( call_role="phase.execute", phase_reached="phase.execute", failure_kind="none", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, + summary=sanitized_text_or(outcome.reason, repo_root, outcome.reason), ) @@ -438,7 +439,7 @@ def _defer_manifest( def _human_review_record( reason: str, repo_root: Path, target_label: str, ) -> DecisionRecord: - summary = sanitize_text(reason, repo_root) or "Phase requires human review" + summary = sanitized_text_or(reason, repo_root, "Phase requires human review") return DecisionRecord( decision="blocked", retry_recommendation="human-review", @@ -458,7 +459,7 @@ def _deferred_record(reason: str, repo_root: Path, target_label: str) -> Decisio call_role="phase.ready-check", phase_reached="phase.ready-check", failure_kind="phase-ready-no", - summary=sanitize_text(reason, repo_root) or "Migration phase not ready", + summary=sanitized_text_or(reason, repo_root, "Migration phase not ready"), ) @@ -472,5 +473,9 @@ def _effort_deferred_record( call_role="phase.effort-budget", phase_reached="phase.effort-budget", failure_kind="phase-effort-over-budget", - summary=sanitize_text(reason, repo_root) or "Migration phase over effort budget", + summary=sanitized_text_or( + reason, + repo_root, + "Migration phase over effort budget", + ), ) diff --git a/src/continuous_refactoring/phases.py b/src/continuous_refactoring/phases.py index 4d110f0..865b0e3 100644 --- a/src/continuous_refactoring/phases.py +++ b/src/continuous_refactoring/phases.py @@ -26,6 +26,7 @@ read_status, resolved_phase_reached, sanitize_text, + sanitized_text_or, 
status_summary, ) from continuous_refactoring.git import get_head_sha, revert_to @@ -347,7 +348,7 @@ def _run_phase_agent( timeout=timeout, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) + summary = sanitized_text_or(str(error), repo_root, str(error)) return _PhaseAgentRun( status=None, phase_reached=_PHASE_EXECUTE_ROLE, @@ -448,7 +449,7 @@ def _run_phase_validation( except ContinuousRefactorError as error: summary, focus = status_summary( agent_run.status, - fallback=sanitize_text(str(error), repo_root) or str(error), + fallback=sanitized_text_or(str(error), repo_root, str(error)), repo_root=repo_root, ) return _PhaseValidationResult( diff --git a/tests/test_commit_messages.py b/tests/test_commit_messages.py index b91a303..0550f56 100644 --- a/tests/test_commit_messages.py +++ b/tests/test_commit_messages.py @@ -58,3 +58,13 @@ def test_commit_rationale_ignores_placeholder_summary() -> None: ) assert rationale == "agent stdout explained the cleanup" + + +def test_commit_rationale_sanitizes_fallback_text() -> None: + rationale = commit_rationale( + None, + fallback="Touched /repo/src/continuous_refactoring/decisions.py", + repo_root=Path("/repo"), + ) + + assert rationale == "Touched /src/continuous_refactoring/decisions.py" diff --git a/tests/test_decisions.py b/tests/test_decisions.py index 6b52170..0f5f4e1 100644 --- a/tests/test_decisions.py +++ b/tests/test_decisions.py @@ -13,6 +13,7 @@ error_failure_kind, parse_status_block, sanitize_text, + sanitized_text_or, ) from continuous_refactoring.prompts import ( CONTINUOUS_REFACTORING_STATUS_BEGIN, @@ -195,6 +196,22 @@ def test_sanitize_text_is_idempotent() -> None: assert sanitize_text(once, repo_root) == once +def test_sanitized_text_or_prefers_sanitized_text() -> None: + repo_root = Path("/repo") + + assert ( + sanitized_text_or(" touched /repo/src/file.py ", repo_root, "fallback") + == "touched /src/file.py" + ) + + +def 
test_sanitized_text_or_uses_fallback_when_sanitized_text_is_empty() -> None: + assert ( + sanitized_text_or("codex exec --help", Path("/repo"), "fallback") + == "fallback" + ) + + @pytest.mark.parametrize( ("message", "expected"), [ From ef84cc5d19a8e58c11f37471dae9f71988e5dec2 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:49:17 -0700 Subject: [PATCH 012/103] continuous refactor: src/continuous_refactoring/scope_candidates.py Why: Preserves the current kind-based selection contract while preventing silent misrouting if future refactors produce duplicate candidate kinds. Validation: uv run pytest --- src/continuous_refactoring/scope_expansion.py | 19 +++++++++++++++- tests/test_scope_expansion.py | 22 +++++++++++++++++++ tests/test_scope_selection.py | 8 +++++++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/continuous_refactoring/scope_expansion.py b/src/continuous_refactoring/scope_expansion.py index 348bb7e..f5ab884 100644 --- a/src/continuous_refactoring/scope_expansion.py +++ b/src/continuous_refactoring/scope_expansion.py @@ -64,6 +64,20 @@ def _parse_selection_line(line: str) -> tuple[ScopeCandidateKind, str] | None: return None +def _require_unique_candidate_kinds( + candidate_kinds: tuple[ScopeCandidateKind, ...], +) -> tuple[ScopeCandidateKind, ...]: + duplicates = tuple( + kind for kind in _KNOWN_SCOPE_SELECTION_KINDS if candidate_kinds.count(kind) > 1 + ) + if duplicates: + quoted = ", ".join(repr(kind) for kind in duplicates) + raise ContinuousRefactorError( + f"Scope selection requires unique candidate kinds, got duplicates: {quoted}" + ) + return candidate_kinds + + def scope_expansion_bypass_reason(target: Target) -> str | None: if len(target.files) == 0: return "scope expansion requires a seed file" @@ -78,6 +92,7 @@ def parse_scope_selection( stdout: str, candidate_kinds: tuple[ScopeCandidateKind, ...], ) -> ScopeSelection: + candidate_kinds = _require_unique_candidate_kinds(candidate_kinds) non_blank 
= [line.strip() for line in stdout.splitlines() if line.strip()] if not non_blank: raise ContinuousRefactorError("Scope selection produced no output") @@ -115,6 +130,9 @@ def select_scope_candidate( retry: int = 1, effort_metadata: dict[str, object] | None = None, ) -> ScopeSelection: + candidate_kinds = _require_unique_candidate_kinds( + tuple(candidate.kind for candidate in candidates) + ) selection_dir = artifacts.root / "scope-expansion" selection_dir.mkdir(parents=True, exist_ok=True) selection_stdout_path = selection_dir / "selection.stdout.log" @@ -178,7 +196,6 @@ def select_scope_candidate( raise ContinuousRefactorError( f"Scope selection agent failed with exit code {result.returncode}" ) - candidate_kinds = tuple(candidate.kind for candidate in candidates) try: selection = parse_scope_selection(result.stdout, candidate_kinds) except ContinuousRefactorError as error: diff --git a/tests/test_scope_expansion.py b/tests/test_scope_expansion.py index cce912c..b3bc77b 100644 --- a/tests/test_scope_expansion.py +++ b/tests/test_scope_expansion.py @@ -237,6 +237,28 @@ def fake_run_agent(**_: object) -> CommandCapture: assert failed["call_status"] == "failed" +def test_select_scope_candidate_rejects_duplicate_candidate_kinds( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + target = Target(description="clean up", files=("README.md",), provenance="globs") + candidates = (_candidate("local-cluster"), _candidate("local-cluster")) + artifacts = _make_artifacts(tmp_path, monkeypatch) + + with pytest.raises(ContinuousRefactorError, match="requires unique candidate kinds"): + select_scope_candidate( + target, + candidates, + "taste", + tmp_path, + artifacts, + agent="codex", + model="gpt-5.5", + effort="low", + timeout=None, + ) + + def test_select_scope_candidate_multi_candidate_logs_call_events_with_effort( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, diff --git a/tests/test_scope_selection.py b/tests/test_scope_selection.py index 65f5dff..3a416d7 
100644 --- a/tests/test_scope_selection.py +++ b/tests/test_scope_selection.py @@ -66,3 +66,11 @@ def test_selection_parser_rejects_kind_outside_available_candidates() -> None: "selected-candidate: cross-cluster\n", ("seed", "local-cluster"), ) + + +def test_selection_parser_rejects_duplicate_candidate_kinds() -> None: + with pytest.raises(ContinuousRefactorError, match="requires unique candidate kinds"): + parse_scope_selection( + "selected-candidate: local-cluster\n", + ("seed", "local-cluster", "local-cluster"), + ) From d4964cbcbabf16c71f8472a28aeeac80d811056b Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:53:03 -0700 Subject: [PATCH 013/103] continuous refactor: tests/test_cli_version.py Why: Removes a duplicated load-bearing package string while adding a higher-value test for the actual --version CLI boundary Validation: uv run pytest --- src/continuous_refactoring/cli.py | 7 ++++++- tests/test_cli_version.py | 25 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/continuous_refactoring/cli.py b/src/continuous_refactoring/cli.py index f7bcbdd..872caf8 100644 --- a/src/continuous_refactoring/cli.py +++ b/src/continuous_refactoring/cli.py @@ -32,6 +32,7 @@ ) from continuous_refactoring.review_cli import handle_review +_PACKAGE_DISTRIBUTION = "continuous-refactoring" _TASTE_WARNING = "warning: taste out of date — run `continuous-refactoring taste --upgrade`" _GLOBAL_TASTE_WARNING = ( "warning: global taste is out of date — " @@ -39,6 +40,10 @@ ) +def _version_banner() -> str: + return f"{_PACKAGE_DISTRIBUTION} {metadata_version(_PACKAGE_DISTRIBUTION)}" + + def parse_max_attempts(value: str) -> int: try: attempts = int(value) @@ -279,7 +284,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument( "--version", action="version", - version=f"continuous-refactoring {metadata_version('continuous-refactoring')}", + version=_version_banner(), ) subparsers = 
parser.add_subparsers(dest="command") diff --git a/tests/test_cli_version.py b/tests/test_cli_version.py index 9b89d1b..b664b19 100644 --- a/tests/test_cli_version.py +++ b/tests/test_cli_version.py @@ -1,13 +1,14 @@ from __future__ import annotations +import sys + import pytest from continuous_refactoring import cli -def test_global_version_uses_installed_package_metadata( +def test_build_parser_uses_installed_package_metadata_for_version_banner( monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], ) -> None: package_names: list[str] = [] @@ -17,10 +18,22 @@ def fake_metadata_version(package_name: str) -> str: monkeypatch.setattr(cli, "metadata_version", fake_metadata_version) - parser = cli.build_parser() + cli.build_parser() + + assert package_names == [cli._PACKAGE_DISTRIBUTION] + + +def test_cli_main_version_prints_banner_without_stale_taste_warning( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setattr(cli, "metadata_version", lambda _: "9.8.7") + monkeypatch.setattr(sys, "argv", ["continuous-refactoring", "--version"]) + with pytest.raises(SystemExit) as exc_info: - parser.parse_args(["--version"]) + cli.cli_main() + captured = capsys.readouterr() assert exc_info.value.code == 0 - assert package_names == ["continuous-refactoring"] - assert capsys.readouterr().out == "continuous-refactoring 9.8.7\n" + assert captured.out == "continuous-refactoring 9.8.7\n" + assert captured.err == "" From 0a089f1c3160b8b5316b0f42d4d6d52748c5aa08 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 00:57:33 -0700 Subject: [PATCH 014/103] continuous refactor: tests/test_cli_upgrade.py Why: Preserves the same upgrade behavior with a smaller, truer test file that is easier to extend without copy-paste drift. 
Validation: uv run pytest --- tests/test_cli_upgrade.py | 118 ++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 62 deletions(-) diff --git a/tests/test_cli_upgrade.py b/tests/test_cli_upgrade.py index 1118839..1674a78 100644 --- a/tests/test_cli_upgrade.py +++ b/tests/test_cli_upgrade.py @@ -2,6 +2,7 @@ import argparse import json +from collections.abc import Callable from pathlib import Path import pytest @@ -16,13 +17,17 @@ register_project, ) +_LEGACY_TASTE = "- Old taste without version.\n" + def _upgrade_args() -> argparse.Namespace: return argparse.Namespace(command="upgrade") -def _set_xdg_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) +def _set_xdg_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + xdg_root = tmp_path / "xdg" + monkeypatch.setenv("XDG_DATA_HOME", str(xdg_root)) + return xdg_root def _register_project_with_upgrade_layout( @@ -35,6 +40,30 @@ def _register_project_with_upgrade_layout( register_project(repo) +def _write_stale_config_manifest(xdg_root: Path) -> None: + manifest_dir = xdg_root / "continuous-refactoring" + manifest_dir.mkdir(parents=True, exist_ok=True) + (manifest_dir / "manifest.json").write_text( + json.dumps({"projects": {}}), encoding="utf-8", + ) + + +def _write_global_taste(text: str) -> None: + gdir = global_dir() + gdir.mkdir(parents=True, exist_ok=True) + (gdir / "taste.md").write_text(text, encoding="utf-8") + + +def _assert_upgrade_fails_for_bad_config( + capsys: pytest.CaptureFixture[str], +) -> None: + with pytest.raises(SystemExit) as exc_info: + _handle_upgrade(_upgrade_args()) + + assert exc_info.value.code == 1 + assert "config version" in capsys.readouterr().err + + # --------------------------------------------------------------------------- # Happy path: current config version → exit 0 # --------------------------------------------------------------------------- @@ -60,49 +89,27 @@ def 
test_upgrade_is_idempotent( # --------------------------------------------------------------------------- -# Failure: missing config → exit 1 -# --------------------------------------------------------------------------- - - -def test_upgrade_fails_when_config_missing( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - _set_xdg_home(tmp_path, monkeypatch) - - with pytest.raises(SystemExit) as exc_info: - _handle_upgrade(_upgrade_args()) - - assert exc_info.value.code == 1 - err = capsys.readouterr().err - assert "config version" in err - - -# --------------------------------------------------------------------------- -# Failure: stale config version → exit 1 +# Failure: missing or stale config → exit 1 # --------------------------------------------------------------------------- -def test_upgrade_fails_when_config_stale( +@pytest.mark.parametrize( + "prepare_config", + [ + lambda xdg_root: None, + _write_stale_config_manifest, + ], + ids=["missing", "stale"], +) +def test_upgrade_fails_for_missing_or_stale_config( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + prepare_config: Callable[[Path], None], ) -> None: - _set_xdg_home(tmp_path, monkeypatch) - - manifest_dir = tmp_path / "xdg" / "continuous-refactoring" - manifest_dir.mkdir(parents=True, exist_ok=True) - (manifest_dir / "manifest.json").write_text( - json.dumps({"projects": {}}), encoding="utf-8", - ) - - with pytest.raises(SystemExit) as exc_info: - _handle_upgrade(_upgrade_args()) - - assert exc_info.value.code == 1 - err = capsys.readouterr().err - assert "config version" in err + xdg_root = _set_xdg_home(tmp_path, monkeypatch) + prepare_config(xdg_root) + _assert_upgrade_fails_for_bad_config(capsys) # --------------------------------------------------------------------------- @@ -116,10 +123,7 @@ def test_upgrade_warns_on_stale_global_taste( capsys: pytest.CaptureFixture[str], ) -> None: 
_register_project_with_upgrade_layout(tmp_path, monkeypatch) - - gdir = global_dir() - gdir.mkdir(parents=True, exist_ok=True) - (gdir / "taste.md").write_text("- Old taste without version.\n", encoding="utf-8") + _write_global_taste(_LEGACY_TASTE) _handle_upgrade(_upgrade_args()) @@ -128,31 +132,21 @@ def test_upgrade_warns_on_stale_global_taste( assert "out of date" in err -def test_upgrade_no_taste_warning_when_current( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - _register_project_with_upgrade_layout(tmp_path, monkeypatch) - - gdir = global_dir() - gdir.mkdir(parents=True, exist_ok=True) - (gdir / "taste.md").write_text(default_taste_text(), encoding="utf-8") - - _handle_upgrade(_upgrade_args()) - - err = capsys.readouterr().err - assert err == "" - - -def test_upgrade_no_taste_warning_when_absent( +@pytest.mark.parametrize( + "taste_text", + [default_taste_text(), None], + ids=["current", "absent"], +) +def test_upgrade_skips_taste_warning_when_global_taste_is_current_or_absent( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + taste_text: str | None, ) -> None: _register_project_with_upgrade_layout(tmp_path, monkeypatch) + if taste_text is not None: + _write_global_taste(taste_text) _handle_upgrade(_upgrade_args()) - err = capsys.readouterr().err - assert err == "" + assert capsys.readouterr().err == "" From 780d22c65fb696fd1a855af6e77ab59bb283d1a5 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:03:41 -0700 Subject: [PATCH 015/103] continuous refactor: src/continuous_refactoring/failure_report.py Why: Removes duplicated attempt/transition field plumbing in failure reporting while preserving the tested commit-vs-failure boundary. 
Validation: uv run pytest --- src/continuous_refactoring/failure_report.py | 68 +++++++++++++++----- tests/test_failure_report.py | 41 ++++++++++-- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/src/continuous_refactoring/failure_report.py b/src/continuous_refactoring/failure_report.py index 1c2734e..3e0e0a2 100644 --- a/src/continuous_refactoring/failure_report.py +++ b/src/continuous_refactoring/failure_report.py @@ -271,6 +271,50 @@ def effective_record( ) +def _update_attempt_from_record( + artifacts: RunArtifacts, + *, + attempt: int, + retry: int, + record: DecisionRecord, + reason_doc_path: Path | None, +) -> None: + artifacts.update_attempt( + attempt, + target=record.target, + retry=retry, + call_role=record.call_role, + phase_reached=record.phase_reached, + decision=record.decision, + retry_recommendation=record.retry_recommendation, + failure_kind=record.failure_kind, + failure_summary=record.summary, + reason_doc_path=reason_doc_path, + ) + + +def _log_transition_from_record( + artifacts: RunArtifacts, + *, + attempt: int, + retry: int, + record: DecisionRecord, + reason_doc_path: Path | None, +) -> None: + artifacts.log_transition( + attempt=attempt, + retry=retry, + target=record.target, + call_role=record.call_role, + phase_reached=record.phase_reached, + decision=record.decision, + retry_recommendation=record.retry_recommendation, + failure_kind=record.failure_kind, + summary=record.summary, + reason_doc_path=reason_doc_path, + ) + + def persist_decision( repo_root: Path, artifacts: RunArtifacts, @@ -281,16 +325,11 @@ def persist_decision( record: DecisionRecord, ) -> Path | None: if record.decision == "commit": - artifacts.update_attempt( - attempt, - target=record.target, + _update_attempt_from_record( + artifacts, + attempt=attempt, retry=retry, - call_role=record.call_role, - phase_reached=record.phase_reached, - decision=record.decision, - retry_recommendation=record.retry_recommendation, - failure_kind=record.failure_kind, - 
failure_summary=record.summary, + record=record, reason_doc_path=None, ) return None @@ -314,16 +353,11 @@ def persist_decision( phase_reached=record.phase_reached, reason_doc_path=str(reason_doc), ) - artifacts.log_transition( + _log_transition_from_record( + artifacts, attempt=attempt, retry=retry, - target=record.target, - call_role=record.call_role, - phase_reached=record.phase_reached, - decision=record.decision, - retry_recommendation=record.retry_recommendation, - failure_kind=record.failure_kind, - summary=record.summary, + record=record, reason_doc_path=reason_doc, ) return reason_doc diff --git a/tests/test_failure_report.py b/tests/test_failure_report.py index 24ca551..54679f8 100644 --- a/tests/test_failure_report.py +++ b/tests/test_failure_report.py @@ -48,6 +48,26 @@ def _record(**overrides: object) -> DecisionRecord: return DecisionRecord(**values) +def assert_attempt_decision( + artifacts: RunArtifacts, + *, + attempt: int, + retry: int, + record: DecisionRecord, + reason_doc_path: str | None, +) -> None: + stats = artifacts.attempts[attempt] + assert stats.target == record.target + assert stats.retry == retry + assert stats.call_role == record.call_role + assert stats.phase_reached == record.phase_reached + assert stats.decision == record.decision + assert stats.retry_recommendation == record.retry_recommendation + assert stats.failure_kind == record.failure_kind + assert stats.failure_summary == record.summary + assert stats.reason_doc_path == reason_doc_path + + def test_effective_record_abandons_after_max_attempts() -> None: record = _record( summary="Still red", @@ -230,10 +250,13 @@ def fail_if_snapshot_dir_requested(_repo_root: Path) -> Path: assert result is None assert failure_snapshot_calls == 0 - stats = artifacts.attempts[1] - assert stats.decision == "commit" - assert stats.retry == 2 - assert stats.reason_doc_path is None + assert_attempt_decision( + artifacts, + attempt=1, + retry=2, + record=record, + reason_doc_path=None, + ) 
assert not artifacts.events_path.exists() @@ -258,9 +281,13 @@ def test_persist_decision_records_non_commit_snapshot( assert result is not None assert result.exists() - stats = artifacts.attempts[1] - assert stats.decision == "retry" - assert stats.reason_doc_path == str(result) + assert_attempt_decision( + artifacts, + attempt=1, + retry=1, + record=record, + reason_doc_path=str(result), + ) events = artifacts.events_path.read_text(encoding="utf-8") assert '"event": "failure_doc_written"' in events assert '"event": "target_transition"' in events From c79414a82a552c0bbe75f136a7913d31e1756857 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:07:29 -0700 Subject: [PATCH 016/103] continuous refactor: src/continuous_refactoring/routing_pipeline.py Why: Centralizing RouteResult and DecisionRecord assembly removes duplicate error-mapping logic in a hot orchestration path and makes future cleanup safer Validation: uv run pytest --- .../routing_pipeline.py | 112 +++++++++++------- tests/test_scope_loop_integration.py | 76 +++++++++++- 2 files changed, 143 insertions(+), 45 deletions(-) diff --git a/src/continuous_refactoring/routing_pipeline.py b/src/continuous_refactoring/routing_pipeline.py index f2df5c7..44fedec 100644 --- a/src/continuous_refactoring/routing_pipeline.py +++ b/src/continuous_refactoring/routing_pipeline.py @@ -61,6 +61,60 @@ class RouteResult: decision_record: DecisionRecord | None = None +def _sanitized_summary(text: str, repo_root: Path) -> str: + return sanitize_text(text, repo_root) or text + + +def _abandon_result( + *, + target: Target, + planning_context: str, + repo_root: Path, + error: ContinuousRefactorError, + call_role: str, +) -> RouteResult: + summary = _sanitized_summary(str(error), repo_root) + return RouteResult( + outcome="abandon", + target=target, + planning_context=planning_context, + decision_record=DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=target.description, + 
call_role=call_role, + phase_reached=call_role, + failure_kind=error_failure_kind(str(error)), + summary=summary, + ), + ) + + +def _planning_result( + *, + outcome: RouteOutcome, + target: Target, + planning_context: str, + repo_root: Path, + reason: str, +) -> RouteResult: + summary = _sanitized_summary(reason, repo_root) + return RouteResult( + outcome=outcome, + target=target, + planning_context=planning_context, + decision_record=DecisionRecord( + decision=outcome, + retry_recommendation="none" if outcome == "commit" else "new-target", + target=target.description, + call_role="planning.final-review", + phase_reached="planning.final-review", + failure_kind="none" if outcome == "commit" else "planning-rejected", + summary=summary, + ), + ) + + def _scope_bypass_context(target: Target, reason: str) -> str: lines = [ f"Scope expansion bypassed: {reason}", @@ -196,20 +250,12 @@ def route_and_run( timeout=timeout, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) - return RouteResult( - outcome="abandon", + return _abandon_result( target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role="classify", - phase_reached="classify", - failure_kind=error_failure_kind(str(error)), - summary=summary, - ), + repo_root=repo_root, + error=error, + call_role="classify", ) print(f"Classification: {decision} — {target.description}") @@ -241,24 +287,16 @@ def route_and_run( extra_context=planning_context, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) call_role = "planning.final-review" match = re.match(r"^(planning\.[a-z0-9-]+)\s+failed:", str(error)) if match: call_role = match.group(1) - return RouteResult( - outcome="abandon", + return _abandon_result( target=target, planning_context=planning_context, - decision_record=DecisionRecord( - 
decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role=call_role, - phase_reached=call_role, - failure_kind=error_failure_kind(str(error)), - summary=summary, - ), + repo_root=repo_root, + error=error, + call_role=call_role, ) finalize_commit( @@ -275,33 +313,19 @@ def route_and_run( print(f"Planning: {describe_planning_outcome(outcome.status)} — {outcome.reason}") if outcome.status == "skipped": - return RouteResult( + return _planning_result( outcome="abandon", target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role="planning.final-review", - phase_reached="planning.final-review", - failure_kind="planning-rejected", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, - ), + repo_root=repo_root, + reason=outcome.reason, ) - return RouteResult( + return _planning_result( outcome="commit", target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="commit", - retry_recommendation="none", - target=target.description, - call_role="planning.final-review", - phase_reached="planning.final-review", - failure_kind="none", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, - ), + repo_root=repo_root, + reason=outcome.reason, ) diff --git a/tests/test_scope_loop_integration.py b/tests/test_scope_loop_integration.py index 30c0faa..bd47ae8 100644 --- a/tests/test_scope_loop_integration.py +++ b/tests/test_scope_loop_integration.py @@ -7,7 +7,11 @@ import continuous_refactoring import continuous_refactoring.loop import continuous_refactoring.routing_pipeline -from continuous_refactoring.artifacts import RunArtifacts, create_run_artifacts +from continuous_refactoring.artifacts import ( + ContinuousRefactorError, + RunArtifacts, + create_run_artifacts, +) from continuous_refactoring.decisions import DecisionRecord from 
continuous_refactoring.routing_pipeline import RouteResult from continuous_refactoring.targeting import Target @@ -240,6 +244,76 @@ def fake_run_planning(*_args: object, **kwargs: object) -> StubPlanningOutcome: assert captured["extra_context"] == planning_context +def test_classifier_failure_returns_abandon_record( + routing_env: tuple[Path, RunArtifacts], + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, artifacts = routing_env + target = Target(description="seed", files=("README.md",), provenance="globs") + + monkeypatch.setattr( + "continuous_refactoring.routing_pipeline.classify_target", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + ContinuousRefactorError("transport failed") + ), + ) + + result = _invoke_route_and_run(repo_root, artifacts, target) + + assert result.outcome == "abandon" + assert result.decision_record == DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target="seed", + call_role="classify", + phase_reached="classify", + failure_kind="agent-infra-failure", + summary="transport failed", + ) + + +def test_planning_failure_uses_stage_label_in_abandon_record( + routing_env: tuple[Path, RunArtifacts], + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, artifacts = routing_env + + _patch_scope_expansion( + monkeypatch, + files=("src/foo.py",), + context="Selected scope candidate: local-cluster", + ) + monkeypatch.setattr( + "continuous_refactoring.routing_pipeline.classify_target", + lambda *_args, **_kwargs: "needs-plan", + ) + monkeypatch.setattr( + "continuous_refactoring.routing_pipeline.run_planning", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + ContinuousRefactorError( + "planning.review-2 failed: revised plan still has findings" + ) + ), + ) + + result = _invoke_route_and_run( + repo_root, + artifacts, + Target(description="seed", files=("src/foo.py",), provenance="globs"), + ) + + assert result.outcome == "abandon" + assert result.decision_record == DecisionRecord( + 
decision="abandon", + retry_recommendation="new-target", + target="seed", + call_role="planning.review-2", + phase_reached="planning.review-2", + failure_kind="agent-infra-failure", + summary="planning.review-2 failed: revised plan still has findings", + ) + + def test_live_migrations_unset_skips_scope_expansion_and_classification( run_once_env: Path, prompt_capture: list[str], From e10192e79e3620072eaa3230a5d04c76662d4cfe Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:12:47 -0700 Subject: [PATCH 017/103] continuous refactor: tests/test_loop_migration_tick.py Why: Preserves behavior while deleting private test scaffolding that duplicated shared fixture logic, reducing drift risk in a sensitive test path. Validation: uv run pytest --- tests/test_loop_migration_tick.py | 50 ++++++++----------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/tests/test_loop_migration_tick.py b/tests/test_loop_migration_tick.py index c46efa6..dab8b74 100644 --- a/tests/test_loop_migration_tick.py +++ b/tests/test_loop_migration_tick.py @@ -12,7 +12,6 @@ import continuous_refactoring import continuous_refactoring.loop from continuous_refactoring.artifacts import ( - CommandCapture, ContinuousRefactorError, create_run_artifacts, ) @@ -35,8 +34,6 @@ from conftest import ( make_run_once_args, - noop_agent, - noop_tests, patch_classifier_trap, ) @@ -215,18 +212,6 @@ def trap(*_a: object, **_k: object) -> object: monkeypatch.setattr("continuous_refactoring.migration_tick.execute_phase", trap) -def _patch_one_shot(monkeypatch: pytest.MonkeyPatch) -> list[str]: - prompts: list[str] = [] - - def capture(**kwargs: object) -> CommandCapture: - prompts.append(str(kwargs.get("prompt", ""))) - return noop_agent(**kwargs) - - monkeypatch.setattr("continuous_refactoring.loop.maybe_run_agent", capture) - monkeypatch.setattr("continuous_refactoring.loop.run_tests", noop_tests) - return prompts - - def _tick( live_dir: Path, repo_root: Path, @@ 
-1074,7 +1059,7 @@ def test_unverifiable_human_approval_uncertainty_still_blocks_for_review( def test_eligible_ready_migration_advances_phase( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, manifest_path = _seed_manifest( @@ -1091,7 +1076,6 @@ def test_eligible_ready_migration_advances_phase( ) check_calls = _patch_check_ready(monkeypatch, "yes") exec_calls = _patch_execute_phase(monkeypatch, status="done") - _patch_one_shot(monkeypatch) exit_code = _run_once(run_once_env) @@ -1108,6 +1092,7 @@ def test_eligible_ready_migration_advances_phase( def test_migration_labels_use_phase_file_not_numeric_cursor( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + prompt_capture: list[str], capsys: pytest.CaptureFixture[str], ) -> None: now = _utc_now() @@ -1127,7 +1112,6 @@ def test_migration_labels_use_phase_file_not_numeric_cursor( ) _patch_check_ready(monkeypatch, "yes") _patch_execute_phase(monkeypatch, status="done") - _patch_one_shot(monkeypatch) monkeypatch.setattr( "continuous_refactoring.loop._finalize_commit", lambda _repo_root, _head_before, message, **_kwargs: commit_messages.append(message), @@ -1144,7 +1128,7 @@ def test_migration_labels_use_phase_file_not_numeric_cursor( def test_phase_ready_check_receives_runtime_taste( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, _ = _seed_manifest( @@ -1174,7 +1158,6 @@ def fake_check_ready( fake_check_ready, ) _patch_execute_phase_trap(monkeypatch) - _patch_one_shot(monkeypatch) exit_code = _run_once(run_once_env) @@ -1188,18 +1171,16 @@ def fake_check_ready( def test_no_eligible_migrations_falls_through( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: live_dir 
= _migrations_dir(run_once_env) _patch_live_dir(monkeypatch, live_dir) classifier_calls = _patch_classifier_cohesive(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) # --------------------------------------------------------------------------- @@ -1208,7 +1189,7 @@ def test_no_eligible_migrations_falls_through( def test_eligible_not_ready_bumps_wake_up_on( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, manifest_path = _seed_manifest( @@ -1223,8 +1204,6 @@ def test_eligible_not_ready_bumps_wake_up_on( classifier_calls = _patch_classifier_cohesive(monkeypatch) _patch_check_ready(monkeypatch, "no", "prerequisites not met") _patch_execute_phase_trap(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 @@ -1236,7 +1215,7 @@ def test_eligible_not_ready_bumps_wake_up_on( assert reloaded.current_phase == "setup" assert eligible_now(reloaded, _utc_now()) is False - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) # --------------------------------------------------------------------------- @@ -1245,7 +1224,7 @@ def test_eligible_not_ready_bumps_wake_up_on( def test_future_wake_up_blocks_execution( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, manifest, _ = _seed_manifest( @@ -1261,16 +1240,14 @@ def test_future_wake_up_blocks_execution( _patch_live_dir(monkeypatch, live_dir) classifier_calls = _patch_classifier_cohesive(monkeypatch) _patch_execute_phase_trap(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = 
_run_once(run_once_env) assert exit_code == 0 - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) def test_unverifiable_phase_stores_human_review_reason( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: import pytest @@ -1287,7 +1264,6 @@ def test_unverifiable_phase_stores_human_review_reason( _patch_classifier_cohesive(monkeypatch) _patch_check_ready(monkeypatch, "unverifiable", reason) _patch_execute_phase_trap(monkeypatch) - _patch_one_shot(monkeypatch) with pytest.raises(ContinuousRefactorError, match="external dependency"): _run_once(run_once_env) @@ -1298,7 +1274,7 @@ def test_unverifiable_phase_stores_human_review_reason( def test_empty_current_phase_skips_migration_path( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, manifest, manifest_path = _seed_manifest( @@ -1315,13 +1291,11 @@ def test_empty_current_phase_skips_migration_path( check_calls = _patch_check_ready(monkeypatch, "yes") _patch_execute_phase_trap(monkeypatch) classifier_calls = _patch_classifier_cohesive(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 assert check_calls == [] - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) reloaded = load_manifest(manifest_path) assert reloaded.current_phase == "" From b498e66aceef46240589583ea15d857fb93f656b Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:17:37 -0700 Subject: [PATCH 018/103] continuous refactor: tests/test_decisions.py Why: Preserves caller-visible status parsing and retry defaults while making the module smaller, more direct, and better protected by boundary-level tests. 
Validation: uv run pytest --- src/continuous_refactoring/decisions.py | 29 ++++------- tests/test_decisions.py | 68 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/src/continuous_refactoring/decisions.py b/src/continuous_refactoring/decisions.py index 6f08489..6409751 100644 --- a/src/continuous_refactoring/decisions.py +++ b/src/continuous_refactoring/decisions.py @@ -36,6 +36,12 @@ _VALID_RETRY_RECOMMENDATIONS = frozenset( (*get_args(RetryRecommendation), None), ) +_DEFAULT_RETRY_RECOMMENDATIONS: dict[RunnerDecision, RetryRecommendation] = { + "commit": "none", + "retry": "same-target", + "abandon": "new-target", + "blocked": "human-review", +} @dataclass(frozen=True) @@ -68,13 +74,6 @@ class DecisionRecord: tests_stdout_path: Path | None = None tests_stderr_path: Path | None = None - -def _status_path_text(path: Path | None) -> str | None: - if path is None or not path.exists(): - return None - return path.read_text(encoding="utf-8") - - def parse_status_block(text: str | None) -> AgentStatus | None: if not text: return None @@ -134,8 +133,8 @@ def read_status( last_message_path: Path | None, fallback_text: str | None, ) -> AgentStatus | None: - if agent == "codex": - status = parse_status_block(_status_path_text(last_message_path)) + if agent == "codex" and last_message_path is not None and last_message_path.exists(): + status = parse_status_block(last_message_path.read_text(encoding="utf-8")) if status is not None: return status return parse_status_block(fallback_text) @@ -175,9 +174,7 @@ def status_summary( def resolved_phase_reached(status: AgentStatus | None, fallback: str) -> str: - if status is None: - return fallback - return status.phase_reached or fallback + return fallback if status is None else (status.phase_reached or fallback) def error_failure_kind(message: str) -> str: @@ -192,10 +189,4 @@ def error_failure_kind(message: str) -> str: def default_retry_recommendation( decision: RunnerDecision, ) -> 
RetryRecommendation: - if decision == "retry": - return "same-target" - if decision == "abandon": - return "new-target" - if decision == "blocked": - return "human-review" - return "none" + return _DEFAULT_RETRY_RECOMMENDATIONS[decision] diff --git a/tests/test_decisions.py b/tests/test_decisions.py index 0f5f4e1..8386e7d 100644 --- a/tests/test_decisions.py +++ b/tests/test_decisions.py @@ -8,12 +8,17 @@ import pytest from continuous_refactoring.decisions import ( + AgentStatus, RetryRecommendation, RunnerDecision, + default_retry_recommendation, error_failure_kind, parse_status_block, + read_status, + resolved_phase_reached, sanitize_text, sanitized_text_or, + status_summary, ) from continuous_refactoring.prompts import ( CONTINUOUS_REFACTORING_STATUS_BEGIN, @@ -155,6 +160,31 @@ def test_parse_status_block_never_raises_on_generated_corpus() -> None: assert all(isinstance(item, str) for item in status.evidence) +def test_read_status_prefers_codex_last_message_file(tmp_path: Path) -> None: + last_message_path = tmp_path / "codex-last-message.md" + last_message_path.write_text( + _status_block("summary: from file"), + encoding="utf-8", + ) + fallback = _status_block("summary: from fallback") + + codex_status = read_status( + "codex", + last_message_path=last_message_path, + fallback_text=fallback, + ) + other_status = read_status( + "claude", + last_message_path=last_message_path, + fallback_text=fallback, + ) + + assert codex_status is not None + assert codex_status.summary == "from file" + assert other_status is not None + assert other_status.summary == "from fallback" + + def test_sanitize_text_filters_and_redacts() -> None: repo_root = Path("/worktree/repo") text = "\n".join( @@ -212,6 +242,44 @@ def test_sanitized_text_or_uses_fallback_when_sanitized_text_is_empty() -> None: ) +def test_status_summary_sanitizes_summary_and_focus() -> None: + status = AgentStatus( + summary=" touched /repo/src/file.py ", + next_retry_focus=" /tmp/logs/run.txt ", + ) + + assert 
status_summary(status, fallback="fallback", repo_root=Path("/repo")) == ( + "touched /src/file.py", + "", + ) + + +def test_resolved_phase_reached_uses_fallback_for_missing_status_or_phase() -> None: + fallback = "review" + + assert resolved_phase_reached(None, fallback) == fallback + assert resolved_phase_reached(AgentStatus(), fallback) == fallback + assert resolved_phase_reached(AgentStatus(phase_reached="refactor"), fallback) == ( + "refactor" + ) + + +@pytest.mark.parametrize( + ("decision", "expected"), + [ + ("commit", "none"), + ("retry", "same-target"), + ("abandon", "new-target"), + ("blocked", "human-review"), + ], +) +def test_default_retry_recommendation_maps_each_decision( + decision: RunnerDecision, + expected: RetryRecommendation, +) -> None: + assert default_retry_recommendation(decision) == expected + + @pytest.mark.parametrize( ("message", "expected"), [ From f48ed63b0e25c63ca41c612593f05ea873a513fc Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:20:42 -0700 Subject: [PATCH 019/103] continuous refactor: tests/test_focus_on_live_migrations.py Why: Preserves CLI guard behavior while reducing test-only drift against the canonical run-loop argument shape. 
Validation: uv run pytest --- tests/test_focus_on_live_migrations.py | 45 +++++++++----------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/tests/test_focus_on_live_migrations.py b/tests/test_focus_on_live_migrations.py index 0704096..45dc831 100644 --- a/tests/test_focus_on_live_migrations.py +++ b/tests/test_focus_on_live_migrations.py @@ -182,41 +182,20 @@ def test_focused_loop_eligibility_rechecks_effort_deferred_phase_when_cap_rises( # --------------------------------------------------------------------------- -def _make_handle_run_args( - repo_root: Path, *, focus: bool, -) -> argparse.Namespace: - return argparse.Namespace( +def test_handle_run_without_focus_requires_targeting( + tmp_path: Path, +) -> None: + repo_root = tmp_path / "repo" + repo_root.mkdir() + args = make_run_loop_args( + repo_root, agent="claude", model="opus", effort="medium", validation_command="uv run pytest", - extensions=None, - globs=None, - targets=None, - paths=None, scope_instruction=None, - timeout=None, - refactoring_prompt=None, - fix_prompt=None, - show_agent_logs=False, - show_command_logs=False, - repo_root=repo_root, - max_attempts=None, - max_refactors=None, - commit_message_prefix="continuous refactor", - max_consecutive_failures=3, - sleep=0.0, - focus_on_live_migrations=focus, ) - -def test_handle_run_without_focus_requires_targeting( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, -) -> None: - repo_root = tmp_path / "repo" - repo_root.mkdir() - args = _make_handle_run_args(repo_root, focus=False) - from continuous_refactoring.cli import _handle_run with pytest.raises(SystemExit) as exc: @@ -229,7 +208,15 @@ def test_handle_run_with_focus_bypasses_targeting_and_max_refactors( ) -> None: repo_root = tmp_path / "repo" repo_root.mkdir() - args = _make_handle_run_args(repo_root, focus=True) + args = make_run_loop_args( + repo_root, + agent="claude", + model="opus", + effort="medium", + validation_command="uv run pytest", + scope_instruction=None, 
+ focus_on_live_migrations=True, + ) calls: list[argparse.Namespace] = [] From b48c03d01d9c77395aa19c34b0991aca2c8315e5 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:26:51 -0700 Subject: [PATCH 020/103] continuous refactor: tests/test_taste_interview.py Why: Preserves a small but real safety net by making taste CLI tests fail truthfully and share one arg-construction path, which reduces maintenance drift in the taste test cluster. Validation: uv run pytest --- tests/conftest.py | 4 ++++ tests/test_taste_interview.py | 13 +++++++------ tests/test_taste_refine.py | 8 +++----- tests/test_taste_upgrade.py | 26 ++++++++++++++++---------- 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4eb478f..94c504b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,6 +97,10 @@ def assert_single_run_final_status(repo_root: Path, expected_status: str) -> Non assert summary["final_status"] == expected_status +def fail_if_taste_agent_runs(*_args: object, **_kwargs: object) -> int: + pytest.fail("taste agent should not be invoked") + + def make_taste_agent_writer( *, content: str | None = None, diff --git a/tests/test_taste_interview.py b/tests/test_taste_interview.py index 77e38b5..7a75d28 100644 --- a/tests/test_taste_interview.py +++ b/tests/test_taste_interview.py @@ -6,6 +6,7 @@ import pytest from conftest import ( extract_settle_path, + fail_if_taste_agent_runs, init_repo, init_repo_with_temp_home, init_taste_project, @@ -42,11 +43,6 @@ def _interview_args( effort=effort, ) - -def _fail_if_taste_agent_runs(**_: object) -> int: - pytest.fail("taste agent should not be invoked") - - # --------------------------------------------------------------------------- # Flag validation # --------------------------------------------------------------------------- @@ -173,7 +169,7 @@ def test_interview_refuses_overwrite_without_force( taste_path = init_taste_project(tmp_path, monkeypatch) 
taste_path.write_text("- pre-existing custom\n", encoding="utf-8") - monkeypatch.setattr(_AGENT_RUNNER_PATH, _fail_if_taste_agent_runs) + monkeypatch.setattr(_AGENT_RUNNER_PATH, fail_if_taste_agent_runs) with pytest.raises(SystemExit) as exc_info: _handle_taste(_interview_args()) @@ -296,6 +292,11 @@ def test_interview_prompt_includes_existing_content( assert extract_settle_path(prompt) == taste_path.with_name("taste.md.done") +def test_fail_if_taste_agent_runs_matches_real_runner_signature() -> None: + with pytest.raises(pytest.fail.Exception): + fail_if_taste_agent_runs("codex", "m", "high", "prompt", Path.cwd()) + + # --------------------------------------------------------------------------- # Argparse-level validation sanity (covers the subparser wiring itself) # --------------------------------------------------------------------------- diff --git a/tests/test_taste_refine.py b/tests/test_taste_refine.py index c7a2314..7b57e56 100644 --- a/tests/test_taste_refine.py +++ b/tests/test_taste_refine.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest -from conftest import init_taste_project, make_taste_agent_writer +from conftest import init_taste_project, make_taste_agent_writer, make_taste_args from continuous_refactoring.cli import _handle_taste, build_parser from continuous_refactoring.config import default_taste_text, global_dir @@ -18,11 +18,9 @@ def _refine_args( effort: str | None = "high", force: bool = False, ) -> argparse.Namespace: - return argparse.Namespace( + return make_taste_args( + "refine", global_=global_, - interview=False, - upgrade=False, - refine=True, agent=agent, model=model, effort=effort, diff --git a/tests/test_taste_upgrade.py b/tests/test_taste_upgrade.py index 8c443fe..d1dbbcd 100644 --- a/tests/test_taste_upgrade.py +++ b/tests/test_taste_upgrade.py @@ -4,7 +4,13 @@ from pathlib import Path import pytest -from conftest import extract_settle_path, init_taste_project, make_taste_agent_writer +from conftest import ( + 
extract_settle_path, + fail_if_taste_agent_runs, + init_taste_project, + make_taste_agent_writer, + make_taste_args, +) from continuous_refactoring.cli import _handle_taste, build_parser from continuous_refactoring.config import ( @@ -24,16 +30,16 @@ def _upgrade_args( effort: str | None = "high", force: bool = False, ) -> argparse.Namespace: - return argparse.Namespace( - global_=global_, interview=False, upgrade=True, - agent=agent, model=model, effort=effort, force=force, + return make_taste_args( + "upgrade", + global_=global_, + agent=agent, + model=model, + effort=effort, + force=force, ) -def _fail_if_taste_agent_runs(**_: object) -> int: - pytest.fail("taste agent should not be invoked") - - # --------------------------------------------------------------------------- # No-op: current taste → agent NOT invoked # --------------------------------------------------------------------------- @@ -46,7 +52,7 @@ def test_upgrade_noop_on_current_taste( taste_path = init_taste_project(tmp_path, monkeypatch) taste_path.write_text(default_taste_text(), encoding="utf-8") - monkeypatch.setattr(_AGENT_RUNNER_PATH, _fail_if_taste_agent_runs) + monkeypatch.setattr(_AGENT_RUNNER_PATH, fail_if_taste_agent_runs) _handle_taste(_upgrade_args()) out = capsys.readouterr().out.strip() @@ -63,7 +69,7 @@ def test_upgrade_noop_on_current_global_taste( gdir.mkdir(parents=True, exist_ok=True) (gdir / "taste.md").write_text(default_taste_text(), encoding="utf-8") - monkeypatch.setattr(_AGENT_RUNNER_PATH, _fail_if_taste_agent_runs) + monkeypatch.setattr(_AGENT_RUNNER_PATH, fail_if_taste_agent_runs) _handle_taste(_upgrade_args(global_=True)) out = capsys.readouterr().out.strip() From a17bdfb968af1dc43c587ef008d4c667e8fccb58 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:32:02 -0700 Subject: [PATCH 021/103] continuous refactor: tests/test_git.py Why: Removes pointless count-based indirection in git state checks while adding direct coverage for dirty-state helpers 
and empty-commit rejection. Validation: uv run pytest --- src/continuous_refactoring/git.py | 2 +- tests/test_git.py | 54 +++++++++++++++++++------------ 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/continuous_refactoring/git.py b/src/continuous_refactoring/git.py index a8ff78b..22f056a 100644 --- a/src/continuous_refactoring/git.py +++ b/src/continuous_refactoring/git.py @@ -89,7 +89,7 @@ def repo_change_count(repo_root: Path) -> int: def repo_has_changes(repo_root: Path) -> bool: - return repo_change_count(repo_root) > 0 + return bool(workspace_status_lines(repo_root)) def current_branch(repo_root: Path) -> str: diff --git a/tests/test_git.py b/tests/test_git.py index 6aa2521..46f0c96 100644 --- a/tests/test_git.py +++ b/tests/test_git.py @@ -8,6 +8,12 @@ from conftest import init_repo +_FAILING_COMMAND = [ + "python", + "-c", + "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", +] + def test_discard_workspace_changes_restores_tracked_files_and_removes_untracked( tmp_path: Path, @@ -28,6 +34,30 @@ def test_discard_workspace_changes_restores_tracked_files_and_removes_untracked( assert continuous_refactoring.workspace_status_lines(repo) == [] +def test_repo_change_helpers_track_workspace_status(tmp_path: Path) -> None: + repo = tmp_path / "repo" + init_repo(repo) + + assert continuous_refactoring.repo_change_count(repo) == 0 + assert not continuous_refactoring.repo_has_changes(repo) + + (repo / "README.md").write_text("changed\n", encoding="utf-8") + (repo / "scratch.txt").write_text("scratch\n", encoding="utf-8") + + assert continuous_refactoring.repo_change_count(repo) == 2 + assert continuous_refactoring.repo_has_changes(repo) + + +def test_git_commit_rejects_clean_worktree(tmp_path: Path) -> None: + repo = tmp_path / "repo" + init_repo(repo) + + with pytest.raises(continuous_refactoring.ContinuousRefactorError) as exc: + continuous_refactoring.git_commit(repo, "no-op") + + assert str(exc.value) == "No changes 
to commit." + + def test_revert_to_restores_requested_head_and_removes_untracked( tmp_path: Path, ) -> None: @@ -55,25 +85,13 @@ def test_revert_to_restores_requested_head_and_removes_untracked( def test_run_command_checked_failure_raises_git_command_error(tmp_path: Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - with pytest.raises(continuous_refactoring.GitCommandError): - continuous_refactoring.run_command(command, cwd=tmp_path) + continuous_refactoring.run_command(_FAILING_COMMAND, cwd=tmp_path) def test_run_command_checked_failure_includes_cause_and_payload(tmp_path: Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - with pytest.raises(continuous_refactoring.GitCommandError) as exc: - continuous_refactoring.run_command(command, cwd=tmp_path) + continuous_refactoring.run_command(_FAILING_COMMAND, cwd=tmp_path) error = exc.value assert isinstance(error.__cause__, subprocess.CalledProcessError) @@ -98,14 +116,8 @@ def _raise(*_args: object, **_kwargs: object) -> subprocess.CompletedProcess[str def test_run_command_unchecked_returns_completed_process(tmp_path: Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - result = continuous_refactoring.run_command( - command, + _FAILING_COMMAND, cwd=tmp_path, check=False, ) From db88b62c2e237a9154c86fbfb7cd7eda84761c1d Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:35:58 -0700 Subject: [PATCH 022/103] continuous refactor: tests/test_run.py Why: Removes duplicated test setup and replaces it with one shared fail-fast helper, making the run-loop tests smaller and easier to change without touching behavior. 
Validation: uv run pytest --- tests/conftest.py | 22 ++++++++ tests/test_run.py | 127 ++++++++++++++-------------------------------- 2 files changed, 61 insertions(+), 88 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 94c504b..fce21b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -165,6 +165,28 @@ class RegisteredProjectLayout: taste_path: Path +def write_targets_file( + tmp_path: Path, + *, + count: int | None = None, + targets: list[dict[str, object]] | None = None, +) -> Path: + if (count is None) == (targets is None): + raise AssertionError("provide exactly one of count or targets") + if targets is None: + assert count is not None + targets = [ + {"description": f"target-{index}", "files": [f"file{index}.py"]} + for index in range(count) + ] + targets_file = tmp_path / "targets.jsonl" + targets_file.write_text( + "\n".join(json.dumps(target) for target in targets), + encoding="utf-8", + ) + return targets_file + + def init_repo(path: Path) -> None: path.mkdir(parents=True, exist_ok=True) continuous_refactoring.run_command(["git", "init", "-b", "main"], cwd=path) diff --git a/tests/test_run.py b/tests/test_run.py index 09cddbd..b7c71a7 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -28,6 +28,7 @@ noop_tests, read_single_run_events as _read_single_run_events, read_single_run_summary as _read_single_run_summary, + write_targets_file, ) @@ -59,18 +60,6 @@ def _write_live_manifest( save_manifest(manifest, migration_dir / "manifest.json") -def _write_targets_file(tmp_path: Path, count: int) -> Path: - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - "\n".join( - json.dumps({"description": f"target-{index}", "files": [f"file{index}.py"]}) - for index in range(count) - ), - encoding="utf-8", - ) - return targets_file - - def _migration_record( decision: Literal["commit", "abandon", "blocked"], *, @@ -375,14 +364,7 @@ def test_run_sleeps_only_between_targets( sleep_calls: list[float] = [] 
monkeypatch.setattr("continuous_refactoring.loop.time.sleep", sleep_calls.append) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - "\n".join( - json.dumps({"description": f"target-{index}", "files": [f"file{index}.py"]}) - for index in range(3) - ), - encoding="utf-8", - ) + targets_file = write_targets_file(tmp_path, count=3) args = make_run_loop_args( repo_root, @@ -472,14 +454,7 @@ def fail_first_attempt_then_pass( sleep_calls: list[float] = [] monkeypatch.setattr("continuous_refactoring.loop.time.sleep", sleep_calls.append) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - "\n".join( - json.dumps({"description": f"target-{index}", "files": [f"file{index}.py"]}) - for index in range(2) - ), - encoding="utf-8", - ) + targets_file = write_targets_file(tmp_path, count=2) args = make_run_loop_args( repo_root, @@ -627,14 +602,7 @@ def test_run_stops_after_max_consecutive_failures( _patch_run_loop_tests(monkeypatch, noop_tests) # Write a JSONL with 5 targets so we have enough attempts - targets_file = tmp_path / "targets.jsonl" - lines = [] - for i in range(5): - lines.append(json.dumps({ - "description": f"target-{i}", - "files": [f"file{i}.py"], - })) - targets_file.write_text("\n".join(lines), encoding="utf-8") + targets_file = write_targets_file(tmp_path, count=5) args = make_run_loop_args( repo_root, @@ -671,14 +639,7 @@ def alternating_agent(**kwargs: object) -> CommandCapture: _patch_run_loop_agent(monkeypatch, alternating_agent) _patch_run_loop_tests(monkeypatch, noop_tests) - targets_file = tmp_path / "targets.jsonl" - lines = [] - for i in range(6): - lines.append(json.dumps({ - "description": f"target-{i}", - "files": [f"file{i}.py"], - })) - targets_file.write_text("\n".join(lines), encoding="utf-8") + targets_file = write_targets_file(tmp_path, count=6) args = make_run_loop_args( repo_root, @@ -709,14 +670,15 @@ def model_capturing_agent(**kwargs: object) -> CommandCapture: _patch_run_loop_agent(monkeypatch, 
model_capturing_agent) _patch_run_loop_tests(monkeypatch, noop_tests) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - json.dumps({ - "description": "override target", - "files": ["foo.py"], - "model-override": "special-model", - }), - encoding="utf-8", + targets_file = write_targets_file( + tmp_path, + targets=[ + { + "description": "override target", + "files": ["foo.py"], + "model-override": "special-model", + } + ], ) args = make_run_loop_args( @@ -746,14 +708,15 @@ def effort_capturing_agent(**kwargs: object) -> CommandCapture: _patch_run_loop_agent(monkeypatch, effort_capturing_agent) _patch_run_loop_tests(monkeypatch, noop_tests) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - json.dumps({ - "description": "override effort target", - "files": ["foo.py"], - "effort-override": "xhigh", - }), - encoding="utf-8", + targets_file = write_targets_file( + tmp_path, + targets=[ + { + "description": "override effort target", + "files": ["foo.py"], + "effort-override": "xhigh", + } + ], ) args = make_run_loop_args( @@ -1080,10 +1043,9 @@ def capture_agent(**kwargs: object) -> CommandCapture: _patch_run_loop_agent(monkeypatch, capture_agent) _patch_run_loop_tests(monkeypatch, noop_tests) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - json.dumps({"description": "jsonl wins", "files": ["from-targets.py"]}) + "\n", - encoding="utf-8", + targets_file = write_targets_file( + tmp_path, + targets=[{"description": "jsonl wins", "files": ["from-targets.py"]}], ) args = make_run_loop_args( @@ -1227,10 +1189,9 @@ def test_cli_run_allows_targets_without_max_refactors( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - json.dumps({"description": "target", "files": ["file.py"]}), - encoding="utf-8", + targets_file = write_targets_file( + tmp_path, + targets=[{"description": "target", "files": ["file.py"]}], ) args = 
make_run_loop_args( run_loop_env, @@ -1404,14 +1365,7 @@ def counting_agent(**kwargs: object) -> CommandCapture: _patch_run_loop_agent(monkeypatch, counting_agent) _patch_run_loop_tests(monkeypatch, noop_tests) - targets_file = tmp_path / "targets.jsonl" - lines = [] - for i in range(10): - lines.append(json.dumps({ - "description": f"target-{i}", - "files": [f"file{i}.py"], - })) - targets_file.write_text("\n".join(lines), encoding="utf-8") + targets_file = write_targets_file(tmp_path, count=10) args = make_run_loop_args( repo_root, @@ -1677,15 +1631,12 @@ def fail_twice_then_pass( _patch_run_loop_agent(monkeypatch, touching_agent) _patch_run_loop_tests(monkeypatch, fail_twice_then_pass) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - "\n".join( - [ - json.dumps({"description": "target-a", "files": ["a.py"]}), - json.dumps({"description": "target-b", "files": ["b.py"]}), - ] - ), - encoding="utf-8", + targets_file = write_targets_file( + tmp_path, + targets=[ + {"description": "target-a", "files": ["a.py"]}, + {"description": "target-b", "files": ["b.py"]}, + ], ) args = make_run_loop_args( @@ -2005,7 +1956,7 @@ def test_run_non_runnable_migration_does_not_consume_max_refactors( ["git", "commit", "-m", "add live migration"], cwd=repo_root, ) - targets_file = _write_targets_file(tmp_path, 2) + targets_file = write_targets_file(tmp_path, count=2) monkeypatch.setattr( "continuous_refactoring.loop._resolve_live_migrations_dir", lambda _repo_root: live_dir, @@ -2101,7 +2052,7 @@ def test_run_preserves_non_runnable_migration_state_across_source_retry( ["git", "commit", "-m", "add live migration"], cwd=repo_root, ) - targets_file = _write_targets_file(tmp_path, 2) + targets_file = write_targets_file(tmp_path, count=2) monkeypatch.setattr( "continuous_refactoring.loop._resolve_live_migrations_dir", lambda _repo_root: live_dir, @@ -2199,7 +2150,7 @@ def test_run_runnable_migration_counts_as_one_action( live_dir = tmp_path / "live-migrations" 
live_dir.mkdir() _write_live_manifest(live_dir) - targets_file = _write_targets_file(tmp_path, 1) + targets_file = write_targets_file(tmp_path, count=1) monkeypatch.setattr( "continuous_refactoring.loop._resolve_live_migrations_dir", lambda _repo_root: live_dir, From 008550c3daa08eaf4949e3ad708a99d74be1d907 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:43:52 -0700 Subject: [PATCH 023/103] continuous refactor: src/continuous_refactoring/targeting.py Why: Preserves the effort-override contract while removing effort-domain logic from loop.py, improving locality and reducing orchestration noise. Validation: uv run pytest --- src/continuous_refactoring/effort.py | 25 ++++++++++++++++++++ src/continuous_refactoring/loop.py | 34 ++++------------------------ tests/test_effort.py | 28 +++++++++++++++++++++++ 3 files changed, 58 insertions(+), 29 deletions(-) diff --git a/src/continuous_refactoring/effort.py b/src/continuous_refactoring/effort.py index 5c6d616..5007824 100644 --- a/src/continuous_refactoring/effort.py +++ b/src/continuous_refactoring/effort.py @@ -21,6 +21,7 @@ "resolve_effort_budget", "resolve_phase_effort", "resolve_requested_effort", + "resolve_target_effort_budget", ] EffortTier = Literal["low", "medium", "high", "xhigh"] @@ -149,6 +150,30 @@ def resolve_requested_effort( ) +def resolve_target_effort_budget( + budget: EffortBudget, + requested_effort: object | None, +) -> tuple[EffortBudget, EffortResolution]: + has_override = requested_effort is not None + resolution = resolve_requested_effort( + budget, + requested_effort, + source="target-override" if has_override else "default", + reason=( + "target effort override capped by run budget" + if has_override + else "run default effort" + ), + ) + return ( + EffortBudget( + default_effort=resolution.effective_effort, + max_allowed_effort=budget.max_allowed_effort, + ), + resolution, + ) + + def resolve_phase_effort( budget: EffortBudget, required_effort: EffortTier | None, 
diff --git a/src/continuous_refactoring/loop.py b/src/continuous_refactoring/loop.py index 6f46366..b884e74 100644 --- a/src/continuous_refactoring/loop.py +++ b/src/continuous_refactoring/loop.py @@ -50,7 +50,7 @@ EffortBudget, EffortResolution, resolve_effort_budget, - resolve_requested_effort, + resolve_target_effort_budget, ) from continuous_refactoring.failure_report import effective_record, persist_decision from continuous_refactoring.git import ( @@ -125,30 +125,6 @@ def _effort_budget_from_args(args: argparse.Namespace) -> EffortBudget: return resolve_effort_budget(default_effort, max_allowed_effort) -def _target_effort_budget( - budget: EffortBudget, - target: Target, -) -> tuple[EffortBudget, EffortResolution]: - has_override = target.effort_override is not None - resolution = resolve_requested_effort( - budget, - target.effort_override, - source="target-override" if has_override else "default", - reason=( - "target effort override capped by run budget" - if has_override - else "run default effort" - ), - ) - return ( - EffortBudget( - default_effort=resolution.effective_effort, - max_allowed_effort=budget.max_allowed_effort, - ), - resolution, - ) - - def _log_effort_budget(artifacts: RunArtifacts, budget: EffortBudget) -> None: artifacts.log( "INFO", @@ -382,9 +358,9 @@ def run_once(args: argparse.Namespace) -> int: base_prompt = _resolve_base_prompt(args) model = target.model_override or args.model - target_effort_budget, effort_resolution = _target_effort_budget( + target_effort_budget, effort_resolution = resolve_target_effort_budget( base_effort_budget, - target, + target.effort_override, ) effort = target_effort_budget.default_effort @@ -745,9 +721,9 @@ def run_loop(args: argparse.Namespace) -> int: source_index += 1 artifacts.mark_attempt_started(action_index) model = target.model_override or args.model - target_effort_budget, effort_resolution = _target_effort_budget( + target_effort_budget, effort_resolution = resolve_target_effort_budget( 
base_effort_budget, - target, + target.effort_override, ) effort = target_effort_budget.default_effort effort_metadata = effort_resolution.event_fields() diff --git a/tests/test_effort.py b/tests/test_effort.py index 4904b9e..139c26d 100644 --- a/tests/test_effort.py +++ b/tests/test_effort.py @@ -10,6 +10,7 @@ resolve_effort_budget, resolve_phase_effort, resolve_requested_effort, + resolve_target_effort_budget, ) @@ -55,6 +56,33 @@ def test_target_override_requests_default_then_caps_to_max() -> None: assert resolution.capped is True +def test_target_effort_budget_uses_run_default_without_override() -> None: + budget = resolve_effort_budget("medium", "xhigh") + + target_budget, resolution = resolve_target_effort_budget(budget, None) + + assert target_budget.default_effort == "medium" + assert target_budget.max_allowed_effort == "xhigh" + assert resolution.source == "default" + assert resolution.requested_effort == "medium" + assert resolution.effective_effort == "medium" + assert resolution.reason == "run default effort" + + +def test_target_effort_budget_caps_override_and_updates_default() -> None: + budget = resolve_effort_budget("low", "medium") + + target_budget, resolution = resolve_target_effort_budget(budget, "xhigh") + + assert target_budget.default_effort == "medium" + assert target_budget.max_allowed_effort == "medium" + assert resolution.source == "target-override" + assert resolution.requested_effort == "xhigh" + assert resolution.effective_effort == "medium" + assert resolution.capped is True + assert resolution.reason == "target effort override capped by run budget" + + def test_phase_effort_uses_default_when_no_requirement() -> None: budget = resolve_effort_budget("medium", "xhigh") From b821b805b854464479dd133a011df879e158405a Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:47:52 -0700 Subject: [PATCH 024/103] continuous refactor: tests/test_run_once.py Why: Preserves a small but real cleanup that makes the run-once test 
cluster simpler and less drift-prone without changing behavior. Validation: uv run pytest --- tests/test_run_once.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tests/test_run_once.py b/tests/test_run_once.py index b86b195..acd5db5 100644 --- a/tests/test_run_once.py +++ b/tests/test_run_once.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json from pathlib import Path import pytest @@ -10,6 +9,7 @@ from continuous_refactoring.artifacts import CommandCapture, ContinuousRefactorError from conftest import ( + assert_single_prompt, failing_tests, make_run_once_args, noop_agent, @@ -17,18 +17,10 @@ read_single_run_events, read_single_run_summary, touch_file_agent, + write_targets_file, ) -def _run_once_prompt_capture( - run_once_env: Path, prompt_capture: list[str], **kwargs: object -) -> str: - args = make_run_once_args(run_once_env, **kwargs) - continuous_refactoring.run_once(args) - assert len(prompt_capture) == 1 - return prompt_capture[0] - - def _is_baseline_validation(stdout_path: Path) -> bool: parts = stdout_path.parts return "baseline" in parts and "initial" in parts @@ -54,9 +46,9 @@ def test_run_once_prompt_composition( kwargs: dict[str, object], needles: tuple[str, ...], ) -> None: - prompt = _run_once_prompt_capture(run_once_env, prompt_capture, **kwargs) - for needle in needles: - assert needle in prompt + args = make_run_once_args(run_once_env, **kwargs) + continuous_refactoring.run_once(args) + assert_single_prompt(prompt_capture, *needles) def test_run_once_baseline_validation_blocks_routing_and_agent_when_red( @@ -308,14 +300,13 @@ def effort_capturing_agent(**kwargs: object) -> CommandCapture: monkeypatch.setattr("continuous_refactoring.loop.maybe_run_agent", effort_capturing_agent) monkeypatch.setattr("continuous_refactoring.loop.run_tests", noop_tests) - targets_file = tmp_path / "targets.jsonl" - targets_file.write_text( - json.dumps({ + targets_file = write_targets_file( + 
tmp_path, + targets=[{ "description": "direct override", "files": ["foo.py"], "effort-override": "xhigh", - }), - encoding="utf-8", + }], ) args = make_run_once_args( run_once_env, From 1df1602606c04f3a43c98021d51822366e6b0ed3 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:50:55 -0700 Subject: [PATCH 025/103] continuous refactor: tests/test_cli_taste_warning.py Why: Reduces duplicated fixture mechanics in the CLI taste-warning tests, making the protected warning behavior easier to read and safer to extend. Validation: uv run pytest --- tests/test_cli_taste_warning.py | 54 ++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/tests/test_cli_taste_warning.py b/tests/test_cli_taste_warning.py index 93c1d50..e14986a 100644 --- a/tests/test_cli_taste_warning.py +++ b/tests/test_cli_taste_warning.py @@ -18,16 +18,34 @@ _LEGACY_TASTE = "- Old taste without version header.\n" -def _write_stale_taste(xdg_root: Path) -> None: +def _write_global_taste(xdg_root: Path, text: str) -> None: taste_dir = xdg_root / "continuous-refactoring" / "global" taste_dir.mkdir(parents=True, exist_ok=True) - (taste_dir / "taste.md").write_text(_LEGACY_TASTE, encoding="utf-8") + (taste_dir / "taste.md").write_text(text, encoding="utf-8") + + +def _write_stale_taste(xdg_root: Path) -> None: + _write_global_taste(xdg_root, _LEGACY_TASTE) def _write_current_taste(xdg_root: Path) -> None: - taste_dir = xdg_root / "continuous-refactoring" / "global" - taste_dir.mkdir(parents=True, exist_ok=True) - (taste_dir / "taste.md").write_text(default_taste_text(), encoding="utf-8") + _write_global_taste(xdg_root, default_taste_text()) + + +def _register_repo_with_taste( + *, + repo: Path, + monkeypatch: pytest.MonkeyPatch, + taste_text: str, +) -> None: + init_repo(repo) + monkeypatch.chdir(repo) + + project = register_project(repo) + set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) + taste_path = repo / 
DEFAULT_REPO_TASTE_PATH + taste_path.parent.mkdir(parents=True, exist_ok=True) + taste_path.write_text(taste_text, encoding="utf-8") _SUBCOMMANDS: list[tuple[list[str], str]] = [ @@ -125,14 +143,11 @@ def test_taste_warning_uses_configured_repo_taste( ) -> None: _write_current_taste(xdg_root) repo = tmp_path / "repo" - init_repo(repo) - monkeypatch.chdir(repo) - - project = register_project(repo) - set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) - taste_path = repo / DEFAULT_REPO_TASTE_PATH - taste_path.parent.mkdir(parents=True, exist_ok=True) - taste_path.write_text(_LEGACY_TASTE, encoding="utf-8") + _register_repo_with_taste( + repo=repo, + monkeypatch=monkeypatch, + taste_text=_LEGACY_TASTE, + ) monkeypatch.setattr(sys, "argv", ["cr", "upgrade"]) monkeypatch.setattr(cli, "_handle_upgrade", lambda _: None) @@ -150,14 +165,11 @@ def test_current_repo_taste_suppresses_stale_global_warning( ) -> None: _write_stale_taste(xdg_root) repo = tmp_path / "repo" - init_repo(repo) - monkeypatch.chdir(repo) - - project = register_project(repo) - set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) - taste_path = repo / DEFAULT_REPO_TASTE_PATH - taste_path.parent.mkdir(parents=True, exist_ok=True) - taste_path.write_text(default_taste_text(), encoding="utf-8") + _register_repo_with_taste( + repo=repo, + monkeypatch=monkeypatch, + taste_text=default_taste_text(), + ) monkeypatch.setattr(sys, "argv", ["cr", "upgrade"]) monkeypatch.setattr(cli, "_handle_upgrade", lambda _: None) From a66e13c8156c025b5b3e24275b33d365edac9254 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:56:07 -0700 Subject: [PATCH 026/103] continuous refactor: tests/test_targeting.py Why: This makes `targeting` easier to read and change by replacing exception-shaped local validation with direct validity checks, with behavior pinned by tests. 
Validation: uv run pytest --- src/continuous_refactoring/targeting.py | 33 +++++++++++-------------- tests/test_targeting.py | 14 +++++++++++ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/continuous_refactoring/targeting.py b/src/continuous_refactoring/targeting.py index a943f02..95e91ae 100644 --- a/src/continuous_refactoring/targeting.py +++ b/src/continuous_refactoring/targeting.py @@ -45,10 +45,6 @@ def _warn_skip(message: str) -> None: print(f"warning: target line has {message}, skipping", file=sys.stderr) -class _InvalidTargetFieldError(ValueError): - """Raised when a JSONL target line contains an invalid optional field.""" - - def parse_extensions(raw: str) -> tuple[str, ...]: """Convert comma-separated extensions to glob patterns. @@ -80,25 +76,27 @@ def parse_paths_arg(raw_paths: str | None) -> tuple[str, ...] | None: return parsed or None -def _optional_str(data: dict[str, object], key: str) -> str | None: +def _optional_str(data: dict[str, object], key: str) -> tuple[bool, str | None]: value = data.get(key) if value is None: - return None + return True, None if isinstance(value, str) and value.strip(): - return value + return True, value _warn_skip(f"non-string or empty {key}") - raise _InvalidTargetFieldError(key) + return False, None -def _optional_effort_override(data: dict[str, object]) -> str | None: - value = _optional_str(data, "effort-override") +def _optional_effort_override(data: dict[str, object]) -> tuple[bool, str | None]: + valid, value = _optional_str(data, "effort-override") + if not valid: + return False, None if value is None: - return None + return True, None try: - return require_effort_tier(value, field="effort-override") + return True, require_effort_tier(value, field="effort-override") except ContinuousRefactorError: _warn_skip("invalid effort-override") - raise _InvalidTargetFieldError("effort-override") + return False, None def validate_target_line(data: object) -> Target | None: @@ -120,11 +118,10 @@ def 
validate_target_line(data: object) -> Target | None: _warn_skip("invalid file entries") return None - try: - scoping = _optional_str(data, "scoping") - model_override = _optional_str(data, "model-override") - effort_override = _optional_effort_override(data) - except _InvalidTargetFieldError: + valid_scoping, scoping = _optional_str(data, "scoping") + valid_model_override, model_override = _optional_str(data, "model-override") + valid_effort_override, effort_override = _optional_effort_override(data) + if not (valid_scoping and valid_model_override and valid_effort_override): return None return Target( diff --git a/tests/test_targeting.py b/tests/test_targeting.py index a6acc58..d71424b 100644 --- a/tests/test_targeting.py +++ b/tests/test_targeting.py @@ -309,6 +309,20 @@ def test_validate_target_line_warns_for_each_invalid_optional_field(capsys) -> N assert f"non-string or empty {key}" in captured.err +def test_validate_target_line_warns_for_invalid_effort_override_tier(capsys) -> None: + target = validate_target_line( + { + "description": "good", + "files": ["src/**/*.py"], + "effort-override": "extreme", + }, + ) + + assert target is None + captured = capsys.readouterr() + assert "invalid effort-override" in captured.err + + def test_load_targets_jsonl_skips_invalid_optional_fields(tmp_path: Path, capsys) -> None: jsonl = tmp_path / "targets.jsonl" lines = [ From 0615c1d41c8da77f71fd869e1e36dc11cdfc329d Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 01:59:55 -0700 Subject: [PATCH 027/103] continuous refactor: tests/test_routing.py Why: preserves behavior while removing repeated failure-event construction that could drift across classifier error paths Validation: uv run pytest --- src/continuous_refactoring/routing.py | 57 +++++++++++++++++---------- tests/test_routing.py | 34 ++++++++++++++++ 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/src/continuous_refactoring/routing.py b/src/continuous_refactoring/routing.py 
index 3bda167..c92d6ee 100644 --- a/src/continuous_refactoring/routing.py +++ b/src/continuous_refactoring/routing.py @@ -21,6 +21,29 @@ ) +def _log_failed_classification( + artifacts: RunArtifacts, + target: Target, + *, + attempt: int, + retry: int, + summary: str, + effort_metadata: dict[str, object] | None, + returncode: int | None = None, +) -> None: + artifacts.log_call_finished( + attempt=attempt, + retry=retry, + target=target.description, + call_role="classify", + status="failed", + level="WARN", + returncode=returncode, + summary=summary, + effort=effort_metadata, + ) + + def _parse_decision(stdout: str) -> ClassifierDecision: non_empty = [line.strip() for line in stdout.splitlines() if line.strip()] if not non_empty: @@ -77,29 +100,25 @@ def classify_target( timeout=timeout, ) except ContinuousRefactorError as error: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", summary=str(error), - effort=effort_metadata, + effort_metadata=effort_metadata, ) raise if result.returncode != 0: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", - returncode=result.returncode, summary=f"{agent} exited with code {result.returncode}", - effort=effort_metadata, + effort_metadata=effort_metadata, + returncode=result.returncode, ) raise ContinuousRefactorError( f"Classifier agent failed with exit code {result.returncode}" @@ -108,16 +127,14 @@ def classify_target( try: decision = _parse_decision(result.stdout) except ContinuousRefactorError as error: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", - returncode=result.returncode, 
summary=str(error), - effort=effort_metadata, + effort_metadata=effort_metadata, + returncode=result.returncode, ) raise diff --git a/tests/test_routing.py b/tests/test_routing.py index 95c147f..2a732bc 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -224,6 +224,40 @@ def test_classify_nonzero_exit_raises( ) +def test_classify_nonzero_exit_logs_failed_call( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + _prepare_tmpdir(tmp_path, monkeypatch) + artifacts = _make_artifacts(tmp_path) + + with pytest.raises(ContinuousRefactorError, match="exit code 1"): + _run_with_fake_agent( + tmp_path, + monkeypatch, + "decision: cohesive-cleanup\n", + returncode=1, + artifacts=artifacts, + ) + + event = _call_finished_events(artifacts)[-1] + timestamp = event.pop("timestamp") + + assert isinstance(timestamp, str) + assert event == { + "attempt": 1, + "call_role": "classify", + "call_status": "failed", + "event": "call_finished", + "level": "WARN", + "message": "call failed: classify \u2014 Clean up auth module", + "phase_reached": "classify", + "retry": 1, + "returncode": 1, + "summary": "codex exited with code 1", + "target": "Clean up auth module", + } + + def test_classify_preserves_wrapped_agent_failure( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, From d20b9a251e3d3fa162a8bce4282e397bfc8c227e Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 15:26:18 -0700 Subject: [PATCH 028/103] partially planned migrations --- .../approaches/compat-facade-module-split.md | 40 ++++++++ .../approaches/inplace-domain-seams.md | 37 +++++++ .../read-write-boundary-hardening.md | 39 ++++++++ .../manifest.json | 45 +++++++++ .../phase-1-lock-git-behavior.md | 49 +++++++++ .../phase-2-reorganize-domain-flow.md | 43 ++++++++ .../phase-3-tighten-error-boundaries.md | 44 +++++++++ ...4-trim-helper-shapes-and-verify-surface.md | 46 +++++++++ .../plan.md | 99 +++++++++++++++++++ .../approaches/inplace-domain-seams.md | 38 +++++++ 
.../approaches/manifest-ops-module-split.md | 39 ++++++++ .../approaches/pure-kernel-boundary-split.md | 40 ++++++++ .../manifest.json | 45 +++++++++ .../phase-1-lock-current-surface.md | 52 ++++++++++ .../phase-2-extract-manifest-ops.md | 48 +++++++++ .../phase-3-redirect-internal-callers.md | 55 +++++++++++ .../phase-4-tighten-boundary-contracts.md | 48 +++++++++ .../plan.md | 89 +++++++++++++++++ 18 files changed, 896 insertions(+) create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/compat-facade-module-split.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/inplace-domain-seams.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/read-write-boundary-hardening.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/manifest.json create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/phase-1-lock-git-behavior.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/phase-2-reorganize-domain-flow.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/phase-3-tighten-error-boundaries.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/phase-4-trim-helper-shapes-and-verify-surface.md create mode 100644 migrations/src-continuous-refactoring-git-py-20260429T020915/plan.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/inplace-domain-seams.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/manifest-ops-module-split.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/pure-kernel-boundary-split.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/manifest.json create mode 100644 
migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-1-lock-current-surface.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-2-extract-manifest-ops.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-3-redirect-internal-callers.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-4-tighten-boundary-contracts.md create mode 100644 migrations/src-continuous-refactoring-migrations-py-20260429T020031/plan.md diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/compat-facade-module-split.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/compat-facade-module-split.md new file mode 100644 index 0000000..39374b3 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/compat-facade-module-split.md @@ -0,0 +1,40 @@ +# Approach: Compatibility Facade Module Split + +## Strategy +- Keep `src/continuous_refactoring/git.py` as the stable exported facade, but move implementation into internal modules with tighter domain focus. +- A sensible split here is: + - `git.py`: public compatibility exports only, + - `git_process.py`: subprocess execution and `GitCommandError`, + - `git_worktree.py`: status queries, resets, revert, commit helpers. +- Update internal callers gradually where direct imports improve readability, while preserving existing `continuous_refactoring.git` imports and package-root exports. +- Treat any public export change as human-review territory and avoid it unless the migration explicitly chooses that break. + +## Tradeoffs +- Stronger long-term shape without forcing a user-facing import migration now. +- Makes the subprocess boundary explicit and easier to test in isolation. +- More churn than in-place cleanup because imports move across several hot modules. 
+- Risks wrapper soup if `git.py` becomes a thin file full of mechanical pass-throughs instead of a meaningful facade. + +## Estimated phases +1. Add regression tests around the current `continuous_refactoring.git` surface, including package-root availability through `continuous_refactoring`. + - `required_effort`: `low` +2. Introduce `git_process.py` for command execution and boundary error wrapping, with behavior preserved exactly. + - `required_effort`: `medium` +3. Introduce `git_worktree.py` for repo state and mutation helpers; re-export from `git.py`. + - `required_effort`: `medium` +4. Redirect internal callers to the sharper modules only where it improves call-site clarity, then trim dead private glue. + - `required_effort`: `medium` +5. Run full pytest and confirm package export uniqueness still holds. + - `required_effort`: `low` + +## Risk profile +- Technical risk: medium +- Blast radius: medium +- Failure modes: + - Circular imports with `__init__.py` package export collection if the split is done mechanically. + - Public-surface drift if `git.py` forgets to re-export a helper that tests do not cover. + - Internal callers start mixing facade and direct-module imports in a way that gets less clear, not more. + +## Best when +- We want cleaner boundaries now but do not want to break existing imports. +- We expect more git-related behavior to grow and want room without bloating one file further. diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/inplace-domain-seams.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/inplace-domain-seams.md new file mode 100644 index 0000000..3e8c1d0 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/inplace-domain-seams.md @@ -0,0 +1,37 @@ +# Approach: In-Place Domain Seams + +## Strategy +- Keep `src/continuous_refactoring/git.py` as the public and internal home for git helpers. 
+- Reorganize the file around three clear sections: + - subprocess boundary and error translation, + - read-only repository state queries, + - destructive worktree and history mutations. +- Add tests that lock the current behavior before moving helpers around, especially exact failure wrapping and destructive reset behavior. +- Tighten naming so call sites read in domain terms without introducing new modules or compatibility layers. + +## Tradeoffs +- Safest option. Lowest risk to `loop.py`, `refactor_attempts.py`, `phases.py`, and package-root exports. +- Good fit if the real problem is that `git.py` reads like a junk drawer, not that its module boundary is wrong. +- Keeps one module owning both read and write git behavior. +- Leaves `run_command()` as a broad primitive shared across domains, which may still feel a little too generic. + +## Estimated phases +1. Add characterization tests for `run_command()`, worktree status helpers, destructive reset helpers, and commit/revert flows. + - `required_effort`: `low` +2. Refactor private helper flow so mutations share one obvious reset path and read-only helpers read top-down. + - `required_effort`: `low` +3. Narrow error translation to the subprocess boundary and keep higher-level helpers bubbling signal unless they add domain context. + - `required_effort`: `low` +4. Delete stale helper shapes, rerun `tests/test_git.py`, then full pytest. + - `required_effort`: `low` + +## Risk profile +- Technical risk: low +- Blast radius: low +- Failure modes: + - Refactor changes exact error text that downstream tests assert. + - Cleanup accidentally changes semantics around `git_commit()` no-op detection or destructive reset ordering. + +## Best when +- We want the fastest safe readability win. +- We do not yet have evidence that a new module boundary will pay for itself. 
diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/read-write-boundary-hardening.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/read-write-boundary-hardening.md new file mode 100644 index 0000000..6f07af1 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/approaches/read-write-boundary-hardening.md @@ -0,0 +1,39 @@ +# Approach: Read/Write Boundary Hardening + +## Strategy +- Split git behavior by safety profile rather than by implementation detail: + - read-only repository inspection, + - destructive workspace/history mutation. +- Make read helpers pure-ish wrappers over git output and push destructive helpers behind a smaller, more explicit API. +- Use the migration to harden contracts at call sites: modules that only need inspection should stop importing mutation helpers. +- Keep compatibility for shipped imports only as long as needed for the migration, then aggressively delete dead compatibility if the project decides the sharper boundary is worth the churn. + +## Tradeoffs +- Best architectural payoff. The dangerous operations stop hiding beside harmless queries. +- Improves reasoning in `loop.py`, `phases.py`, and `refactor_attempts.py` because each call site declares whether it is inspecting or mutating repo state. +- Highest churn of these options. More call-site edits, more chances to nick behavior. +- Most likely to surface human-review-worthy questions if any public helper moves or disappears. + +## Estimated phases +1. Expand tests to distinguish read-only helpers from destructive helpers, including revert/reset invariants and branch-preservation expectations. + - `required_effort`: `low` +2. Extract read-only helpers into a focused module such as `git_inspect.py` and redirect non-mutating callers. + - `required_effort`: `medium` +3. 
Extract destructive helpers into a focused module such as `git_mutations.py`, then tighten helper names around intent rather than git verbs. + - `required_effort`: `high` +4. Decide whether `git.py` remains a compatibility facade or is reduced/retired; require human review if package-visible behavior changes. + - `required_effort`: `high` +5. Run full pytest and audit remaining imports for boundary violations or stale shims. + - `required_effort`: `medium` + +## Risk profile +- Technical risk: medium-high +- Blast radius: high +- Failure modes: + - Call-site churn accidentally changes destructive sequencing, especially around reset/clean and commit finalization. + - Compatibility cleanup breaks package-root imports or external expectations without enough review. + - The split overfits today's call sites and leaves awkward names if future git behavior grows differently. + +## Best when +- We want the cleanest domain boundary, not just a tidier file. +- We are willing to spend migration budget now to make destructive git behavior much more explicit. 
diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/manifest.json b/migrations/src-continuous-refactoring-git-py-20260429T020915/manifest.json new file mode 100644 index 0000000..b3e315b --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/manifest.json @@ -0,0 +1,45 @@ +{ + "awaiting_human_review": false, + "cooldown_until": null, + "created_at": "2026-04-29T02:09:15.185-07:00", + "current_phase": "lock-git-behavior", + "human_review_reason": null, + "last_touch": "2026-04-29T02:17:11.327-07:00", + "name": "src-continuous-refactoring-git-py-20260429T020915", + "phases": [ + { + "done": false, + "effort_reason": "focused characterization work on a small module and its tests", + "file": "phase-1-lock-git-behavior.md", + "name": "lock-git-behavior", + "precondition": "This migration is at its first executable phase, and `src/continuous_refactoring/git.py` still exposes the current public helpers listed in its `__all__` for downstream callers and package-root re-export.", + "required_effort": "low" + }, + { + "done": false, + "effort_reason": "in-place helper reordering and local cleanup only", + "file": "phase-2-reorganize-domain-flow.md", + "name": "reorganize-domain-flow", + "precondition": "Phase 1 is marked complete, and `tests/test_git.py` already contains characterization coverage for `run_command()`, workspace status helpers, clean-worktree enforcement, destructive reset helpers, and commit/revert flows.", + "required_effort": "low" + }, + { + "done": false, + "effort_reason": "localized boundary cleanup with narrow downstream verification", + "file": "phase-3-tighten-error-boundaries.md", + "name": "tighten-error-boundaries", + "precondition": "Phase 2 is marked complete, and `run_command()` is still the subprocess entry point used by the public git helpers touched by this migration.", + "required_effort": "low" + }, + { + "done": false, + "effort_reason": "final cleanup is limited to transitional private 
helpers and surface verification", + "file": "phase-4-trim-helper-shapes-and-verify-surface.md", + "name": "trim-helper-shapes-and-verify-surface", + "precondition": "Phase 3 is marked complete, and the public symbol set in `src/continuous_refactoring/git.py::__all__` still matches the surface this migration is preserving.", + "required_effort": "low" + } + ], + "status": "planning", + "wake_up_on": null +} diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-1-lock-git-behavior.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-1-lock-git-behavior.md new file mode 100644 index 0000000..6c9b747 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-1-lock-git-behavior.md @@ -0,0 +1,49 @@ +# Phase 1: Lock Git Behavior + +## Scope +- `tests/test_git.py` +- `src/continuous_refactoring/git.py` +- If package-root export coverage is the smallest stable place to pin symbol + exposure: + - `tests/test_continuous_refactoring.py` + +required_effort: low +effort_reason: focused characterization work on a small module and its tests + +## Precondition +This migration is at its first executable phase, and +`src/continuous_refactoring/git.py` still exposes the current public helpers +listed in its `__all__` for downstream callers and package-root re-export. + +## Instructions +- Add or tighten characterization tests before restructuring `git.py`. +- Make the current contract explicit for the seams most likely to break during + refactoring: + - `run_command()` checked failure wrapping, + - `run_command(check=False)` passthrough behavior, + - nested causes for startup failures and non-zero exits, + - `workspace_status_lines()` and `require_clean_worktree()`, + - `discard_workspace_changes()` and `revert_to()`, + - `git_commit()` and `undo_last_commit()`. +- Prefer real repositories through the existing test helpers over mocks unless + the behavior under test is subprocess startup failure. +- Keep production edits minimal.
Only touch `git.py` if a tiny source change is + required to expose already-shipped behavior to tests. + +## Definition of Done +- `tests/test_git.py` contains explicit coverage for: + - `run_command()` checked failure wrapping and unchecked passthrough, + - nested startup and non-zero-exit causes, + - workspace status and clean-worktree enforcement, + - destructive reset/clean behavior, + - commit, undo, and revert behavior. +- Any package-root git exports this migration intends to preserve are pinned by + the smallest explicit coverage needed. +- No caller-facing behavior has changed beyond test-only clarification. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_git.py`. +- If package-root export coverage is added or changed, run + `uv run pytest tests/test_continuous_refactoring.py`. +- Finish with `uv run pytest`. diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-2-reorganize-domain-flow.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-2-reorganize-domain-flow.md new file mode 100644 index 0000000..221e5be --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-2-reorganize-domain-flow.md @@ -0,0 +1,43 @@ +# Phase 2: Reorganize Domain Flow + +## Scope +- `src/continuous_refactoring/git.py` +- `tests/test_git.py` + +required_effort: low +effort_reason: in-place helper reordering and local cleanup only + +## Precondition +Phase 1 is marked complete, and `tests/test_git.py` already contains +characterization coverage for `run_command()`, workspace status helpers, +clean-worktree enforcement, destructive reset helpers, and commit/revert +flows. + +## Instructions +- Reorganize `git.py` so it reads top-down in this order: + 1. subprocess execution and failure translation, + 2. read-only repository state queries, + 3. destructive worktree and history mutations. 
+- Preserve the existing public symbol set in `git.py.__all__` and the existing + behavior of those helpers. +- Simplify private helper flow only as needed to support that ordering. +- If you introduce any private helper solely to stage the reorganization, name + it plainly and treat it as transitional so Phase 4 can delete it explicitly. +- Do not change package imports, split the module, or change caller contracts. + +## Definition of Done +- `src/continuous_refactoring/git.py` is ordered top-down by the three domains + named above. +- The public symbol set exposed from `git.py.__all__` is unchanged. +- `discard_workspace_changes()` and `revert_to()` both delegate their reset and + clean work through `_reset_hard_and_clean()` rather than duplicating inline + command sequences. +- The characterization tests added in Phase 1 still pass without changing the + contract they assert. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_git.py`. +- If the edit unexpectedly touches package-root export behavior, run + `uv run pytest tests/test_continuous_refactoring.py`. +- Finish with `uv run pytest`. 
diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-3-tighten-error-boundaries.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-3-tighten-error-boundaries.md new file mode 100644 index 0000000..19add22 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-3-tighten-error-boundaries.md @@ -0,0 +1,44 @@ +# Phase 3: Tighten Error Boundaries + +## Scope +- `src/continuous_refactoring/git.py` +- `tests/test_git.py` +- If direct callers need targeted confirmation: + - `tests/test_targeting.py` + - `tests/test_refactor_attempts.py` + +required_effort: low +effort_reason: localized boundary cleanup with narrow downstream verification + +## Precondition +Phase 2 is marked complete, and `run_command()` is still the subprocess entry +point used by the public git helpers touched by this migration. + +## Instructions +- Keep subprocess failure translation at `run_command()` by default. +- Preserve nested causes from both command startup failures and non-zero exits. +- Let higher-level helpers bubble `GitCommandError` unchanged unless they add + domain-owned context that is already part of that helper's contract, such as + detached-HEAD reporting in `current_branch()` or the user-facing clean + worktree message in `require_clean_worktree()`. +- Remove redundant re-wrapping or helper layering that obscures the original + failure source. +- Extend focused tests only where needed to make the intended error-boundary + contract explicit. + +## Definition of Done +- `run_command()` remains the only general subprocess failure-translation + boundary in `git.py`. +- Higher-level helpers add context only for their own domain contract and do + not add duplicate wrapper layers around `GitCommandError`. +- Focused regression tests would catch loss of nested causes, changed command + failure payloads, or new redundant wrapping. 
+- Downstream suites that directly consume `GitCommandError` still pass without + API churn. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_git.py`. +- If direct caller expectations around `GitCommandError` are touched, run + `uv run pytest tests/test_targeting.py tests/test_refactor_attempts.py`. +- Finish with `uv run pytest`. diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-4-trim-helper-shapes-and-verify-surface.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-4-trim-helper-shapes-and-verify-surface.md new file mode 100644 index 0000000..8744695 --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/phase-4-trim-helper-shapes-and-verify-surface.md @@ -0,0 +1,46 @@ +# Phase 4: Trim Helper Shapes and Verify Surface + +## Scope +- `src/continuous_refactoring/git.py` +- `tests/test_git.py` +- If package-root export checks are needed: + - `src/continuous_refactoring/__init__.py` + - `tests/test_continuous_refactoring.py` + +required_effort: low +effort_reason: final cleanup is limited to transitional private helpers and +surface verification + +## Precondition +Phase 3 is marked complete, and the public symbol set in +`src/continuous_refactoring/git.py::__all__` still matches the surface this +migration is preserving. + +## Instructions +- Remove only private helpers or naming that were introduced or retained solely + to stage phases 2 or 3. +- If phases 2 or 3 introduced a private staging helper or alias that did not + exist at migration start, either delete it here or explicitly keep it by + folding it into the final `git.py` structure and covering the retained shape + with focused tests. +- Do not reopen module structure or error-boundary design in this phase. +- Verify that `continuous_refactoring.git` and package-root re-export still + expose the same git helper surface after cleanup. 
+- If explicit export coverage is still missing, add the smallest assertion set + that will catch symbol drift. + +## Definition of Done +- No private staging helper or alias introduced during phases 2 or 3 remains + unless this phase intentionally keeps it as part of the final file shape. +- The public symbol set exposed from `git.py.__all__` is unchanged from the + start of the migration. +- Any package-root re-export coverage needed to catch symbol drift is present. +- The final `git.py` layout and cleanup preserve caller-facing behavior under + the focused git tests and the configured broad validation command. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_git.py`. +- If package-root export coverage is added or changed, run + `uv run pytest tests/test_continuous_refactoring.py`. +- Finish with `uv run pytest`. diff --git a/migrations/src-continuous-refactoring-git-py-20260429T020915/plan.md b/migrations/src-continuous-refactoring-git-py-20260429T020915/plan.md new file mode 100644 index 0000000..f33165d --- /dev/null +++ b/migrations/src-continuous-refactoring-git-py-20260429T020915/plan.md @@ -0,0 +1,99 @@ +# Migration Plan: `inplace-domain-seams` + +## Goal +Refactor `src/continuous_refactoring/git.py` in place so the file reads as one +coherent git boundary with three explicit seams: +- subprocess execution and failure translation, +- read-only repository state queries, +- destructive worktree and history mutations. + +The migration preserves the shipped package surface and caller-facing git +behavior while making later edits safer and easier to review. + +## Chosen Approach +- Keep `src/continuous_refactoring/git.py` as the only module boundary. +- Lock behavior first with characterization tests. +- Reshape file order and private helper flow before touching error-boundary + rules. 
+- Reserve the last phase for explicit surface verification and deletion of any + phase-local transitional helpers that earlier phases intentionally left + behind. + +## Why This Shape +- `git.py` is already the shared git boundary for `loop.py`, + `refactor_attempts.py`, `phases.py`, `migration_tick.py`, + `routing_pipeline.py`, and `targeting.py`. In-place cleanup keeps blast + radius low. +- The main risk is behavioral drift, not module architecture. The existing + public helpers, package-root exports, and destructive git flows are already + load-bearing. +- The review findings are best addressed by making each phase gate factual: + previous phase marked complete, named characterization coverage exists, and + the structural artifact needed by the next phase still exists. + +## Target Surface +- Primary edit surface: + - `src/continuous_refactoring/git.py` + - `tests/test_git.py` +- Verification-aware callers and package surface: + - `src/continuous_refactoring/__init__.py` + - `src/continuous_refactoring/loop.py` + - `src/continuous_refactoring/phases.py` + - `tests/test_continuous_refactoring.py` + - `tests/test_refactor_attempts.py` + - `tests/test_targeting.py` + +## Phase Breakdown +1. `phase-1-lock-git-behavior.md` + Add characterization coverage for the current `git.py` contract, especially + command failure wrapping, workspace cleanliness checks, and destructive git + flows. +2. `phase-2-reorganize-domain-flow.md` + Reorder and simplify `git.py` so the file reads top-down by domain while + preserving the existing public symbol set and behavior. +3. `phase-3-tighten-error-boundaries.md` + Keep failure translation anchored at `run_command()` and remove redundant + higher-level wrapping that does not add domain-owned context. +4. `phase-4-trim-helper-shapes-and-verify-surface.md` + Remove only explicitly transitional private helpers left from phases 2 or 3 + and verify the `git.py` and package-root symbol surfaces are unchanged. + +## Dependencies +1. 
Phase 1 has no prior migration-phase dependency. +2. Phase 2 depends on Phase 1. +3. Phase 3 depends on Phase 2. +4. Phase 4 depends on Phase 3. + +## Dependency Visualization +```mermaid +flowchart TD + P1["1. Lock Git Behavior"] --> P2["2. Reorganize Domain Flow"] + P2 --> P3["3. Tighten Error Boundaries"] + P3 --> P4["4. Trim Helper Shapes and Verify Surface"] +``` + +## Validation Strategy +- The harness already enforces the broad validation baseline before work starts. + Each completed phase must still end with `uv run pytest`. +- `uv run pytest tests/test_git.py` is the default focused gate for all phases. +- Phase 1 should make these behaviors explicit in `tests/test_git.py`: + - `run_command()` checked failure wrapping and unchecked passthrough, + - nested causes from startup failures and non-zero exits, + - `workspace_status_lines()` and `require_clean_worktree()`, + - destructive reset/clean flows through `discard_workspace_changes()` and + `revert_to()`, + - commit and undo behavior through `git_commit()` and `undo_last_commit()`. +- Use downstream suites only when a phase touches the behavior they consume: + - package-root export checks: `uv run pytest tests/test_continuous_refactoring.py` + - direct `GitCommandError` consumers: `uv run pytest tests/test_targeting.py tests/test_refactor_attempts.py` + +## Risk Controls +- No package-surface churn. Public names exposed from `continuous_refactoring.git` + and re-exported from `continuous_refactoring` stay stable through this + migration. +- No call-site rewrites unless a test proves a real break and the migration is + amended for human review. +- No structural `loop.py` refactor. Caller files are validation surfaces here, + not primary edit targets. +- Phase 2 owns the structural end state. Phase 4 does not reopen structure; it + only verifies surface stability and deletes named transitional leftovers. 
diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/inplace-domain-seams.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/inplace-domain-seams.md new file mode 100644 index 0000000..fc162da --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/inplace-domain-seams.md @@ -0,0 +1,38 @@ +# Approach: In-Place Domain Seams + +## Strategy +- Keep `src/continuous_refactoring/migrations.py` as the public home for migration helpers and refactor internally for clearer sections: + - manifest value types and status constants, + - phase cursor helpers, + - manifest persistence, + - wake-up eligibility. +- Tighten helper naming so call sites read in domain terms instead of implementation terms. +- Grow tests around the current public functions before moving code around. +- Preserve import paths everywhere else. No package-surface churn, no new module. + +## Tradeoffs +- Safest option. Lowest merge pain and lowest risk to `loop.py`, `planning.py`, `phases.py`, and `migration_tick.py`. +- Good fit if the real problem is readability and local change friction, not the file count. +- Leaves one module owning multiple concerns. Cleaner, yes; simpler architecture, not really. +- Misses the chance to make codec and persistence boundaries more explicit. + +## Estimated phases +1. Add characterization tests for phase lookup, cursor advance, completion, load/save failures, and wake-up eligibility. + - `required_effort`: `low` +2. Refactor private helpers so lookup, cursor, and eligibility logic read top-down and duplication around phase resolution disappears. + - `required_effort`: `low` +3. Isolate manifest save/load flow into tighter private helpers inside `migrations.py`, keeping boundary wrapping only at filesystem and JSON edges. + - `required_effort`: `low` +4. Trim stale helper shapes and rerun targeted plus broad pytest coverage. 
+ - `required_effort`: `low` + +## Risk profile +- Technical risk: low +- Blast radius: low +- Failure modes: + - Cleanup accidentally changes exact error strings that tests or callers rely on. + - Helper reshuffling obscures the current codec boundary instead of clarifying it. + +## Best when +- We want the fastest safe win. +- We are not yet confident a new module boundary will pay for itself. diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/manifest-ops-module-split.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/manifest-ops-module-split.md new file mode 100644 index 0000000..a70a58b --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/manifest-ops-module-split.md @@ -0,0 +1,39 @@ +# Approach: Manifest Ops Split + +## Strategy +- Keep `migrations.py` as the public compatibility facade for migration concepts, but move operational logic into a new internal module such as `src/continuous_refactoring/migration_manifest_ops.py`. +- Split responsibilities like this: + - `migrations.py`: `MigrationManifest`, `PhaseSpec`, status vocabulary, stable exported helpers. + - `migration_manifest_ops.py`: phase lookup, cursor advance, completion, eligibility, load/save helpers. + - `migration_manifest_codec.py`: payload decoding/encoding only. +- Update internal callers gradually to import from the new ops module only where doing so improves readability. Keep the old `continuous_refactoring.migrations` imports working during the migration. + +## Tradeoffs +- Better domain boundaries without changing the user-facing manifest structure or CLI behavior. +- Stronger long-term shape: codec stops pretending to be half the domain while `migrations.py` stops being a junk drawer. +- More churn than the in-place approach because many modules import from `migrations.py`. +- Requires discipline to avoid creating a pointless facade plus wrapper soup. 
+ +## Estimated phases +1. Add import-safe regression tests around the current `continuous_refactoring.migrations` surface and behavior-heavy tests in `tests/test_migrations.py`. + - `required_effort`: `low` +2. Introduce `migration_manifest_ops.py` and move pure-ish operational helpers there without changing behavior. + - `required_effort`: `medium` +3. Re-export the moved helpers from `migrations.py` so existing callers still work, then simplify internal call sites where direct ops imports are clearer. + - `required_effort`: `medium` +4. Tighten error translation so filesystem and JSON wrapping stay at the true boundary, with nested causes preserved. + - `required_effort`: `medium` +5. Run full pytest and decide whether any remaining direct imports should stay for compatibility or move for clarity. + - `required_effort`: `low` + +## Risk profile +- Technical risk: medium +- Blast radius: medium +- Failure modes: + - Circular imports between the new ops module, codec, and existing callers if the split is done mechanically. + - Public-surface drift if `migrations.py` forgets to re-export something tests do not cover. + - Human-review-worthy churn if import moves accidentally alter package-root behavior. + +## Best when +- We want a real boundary improvement now, not just a tidier file. +- We expect more migration scheduling and manifest logic to grow soon. 
diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/pure-kernel-boundary-split.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/pure-kernel-boundary-split.md new file mode 100644 index 0000000..279f183 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/approaches/pure-kernel-boundary-split.md @@ -0,0 +1,40 @@ +# Approach: Pure Kernel + Boundary Split + +## Strategy +- Separate migration manifest logic into a pure kernel and an impure boundary: + - `src/continuous_refactoring/migration_manifest.py`: dataclasses, status constants, phase traversal, eligibility, and completion helpers. + - `src/continuous_refactoring/migration_manifest_store.py`: `load_manifest` and `save_manifest` only. + - `src/continuous_refactoring/migration_manifest_codec.py`: payload schema translation only. + - `src/continuous_refactoring/migrations.py`: compatibility facade plus path helpers, or a thin redirect layer that can later shrink away. +- Lean into code shape: pure functions get dense example/property-style coverage, boundary code gets example-based failure tests. +- Treat public import changes as compatibility-sensitive even though the implementation is being aggressively cleaned up underneath. + +## Tradeoffs +- Cleanest architecture. The module names finally tell the truth. +- Best testing shape: pure manifest behavior becomes cheap to reason about and validate. +- Highest churn and most opportunities for circular imports, stale exports, and half-finished compatibility shims. +- Very easy to overbuild. If the split does not materially simplify call sites, it's architecture cosplay. + +## Estimated phases +1. Add package-surface and behavior lock tests for manifest types, ops helpers, and boundary errors. + - `required_effort`: `medium` +2. Create `migration_manifest.py` and move pure datatypes plus cursor/eligibility/completion logic there.
+ - `required_effort`: `medium` +3. Create `migration_manifest_store.py` for filesystem I/O and keep `migration_manifest_codec.py` focused on schema conversion. + - `required_effort`: `high` +4. Rewire `planning.py`, `phases.py`, `migration_tick.py`, `loop.py`, and tests to the new structure while preserving stable public imports where they are still worth keeping. + - `required_effort`: `high` +5. Review whether `migrations.py` still earns its existence or should remain only as a compatibility facade for one release window. + - `required_effort`: `xhigh` + +## Risk profile +- Technical risk: medium to high +- Blast radius: high +- Failure modes: + - Import cycles or package boot failures, especially because codec currently imports dataclasses from `migrations.py`. + - Accidental contract changes around manifest loading, error messages, or saved JSON formatting. + - Over-splitting that makes call sites noisier instead of clearer. + +## Best when +- We want the end-state architecture now and are willing to pay the migration cost. +- The repo is about to do more substantial migration-system work, making a stronger boundary immediately valuable. 
diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/manifest.json b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/manifest.json new file mode 100644 index 0000000..0b761d4 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/manifest.json @@ -0,0 +1,45 @@ +{ + "awaiting_human_review": false, + "cooldown_until": null, + "created_at": "2026-04-29T02:00:31.729-07:00", + "current_phase": "lock-current-surface", + "human_review_reason": null, + "last_touch": "2026-04-29T02:07:32.938-07:00", + "name": "src-continuous-refactoring-migrations-py-20260429T020031", + "phases": [ + { + "done": false, + "effort_reason": "behavior-locking test work is local and should stay cheap", + "file": "phase-1-lock-current-surface.md", + "name": "lock-current-surface", + "precondition": "This migration is still in planning or at its first executable phase, and `src/continuous_refactoring/migrations.py` still contains the shipped manifest types, manifest I/O helpers, and exported operational helpers that later phases intend to split.", + "required_effort": "low" + }, + { + "done": false, + "effort_reason": "module extraction has real import-cycle and compatibility risk", + "file": "phase-2-extract-manifest-ops.md", + "name": "extract-manifest-ops", + "precondition": "Phase 1 is complete, and the compatibility export contract plus behavior-locking tests for `continuous_refactoring.migrations` are in place.", + "required_effort": "medium" + }, + { + "done": false, + "effort_reason": "import rewrites span several call sites and can hide behavior drift", + "file": "phase-3-redirect-internal-callers.md", + "name": "redirect-internal-callers", + "precondition": "Phase 2 is complete, `migration_manifest_ops.py` owns the extracted operational helpers, and `migrations.py` still re-exports those helpers through the locked compatibility surface.", + "required_effort": "medium" + }, + { + "done": false, + 
"effort_reason": "boundary cleanup can accidentally change error semantics or public exports", + "file": "phase-4-tighten-boundary-contracts.md", + "name": "tighten-boundary-contracts", + "precondition": "Phase 3 is complete, `src/continuous_refactoring/phases.py`, `src/continuous_refactoring/loop.py`, and `src/continuous_refactoring/prompts.py` now import their extracted operational helpers from `migration_manifest_ops.py`, and `migrations.py` still preserves the locked compatibility export set.", + "required_effort": "medium" + } + ], + "status": "planning", + "wake_up_on": null +} diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-1-lock-current-surface.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-1-lock-current-surface.md new file mode 100644 index 0000000..2c66398 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-1-lock-current-surface.md @@ -0,0 +1,52 @@ +# Phase 1: Lock Current Surface + +## Scope +- `tests/test_migrations.py` +- `src/continuous_refactoring/migrations.py` +- If needed for focused regression coverage only: + `tests/test_continuous_refactoring.py`, `tests/test_phases.py`, + `tests/test_planning.py`, `tests/test_prompts.py`, `tests/test_wake_up.py` + +required_effort: low +effort_reason: behavior-locking test work is local and should stay cheap + +## Precondition +This migration is still in planning or at its first executable phase, and +`src/continuous_refactoring/migrations.py` still contains the shipped manifest +types, manifest I/O helpers, and exported operational helpers that later phases +intend to split. + +## Instructions +- Add or tighten regression tests for the public + `continuous_refactoring.migrations` surface that later phases must preserve. +- Lock the exported-symbol contract explicitly. 
The regression coverage must + name the compatibility export set that stays publicly reachable from + `continuous_refactoring.migrations`, not just assert that some representative + imports still work. +- Cover behavior that would be easy to break during the split: + current-phase lookup, cursor advancement, phase completion/reset behavior, + eligibility logic, and manifest load/save error wrapping. +- Favor outcome-based tests over call-shape assertions. +- Keep production edits minimal in this phase. Only change source if a tiny fix + is required to expose or stabilize the behavior being locked down. + +## Definition of Done +- `tests/test_migrations.py` or equivalent focused regression coverage names the + shipped compatibility export set for `continuous_refactoring.migrations`. +- The public helpers and value types that later phases rely on are protected by + explicit regression coverage. +- The test suite would catch export drift, operational-behavior regressions, and + broken boundary error nesting introduced by the module split. +- No user-visible contract has changed; this phase only strengthens the safety + net. +- The configured broad validation command passes. + +## Validation +- Run the narrowest relevant checks first, expected to include + `uv run pytest tests/test_migrations.py`. +- If the export-contract coverage lives in package-root tests, run the focused + follow-up checks such as `uv run pytest tests/test_continuous_refactoring.py`. +- If new regression coverage touches import-heavy callers, run the relevant + focused checks such as `uv run pytest tests/test_phases.py + tests/test_planning.py tests/test_prompts.py tests/test_wake_up.py`. +- Finish with `uv run pytest`. 
diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-2-extract-manifest-ops.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-2-extract-manifest-ops.md new file mode 100644 index 0000000..d428219 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-2-extract-manifest-ops.md @@ -0,0 +1,48 @@ +# Phase 2: Extract Manifest Ops + +## Scope +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/migration_manifest_ops.py` +- `tests/test_migrations.py` +- Adjacent files only if required to keep imports or tests coherent: + `src/continuous_refactoring/migration_manifest_codec.py` + +required_effort: medium +effort_reason: module extraction has real import-cycle and compatibility risk + +## Precondition +Phase 1 is complete, and the compatibility export contract plus behavior-locking +tests for `continuous_refactoring.migrations` are in place. + +## Instructions +- Introduce `src/continuous_refactoring/migration_manifest_ops.py` as the new + internal home for manifest operational helpers. +- Move operational logic there first: + phase lookup helpers, current-phase resolution, cursor advancement, phase + completion state updates, and wake-up eligibility helpers. +- Keep `MigrationManifest`, `PhaseSpec`, status vocabulary, path helpers, and + the public `load_manifest()` / `save_manifest()` facade anchored in + `migrations.py`. +- Preserve behavior exactly. This phase is about ownership and locality, not + semantic change. +- Avoid speculative interfaces. One concrete internal module is enough. +- Do not redirect downstream callers yet unless a minimal import adjustment is + required to keep the extraction coherent. + +## Definition of Done +- `migration_manifest_ops.py` exists and owns the extracted manifest + operational logic. +- `continuous_refactoring.migrations` still exports the locked compatibility + symbol set from Phase 1. 
+- `load_manifest()` and `save_manifest()` still present the same public + contract, with persistence and codec boundary behavior unchanged. +- The split does not introduce circular imports or duplicate, drifting copies + of the same logic. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_migrations.py`. +- Run focused downstream checks that exercise imported helpers, expected to + include `uv run pytest tests/test_phases.py tests/test_planning.py + tests/test_prompts.py tests/test_wake_up.py`. +- Finish with `uv run pytest`. diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-3-redirect-internal-callers.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-3-redirect-internal-callers.md new file mode 100644 index 0000000..113b415 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-3-redirect-internal-callers.md @@ -0,0 +1,55 @@ +# Phase 3: Redirect Internal Callers + +## Scope +- `src/continuous_refactoring/phases.py` +- `src/continuous_refactoring/loop.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/migrations.py` +- Related tests for touched call sites: + `tests/test_phases.py`, `tests/test_loop_migration_tick.py`, + `tests/test_focus_on_live_migrations.py`, `tests/test_prompts.py`, + `tests/test_run.py`, `tests/test_migrations.py` + +required_effort: medium +effort_reason: import rewrites span several call sites and can hide behavior drift + +## Precondition +Phase 2 is complete, `migration_manifest_ops.py` owns the extracted operational +helpers, and `migrations.py` still re-exports those helpers through the locked +compatibility surface. + +## Instructions +- Redirect only the in-scope internal callers that consume extracted + operational helpers: + `src/continuous_refactoring/phases.py`, + `src/continuous_refactoring/loop.py`, and + `src/continuous_refactoring/prompts.py`. 
+- Move those modules to direct imports from + `continuous_refactoring.migration_manifest_ops` only for helpers extracted in + Phase 2, such as `complete_manifest_phase`, `resolve_current_phase`, and + `phase_file_reference`. +- Do not edit `planning.py` or `cli.py` in this phase unless Phase 2 made a + narrow coherence fix unavoidable. They are in the migration scope, but they + are not required caller redirects for this phase. +- Keep `continuous_refactoring.migrations` as the public compatibility facade. + Public imports and tests that intentionally exercise the shipped surface stay + on that module. +- Preserve helper names unless a rename is strictly necessary and covered by + updated tests. + +## Definition of Done +- `phases.py`, `loop.py`, and `prompts.py` import their extracted operational + helpers directly from `migration_manifest_ops.py`. +- `tests/test_migrations.py` still exercises the compatibility facade through + `continuous_refactoring.migrations`. +- No out-of-scope source modules were edited just to widen the redirect. +- The redirect introduces no circular import and no duplicate helper wrappers. +- The configured broad validation command passes. + +## Validation +- Run the focused caller tests first: + `uv run pytest tests/test_phases.py tests/test_loop_migration_tick.py + tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_run.py`. +- Run `uv run pytest tests/test_migrations.py` to keep the compatibility facade + honest. +- Finish with `uv run pytest`. 
diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-4-tighten-boundary-contracts.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-4-tighten-boundary-contracts.md new file mode 100644 index 0000000..9fa3255 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/phase-4-tighten-boundary-contracts.md @@ -0,0 +1,48 @@ +# Phase 4: Tighten Boundary Contracts + +## Scope +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/migration_manifest_ops.py` +- `src/continuous_refactoring/migration_manifest_codec.py` +- `tests/test_migrations.py` +- Any directly affected downstream tests from earlier phases + +required_effort: medium +effort_reason: boundary cleanup can accidentally change error semantics or public exports + +## Precondition +Phase 3 is complete, `src/continuous_refactoring/phases.py`, +`src/continuous_refactoring/loop.py`, and +`src/continuous_refactoring/prompts.py` now import their extracted operational +helpers from `migration_manifest_ops.py`, and `migrations.py` still preserves +the locked compatibility export set. + +## Instructions +- Remove residual non-boundary operational logic from `migrations.py` that no + longer belongs there after the extraction and caller redirects. +- Keep error translation at the real boundaries: + `load_manifest()` and `save_manifest()` for filesystem and manifest-file I/O, + `migration_manifest_codec.py` for payload decoding and encoding semantics. +- Preserve exception nesting with `from error` when translation is warranted. +- Delete transitional wrappers or duplicate helpers that no longer earn their + keep once the seam is stable. +- Stop short of a hard compatibility cut. If a helper is still part of the + locked public surface, keep that export from `continuous_refactoring.migrations`. 
+ +## Definition of Done +- `migrations.py` reads as a facade for manifest concepts, manifest-path + helpers, manifest I/O boundaries, and compatibility exports rather than as + the primary implementation home for extracted operational logic. +- Filesystem and JSON failures are wrapped once at the true boundary, with + preserved nested causes. +- No dead transitional code remains from the split, except compatibility + exports that intentionally preserve the shipped surface. +- The locked compatibility export set from Phase 1 still passes unchanged. +- The configured broad validation command passes. + +## Validation +- Run `uv run pytest tests/test_migrations.py`. +- Run any focused downstream suites affected by the final boundary cleanup, + expected to include `uv run pytest tests/test_phases.py tests/test_planning.py + tests/test_loop_migration_tick.py tests/test_prompts.py`. +- Finish with `uv run pytest`. diff --git a/migrations/src-continuous-refactoring-migrations-py-20260429T020031/plan.md b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/plan.md new file mode 100644 index 0000000..5239427 --- /dev/null +++ b/migrations/src-continuous-refactoring-migrations-py-20260429T020031/plan.md @@ -0,0 +1,89 @@ +# Migration Plan: `manifest-ops-module-split` + +## Goal +Split manifest operational logic out of +`src/continuous_refactoring/migrations.py` into a dedicated internal module +while keeping `continuous_refactoring.migrations` as the stable public home for +manifest types, manifest I/O helpers, and compatibility exports. + +## Why This Shape +- `migrations.py` currently mixes value types, manifest operations, path + helpers, eligibility rules, and persistence boundaries. +- `migration_manifest_codec.py` already owns manifest payload decoding and + encoding, so the next useful seam is operational manifest logic rather than a + bigger architecture rewrite. +- The package already exposes a broad `continuous_refactoring.migrations` + surface. 
The migration should preserve that facade while reducing the amount + of real logic that lives behind it. + +## Target Surface +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/migration_manifest_codec.py` +- `src/continuous_refactoring/phases.py` +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/loop.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/cli.py` +- `tests/test_migrations.py` +- Likely adjacent validation surfaces: + `tests/test_phases.py`, `tests/test_planning.py`, + `tests/test_loop_migration_tick.py`, `tests/test_focus_on_live_migrations.py`, + `tests/test_prompts.py`, `tests/test_run.py`, `tests/test_cli_review.py`, + `tests/test_no_driver_branching.py`, `tests/test_continuous_refactoring.py` + +## Phase Breakdown +1. `phase-1-lock-current-surface.md` + Add explicit regression coverage for the shipped + `continuous_refactoring.migrations` export set and for behavior that later + extraction phases must preserve. +2. `phase-2-extract-manifest-ops.md` + Introduce `migration_manifest_ops.py` and move manifest operational helpers + there without changing the public facade or persistence behavior. +3. `phase-3-redirect-internal-callers.md` + Redirect only the in-scope internal callers that currently use extracted + operational helpers: `phases.py`, `loop.py`, and `prompts.py`. +4. `phase-4-tighten-boundary-contracts.md` + Thin `migrations.py` down to the facade and true boundary helpers, preserving + compatibility exports and keeping error translation only at the filesystem + and JSON boundaries. + +## Dependencies +- Phase 1 has no migration-phase dependency. +- Phase 2 depends on Phase 1. +- Phase 3 depends on Phase 2. +- Phase 4 depends on Phase 3. + +## Dependency Visualization +```mermaid +flowchart TD + P1["1. Lock Current Surface"] --> P2["2. Extract Manifest Ops"] + P2 --> P3["3. Redirect Internal Callers"] + P3 --> P4["4. 
Tighten Boundary Contracts"] +``` + +## Validation Strategy +- Every phase must leave the repository shippable and finish with the + configured broad validation command: `uv run pytest`. +- Phase 1 establishes the compatibility contract explicitly: + `tests/test_migrations.py` should pin the exported-symbol set from + `continuous_refactoring.migrations`, alongside behavioral coverage for phase + lookup, phase advancement, completion/reset behavior, eligibility, and + manifest load/save error wrapping. +- Later phases should run narrow checks first so failures localize quickly: + `tests/test_migrations.py` remains the primary compatibility safety net, with + focused downstream suites added only for the touched callers. +- Import rewrites stay within the selected scope candidate. This migration does + not authorize edits to `migration_tick.py` or `review_cli.py`. +- Error behavior is part of the contract: filesystem and JSON failures must + stay wrapped at the actual boundary with preserved nested causes. + +## Risk Controls +- Lock the compatibility export set before moving code. +- Move operational logic before redirecting callers. +- Redirect only the concrete in-scope callers that currently use extracted + operational helpers. Do not turn Phase 3 into a general import cleanup. +- Treat import-cycle pressure as a stop sign. If the split starts forcing + circular dependencies, keep the seam smaller rather than completing a + mechanical move. +- Do not change manifest JSON structure, CLI behavior, XDG state handling, or + migration scheduling semantics as part of this cleanup. 
From 1590ee87c3f48631d30546d9406ca2b987963932 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Wed, 29 Apr 2026 15:26:38 -0700 Subject: [PATCH 029/103] remove approaches dir added by an earlier bug (I'm guessing) --- approaches/agent-backend-boundary-split.md | 39 --------------- approaches/agent-execution-domain-split.md | 40 --------------- approaches/agent-inplace-seams.md | 37 -------------- .../artifacts-boundary-hardening-inplace.md | 50 ------------------- .../artifacts-domain-split-lightweight.md | 43 ---------------- .../artifacts-error-typing-crossboundaries.md | 44 ---------------- approaches/init-init-export-contract.md | 34 ------------- approaches/init-init-export-lazy-namespace.md | 31 ------------ approaches/init-init-export-surface.md | 34 ------------- 9 files changed, 352 deletions(-) delete mode 100644 approaches/agent-backend-boundary-split.md delete mode 100644 approaches/agent-execution-domain-split.md delete mode 100644 approaches/agent-inplace-seams.md delete mode 100644 approaches/artifacts-boundary-hardening-inplace.md delete mode 100644 approaches/artifacts-domain-split-lightweight.md delete mode 100644 approaches/artifacts-error-typing-crossboundaries.md delete mode 100644 approaches/init-init-export-contract.md delete mode 100644 approaches/init-init-export-lazy-namespace.md delete mode 100644 approaches/init-init-export-surface.md diff --git a/approaches/agent-backend-boundary-split.md b/approaches/agent-backend-boundary-split.md deleted file mode 100644 index abf6010..0000000 --- a/approaches/agent-backend-boundary-split.md +++ /dev/null @@ -1,39 +0,0 @@ -# Approach: Backend Boundary Split - -## Strategy -- Split backend-specific command behavior out of `src/continuous_refactoring/agent.py`. 
-- Create focused modules: - - `src/continuous_refactoring/agent_backends.py` for supported-agent validation and command construction, - - `src/continuous_refactoring/agent_claude_stream.py` for Claude stream-json extraction, - - keep `agent.py` as the orchestration layer for interactive execution, settle handling, and observed command capture. -- Keep public imports stable through `agent.py`; no package-root re-export churn unless needed. - -## Tradeoffs -- Strongest readability gain around the real domain seam: Codex and Claude are different products with different protocol handling. -- Makes future backend additions or behavior changes less likely to bloat the process-control code. -- Medium migration churn because tests and imports will move across files. -- Risk of over-splitting if backend logic remains tiny after cleanup. - -## Estimated phases -1. Extract backend validation and command builders into `agent_backends.py` with no behavior changes. - - `required_effort`: `medium` -2. Extract Claude NDJSON parsing into `agent_claude_stream.py` and retarget stream-json tests there. - - `required_effort`: `low` -3. Reduce `agent.py` to orchestration glue plus interactive/process-control concerns. - - `required_effort`: `medium` -4. Delete dead wrappers and duplicate private helpers once all callsites are stable. - - `required_effort`: `low` -5. Run full pytest, paying extra attention to package export uniqueness and callsite imports. - - `required_effort`: `low` - -## Risk profile -- Technical risk: medium -- Blast radius: medium -- Failure modes: - - Import cycles if orchestration and backend helpers are split in the wrong direction. - - Private helper extraction accidentally weakening boundary errors or hiding unsupported-agent checks. - - Test churn masking a subtle command-line regression. - -## Best when -- The main pain is that backend concerns and process-control concerns are mixed together. 
-- We want a real module boundary without touching the heavier settle/watchdog code yet. diff --git a/approaches/agent-execution-domain-split.md b/approaches/agent-execution-domain-split.md deleted file mode 100644 index 58d58b8..0000000 --- a/approaches/agent-execution-domain-split.md +++ /dev/null @@ -1,40 +0,0 @@ -# Approach: Execution-Domain Split - -## Strategy -- Split `agent.py` by execution model rather than by backend. -- Proposed modules: - - `src/continuous_refactoring/agent_commands.py` for agent command construction and support checks, - - `src/continuous_refactoring/agent_interactive.py` for settle protocol, signal escalation, terminal reset, and TTY handling, - - `src/continuous_refactoring/agent_observed.py` for subprocess capture, watchdog behavior, timestamped logs, and test execution, - - `src/continuous_refactoring/agent.py` as a thin public facade. -- Keep public function names stable: `build_command`, `maybe_run_agent`, `run_agent_interactive`, `run_agent_interactive_until_settled`, `run_observed_command`, `run_tests`, `summarize_output`. - -## Tradeoffs -- Cleanest long-term structure. The boundaries match how callers think about the module. -- Makes the load-bearing settle protocol and watchdog code easier to review in isolation. -- Highest churn of the options here. More files, more imports, more chances to nick a subtle invariant. -- Adds a facade module, which is justified only if we believe `agent.py` will keep evolving. - -## Estimated phases -1. Extract `agent_commands.py` and move backend validation plus command builders first. - - `required_effort`: `medium` -2. Extract `agent_observed.py` for command capture, watchdog, and `run_tests`. - - `required_effort`: `medium` -3. Extract `agent_interactive.py` for settle protocol, terminal state handling, and forced Codex reset. - - `required_effort`: `high` -4. Collapse `agent.py` into a thin facade with direct imports and no compatibility shims beyond those imports. 
- - `required_effort`: `low` -5. Rebalance tests around the new module boundaries and run full pytest. - - `required_effort`: `medium` - -## Risk profile -- Technical risk: medium-high -- Blast radius: high -- Failure modes: - - Import layering mistakes around `ContinuousRefactorError`, `CommandCapture`, and shared helpers. - - Behavioral regressions in the settle handshake or watchdog teardown because lifecycle code moved wholesale. - - Package export collisions or stale imports if the facade and implementation modules drift. - -## Best when -- We want the migration to end with a durable structure, not just a neater big file. -- We can afford a higher-churn refactor in exchange for clearer review surfaces later. diff --git a/approaches/agent-inplace-seams.md b/approaches/agent-inplace-seams.md deleted file mode 100644 index a64d159..0000000 --- a/approaches/agent-inplace-seams.md +++ /dev/null @@ -1,37 +0,0 @@ -# Approach: In-Place Seams Inside `agent.py` - -## Strategy -- Keep `src/continuous_refactoring/agent.py` as one module for now. -- Refactor internally around three truthful sections: - - command construction and backend validation, - - interactive settle lifecycle and terminal recovery, - - observed command execution and watchdog logging. -- Normalize helper naming and data flow so the public API reads top-down without changing imports anywhere else. -- Add a small amount of typed structure only where it shortens branches or clarifies return values. - -## Tradeoffs -- Safest path. No import churn, no package-surface changes, minimal merge pain. -- Best fit if the immediate problem is readability and local change friction, not module count. -- Leaves `agent.py` large. It gets cleaner, but not smaller in a meaningful architectural way. -- Does not create future domain boundaries for backend-specific behavior. - -## Estimated phases -1. Reorder and tighten private helpers so command-building, settle logic, and observed-command logic read as coherent blocks. 
- - `required_effort`: `low` -2. Introduce small internal value helpers where they remove repetitive branching without hiding behavior. - - `required_effort`: `low` -3. Update tests to reflect any renamed helpers or changed internal flow, while keeping behavior identical. - - `required_effort`: `low` -4. Run full pytest and remove dead local helper paths uncovered during the cleanup. - - `required_effort`: `low` - -## Risk profile -- Technical risk: low -- Blast radius: low -- Failure modes: - - Accidental behavior drift in settle timing or Claude output extraction during local cleanup. - - Over-tidying that obscures the load-bearing Codex terminal reset and watchdog semantics. - -## Best when -- We want the fastest safe readability win. -- We do not yet know which future split is actually worth carrying. diff --git a/approaches/artifacts-boundary-hardening-inplace.md b/approaches/artifacts-boundary-hardening-inplace.md deleted file mode 100644 index 4e5f147..0000000 --- a/approaches/artifacts-boundary-hardening-inplace.md +++ /dev/null @@ -1,50 +0,0 @@ -# Approach: In-Place Artifact Boundary Hardening - -## Strategy -- Keep module surfaces stable and refactor inside the existing cluster with minimal churn: - - `src/continuous_refactoring/artifacts.py` - - `src/continuous_refactoring/agent.py` - - `src/continuous_refactoring/loop.py` - - `src/continuous_refactoring/phases.py` - - `src/continuous_refactoring/migration_tick.py` - - `src/continuous_refactoring/routing_pipeline.py` - - `src/continuous_refactoring/config.py` - - `src/continuous_refactoring/git.py` - - `src/continuous_refactoring/cli.py` -- Treat `artifacts.py` as the current error and telemetry spine, but harden it so every external effect returns actionable causes and preserves `__cause__`. 
-- At module boundaries (agent, cli, loop, phases, routing, migration, git, config) translate only where behavior needs a boundary contract change: - - Keep original exceptions as nested causes unless caller-level signal is improved by context. - - Avoid blanket wrapping inside helper functions that are already at the callsite. - -## Tradeoffs -- Lowest blast radius and easiest to apply under an active migration. -- No new module-level indirection and little `__init__.py` risk. -- Best fit for taste version 1: strong boundary comments only where contract changes. -- Leaves `artifacts.py` still carrying multiple concerns (capture/state/path/root metadata), but no risky cut needed for this migration. -- Keeps direct import compatibility with `ContinuousRefactoringError` and existing `_SUBMODULES`. - -## Estimated phases -1. Add migration tests for failure-cause retention -2. Introduce explicit boundary helpers and nested exceptions in `artifacts.py` -3. Update cluster modules to catch and wrap only at decision points -4. Add regression tests on loop/migration-path behavior -5. Tighten CLI exit messaging while preserving exact user-visible strings that tests assert - -### Phase intent -- Phase 1: Add focused tests in `tests/test_continuous_refactoring.py`, `tests/test_phases.py`, `tests/test_loop_migration_tick.py`, `tests/test_routing.py` for `__cause__` preservation. -- Phase 2: In `artifacts.py`, add small helpers for atomic JSON/log writes and command capture parsing that include nested underlying errors. -- Phase 3: In cluster modules, avoid new broad wrappers; replace ambiguous messages with boundary-specific context where needed. -- Phase 4: Verify migration and loop flow still emits expected artifacts summaries and commit handoff semantics. -- Phase 5: Run targeted migration tests, then run full suite as final gate. 
- -## Risk profile -- Technical risk: low to medium -- Blast radius: medium, because changes touch loop routing and failure persistence paths -- Failure modes: - - Message-level test regressions if we over-wrap and lose exact strings. - - Slightly more verbose failure paths in `artifacts.py` impacting readability if too many wrappers are added. - - No new APIs expected, so integration regression risk stays low. - -## Why this first -- It satisfies the taste mandate (boundary-aware wrapping with cause chaining) without a disruptive module split. -- It keeps compatibility and can be evaluated quickly with tight, deterministic phase gates. diff --git a/approaches/artifacts-domain-split-lightweight.md b/approaches/artifacts-domain-split-lightweight.md deleted file mode 100644 index bfd51be..0000000 --- a/approaches/artifacts-domain-split-lightweight.md +++ /dev/null @@ -1,43 +0,0 @@ -# Approach: Lightweight Domain Split of Artifact Subsystems - -## Strategy -- Keep API compatibility but split `artifacts.py` into two files: - - `src/continuous_refactoring/artifacts_models.py` for immutable telemetry data structures. - - `src/continuous_refactoring/artifact_runs.py` for run lifecycle creation and atomic writes. - - `src/continuous_refactoring/artifacts.py` as a thin compatibility re-export and doc seam. -- Keep CLI/migration and loop integration untouched where possible: - - `loop.py`, `phases.py`, `migration_tick.py`, `routing_pipeline.py`, `agent.py`, `config.py`, `git.py`, `cli.py`. -- Replace ad hoc imports of `ContinuousRefactorError` from `artifacts.py` with direct imports from `artifacts.py` compatibility alias only if needed. -- This creates clearer file-level domains while preserving FQNs and avoiding module sprawl. - -## Tradeoffs -- Cleaner local module focus and lower future merge pain when `artifacts.py` starts to grow. -- Best future extensibility for migration state persistence versus command-capture concerns. 
-- Highest mechanical risk of this set due to the split and import graph migration. -- Increases short-term review burden because many names stay re-exported for compatibility. -- Must guard against hidden behavior shifts due to import order and module initialization. - -## Estimated phases -1. Create split modules with zero-behavior shims and compatibility exports -2. Migrate production imports and keep package `__all__` uniqueness clean -3. Fold in taste-compliant error wrapping and cause chaining during migration -4. Update tests to use compatibility imports and assert no drift in summaries/events -5. Run full suite after phased import migration and clean dead-paths - -### Phase intent -- Phase 1: Data models and lifecycle utilities move out without changing logic. -- Phase 2: Rewire imports in cluster modules and ensure `continuous_refactoring.__all__` contract remains stable. -- Phase 3: Apply error-boundary pass without introducing interface churn. -- Phase 4: Remove transitional names and dead compatibility comments only if no longer needed. -- Phase 5: Verification as per existing full-run migration gate. - -## Risk profile -- Technical risk: medium -- Blast radius: high -- Failure modes: - - Package import order regressions while `__init__.py` rebuilds re-exports. - - Hidden test failures due to import-time side effects. - - More difficult conflict detection with duplicate symbols during package init. - -## Why pick this only if we can absorb the churn -- Strong structure win, but not worth it if we need the cleanest, fastest path to safe artifact boundary improvement.
diff --git a/approaches/artifacts-error-typing-crossboundaries.md b/approaches/artifacts-error-typing-crossboundaries.md deleted file mode 100644 index f5edda0..0000000 --- a/approaches/artifacts-error-typing-crossboundaries.md +++ /dev/null @@ -1,44 +0,0 @@ -# Approach: Error Taxonomy with Boundary-Specific Types - -## Strategy -- Introduce a dedicated error module and explicit boundary error classes while preserving public compatibility: - - Add `src/continuous_refactoring/errors.py` with `ContinuousRefactoringError` as canonical base. - - Re-export `ContinuousRefactoringError` from `artifacts.py` to keep existing imports and `__init__` behavior intact. - - Add module-level boundary types: `CommandBoundaryError`, `ArtifactBoundaryError`, `GitBoundaryError`, `MigrationBoundaryError`, `LoopBoundaryError`. -- Move wrapping logic so each cluster module becomes explicit about what it owns and what it reports: - - `agent` and `config` wrap infra faults when translating to domain failure outcomes. - - `loop`, `phases`, `migration_tick`, and `routing_pipeline` wrap only policy-level failures. -- Keep semantics of CLI and migration scheduling unchanged; preserve existing command strings, artifact path names, and summary structure. - -## Tradeoffs -- Clearer operational signal and cleaner root-cause triage. -- Stronger alignment with taste instruction on nested exceptions at boundaries. -- Larger import churn across the cluster and tests. -- Must update `continuous_refactoring.__all__` import graph after moving exported error ownership, which is extra mechanical risk. -- Potentially over-specified errors if boundary classes expand faster than actual domain needs. - -## Estimated phases -1. Add `errors.py` and compatibility export path -2. Create boundary exception types and migrate `artifacts.py` to consume canonical base -3. Shift catch/raise behavior in `agent.py`, `git.py`, `loop.py`, `phases.py`, `migration_tick.py`, `routing_pipeline.py` -4. 
Update `cli.py` and tests that assert exact exception types/messages -5. Run targeted migration and full project verification - -### Phase intent -- Phase 1: New module only, no production behavior changes yet. -- Phase 2: Add wrappers and nesting around I/O/process/git/agent callouts. -- Phase 3: Convert consumer catches to boundary-aware failures and update decision records where needed. -- Phase 4: Add/adjust tests for exception typing, compatibility of imports, and boundary names in messages. -- Phase 5: Validate `tests/test_continuous_refactoring.py`, `tests/test_run.py`, `tests/test_run_once.py`, `tests/test_phases.py`, and `tests/test_loop_migration_tick.py`. - -## Risk profile -- Technical risk: medium -- Blast radius: medium -- Failure modes: - - Import graph breakage from moved exported symbols into `__init__.py` and `_SUBMODULES`. - - Tests that assert specific exception text may break on message wrapping style. - - Additional migration complexity from adding new module and maintaining alias compatibility. - -## Why choose this if stability budget allows -- Better long-term maintainability and explicit domain boundaries. -- Clear runway for future non-trivial refactors where cross-module ownership gets noisier than today. diff --git a/approaches/init-init-export-contract.md b/approaches/init-init-export-contract.md deleted file mode 100644 index c3b4fc8..0000000 --- a/approaches/init-init-export-contract.md +++ /dev/null @@ -1,34 +0,0 @@ -# Approach: Contract-Driven Public Surface Descriptor - -## Strategy -- Move exported-public definition out of runtime module-order magic into `src/continuous_refactoring/public_api.py`. -- Define a compact, explicit `PUBLIC_REEXPORTS` descriptor (module_name, symbol, optional alias) and drive `__init__.py` from that list. -- Keep package runtime behavior: same names still appear in `continuous_refactoring.__all__`, same re-exported callsites, same hidden-module boundary. 
-- Keep `__SUBMODULES` for import validation, but source of truth for API moves to descriptor data. - -## Tradeoffs -- Pros: clearer intent, easier code review for future API changes, simpler to detect stale/manual exports, aligns with domain-focused boundaries and naming truthfulness. -- Cons: adds one new module and one migration step to validate descriptor integrity. -- Why this is taste-aligned: no speculative abstractions, clear readability gain, explicit compatibility over convenience. - -## Estimated phases -1. Add `src/continuous_refactoring/public_api.py` with a typed re-export descriptor + minimal validation helpers. -2. Refactor `src/continuous_refactoring/__init__.py` to build `__all__` from descriptor + runtime imports only. -3. Add descriptor-level tests in `tests/test_continuous_refactoring.py` for: - - all exported names present, - - no duplicate symbol names in descriptor, - - internal module not re-exported (`migration_manifest_codec` remains private). -4. Add a migration check that compares generated `__all__` to a non-empty known set to prevent accidental empty exposure. -5. Run targeted contract tests for package init and prompt/loop import flows. - -### Phased scope -- Files touched: `src/continuous_refactoring/__init__.py`, `src/continuous_refactoring/public_api.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Medium -- Blast radius: Medium -- Failure modes: - - New descriptor errors can hide symbols if import paths drift. - - More churn touching two new files means merge conflict potential during rapid migrations. -- Mitigation: keep descriptor small and strictly validated before touching any loop/routing logic. 
diff --git a/approaches/init-init-export-lazy-namespace.md b/approaches/init-init-export-lazy-namespace.md deleted file mode 100644 index b5e7cb8..0000000 --- a/approaches/init-init-export-lazy-namespace.md +++ /dev/null @@ -1,31 +0,0 @@ -# Approach: Lazy-Load Package Namespace via `__getattr__` - -## Strategy -- Replace eager import-and-reexport side-effects in `src/continuous_refactoring/__init__.py` with explicit `__all__` and lazy symbol resolution via `__getattr__`. -- Keep exported API stable but defer module imports until first symbol access. -- Use cause-preserving wrapping only in namespace boundary failures (e.g., loader exception -> wrapped as `ContinuousRefactorError` with original exception attached) and avoid translation elsewhere. -- Keep `__SUBMODULES` for package contract visibility, but shrink initial work needed for import-time module graph. - -## Tradeoffs -- Pros: faster and cleaner import path for package consumers, easier to spot import fan-in issues when one symbol fails to resolve. -- Cons: behavior shifts for side effects that depended on module import side-effects during package import; requires careful docs/tests for `hasattr`/`dir` expectations. -- Why this is taste-aligned: keeps compatibility paths safer (no hard cuts), uses explicit boundary mapping, uses truthful transitional naming (`migrating`/`stabilized` states where needed in plan docs). - -## Estimated phases -1. Design a symbol-to-module map (static, not dynamic inference) and explicit `__all__` in `__init__`. -2. Implement `__getattr__` loader path and `__dir__` to keep introspection stable. -3. Add targeted tests in `tests/test_continuous_refactoring.py` and a small namespace-focused regression test verifying `hasattr` works for public exports. -4. Add a migration-readiness test run against `loop.py`/`prompts.py` entry usage to ensure the refactoring pipeline still imports cleanly. -5. Decide and lock rollback if lazy behavior introduces import timing regressions. 
- -### Phased scope -- Files touched: `src/continuous_refactoring/__init__.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Medium to High -- Blast radius: Medium -- Failure modes: - - subtle breakage in code that relies on eager module import side effects. - - harder-to-diagnose delayed import failures during runtime. -- Mitigation: phase-gated activation with a hard stop plan after contract test failures; fallback to Approach 1 style if timing regression appears. diff --git a/approaches/init-init-export-surface.md b/approaches/init-init-export-surface.md deleted file mode 100644 index 43c8296..0000000 --- a/approaches/init-init-export-surface.md +++ /dev/null @@ -1,34 +0,0 @@ -# Approach: Surface-Clarity Refactor for `__init__.py` - -## Strategy -- Keep current re-export model and behavior intact, but make it explicit and inspectable. -- In `src/continuous_refactoring/__init__.py`, replace the raw tuple of imports with a small set of explicit module entries plus one `collect_package_exports()` helper. -- Enforce duplicate detection with origin-aware error messages (module + symbol), preserving full cause chains on lower-level errors only where raised. -- Keep `__SUBMODULES` and exported symbols backward-compatible for existing tests and callers. -- No module split or new runtime behavior outside package init. - -## Tradeoffs -- Pros: very low blast radius, low behavioral risk, minimal API churn, direct migration to stable `__all__` contract. -- Cons: still keeps `__init__.py` as the central export hub and does not change the eager-import profile. -- Why this is taste-aligned: it avoids speculative boundaries, keeps module boundaries stable, and improves clarity without touching dead/legacy code paths. - -## Estimated phases -1. Capture current export expectations in tests - - Add/extend assertions for stable symbol presence and deterministic export order if useful. -2. 
Introduce a structured `_PUBLIC_MODULES` list and extraction helper in `src/continuous_refactoring/__init__.py` - - Preserve module import order and public-only behavior. -3. Upgrade duplicate-symbol checks to include duplicate provenance details while keeping same failure contract. -4. Add a tiny regression test for internal-module re-export exclusion still holding (`migration_manifest_codec` remains module-private to package root). -5. Run focused package contract tests. - -### Phased scope -- File touched: `src/continuous_refactoring/__init__.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Low -- Blast radius: Low -- Failure modes: - - Hidden breakage if symbol collection accidentally drops a symbol due descriptor typo. - - Slightly harder-to-spot import-time failures if one of the modules in the explicit list raises on import. -- Mitigation: phase gates with existing package contract tests before migration write path changes. From 7776f93e8408e893c567be3470af33b5b7e42aa9 Mon Sep 17 00:00:00 2001 From: Hiren Hiranandani Date: Thu, 30 Apr 2026 14:46:16 -0700 Subject: [PATCH 030/103] working plan for migration resumption --- .gitignore | 2 + AGENTS.md | 26 +- README.md | 36 +- ...gration-dirs-and-consistency-foundation.md | 157 ++ ...-state-schema-and-durable-stage-outputs.md | 177 +++ ...003-atomic-planning-workspace-publisher.md | 170 +++ .../004-resumable-one-step-planning-engine.md | 175 +++ ...nning-before-phase-execution-scheduling.md | 164 +++ docs/plans/006-migration-list-and-doctor.md | 149 ++ .../007-migration-review-staged-publish.md | 140 ++ docs/plans/008-migration-refine.md | 139 ++ .../009-hardening-compatibility-and-docs.md | 144 ++ src/continuous_refactoring/cli.py | 78 + src/continuous_refactoring/failure_report.py | 28 +- src/continuous_refactoring/loop.py | 184 ++- src/continuous_refactoring/migration_cli.py | 697 +++++++++ .../migration_consistency.py | 449 ++++++ 
src/continuous_refactoring/migration_tick.py | 405 ++++- src/continuous_refactoring/planning.py | 837 +++++++++-- .../planning_publish.py | 681 +++++++++ src/continuous_refactoring/planning_state.py | 995 +++++++++++++ src/continuous_refactoring/prompts.py | 51 +- src/continuous_refactoring/review_cli.py | 234 ++- .../routing_pipeline.py | 91 +- tests/test_cli_migrations.py | 1300 +++++++++++++++++ tests/test_cli_review.py | 123 +- tests/test_continuous_refactoring.py | 9 +- tests/test_failure_report.py | 42 + tests/test_focus_on_live_migrations.py | 226 ++- tests/test_loop_migration_tick.py | 463 ++++++ tests/test_migration_consistency.py | 215 +++ tests/test_no_driver_branching.py | 3 + tests/test_planning.py | 707 ++++++++- tests/test_planning_publish.py | 594 ++++++++ tests/test_planning_state.py | 590 ++++++++ tests/test_prompts.py | 222 +++ tests/test_run.py | 438 +++++- tests/test_run_once.py | 88 ++ tests/test_scope_loop_integration.py | 18 +- 39 files changed, 10958 insertions(+), 289 deletions(-) create mode 100644 docs/plans/001-visible-migration-dirs-and-consistency-foundation.md create mode 100644 docs/plans/002-planning-state-schema-and-durable-stage-outputs.md create mode 100644 docs/plans/003-atomic-planning-workspace-publisher.md create mode 100644 docs/plans/004-resumable-one-step-planning-engine.md create mode 100644 docs/plans/005-planning-before-phase-execution-scheduling.md create mode 100644 docs/plans/006-migration-list-and-doctor.md create mode 100644 docs/plans/007-migration-review-staged-publish.md create mode 100644 docs/plans/008-migration-refine.md create mode 100644 docs/plans/009-hardening-compatibility-and-docs.md create mode 100644 src/continuous_refactoring/migration_cli.py create mode 100644 src/continuous_refactoring/migration_consistency.py create mode 100644 src/continuous_refactoring/planning_publish.py create mode 100644 src/continuous_refactoring/planning_state.py create mode 100644 tests/test_cli_migrations.py create mode 
100644 tests/test_migration_consistency.py create mode 100644 tests/test_planning_publish.py create mode 100644 tests/test_planning_state.py diff --git a/.gitignore b/.gitignore index eef6600..7625076 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .scratchpad/ +tmpdir/ +.pytest_cache/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/AGENTS.md b/AGENTS.md index f1c705f..83c696f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -20,6 +20,14 @@ Treat `AGENTS.md` as part of the codebase's invariants, not documentation. A dri - Test one: `uv run pytest tests/test_x.py::test_name` - Entry: `continuous-refactoring --help` / `continuous-refactoring --version` (or `python -m continuous_refactoring`) +- Inspect migrations: `continuous-refactoring migration list` / + `continuous-refactoring migration doctor ` / + `continuous-refactoring migration doctor --all` +- Review migrations: `continuous-refactoring migration review ` + (top-level `review perform ` remains a compatibility wrapper) +- Refine migration planning: `continuous-refactoring migration refine + (--message |--file ) --with codex|claude --model + --effort ` No lint, no typecheck, no formatter, no pre-commit. GitHub Actions `Test` runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions @@ -40,6 +48,12 @@ runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions - **Scope expansion** — deciding the set of files edited together with the target (`scope_expansion.py`). - **Classifier / routing** — picks which agent handles a target (`routing.py`). - **Migration** — a multi-phase plan living under `migrations//`. +- **Visible migration directory** — direct child migration dir that is not hidden, dotted, symlinked, or internal/transactional; enumerate through `iter_visible_migration_dirs()`. +- **Consistency finding** — structured migration integrity result with shared `info | warning | error` severity and `planning-snapshot | ready-publish | execution-gate | doctor` mode. 
+- **Planning state** — durable resume/audit cursor at `/.planning/state.json`; it records accepted planning steps and their repo-relative stage outputs. +- **Planning stage output** — accepted planning stdout stored under `/.planning/stages/.stdout.md`; repeated accepted steps use suffixed refs such as `-2.stdout.md`. Failed current-step output stays in run artifacts only. +- **Planning feedback** — explicit user refinement feedback recorded in `.planning/state.json`; it reuses the `revise` planning step and is published only through staged planning/refine transactions. +- **Planning workspace** — off-live candidate migration snapshot built under project state, then copied to a live-dir transaction before publish. - **Phase** — one step of a migration; state transitions in `phases.py`. - **Precondition** — what must already be true before a phase may execute; stored on each manifest phase as `precondition`. - **Definition of Done** — what must be true for a phase to count as completed; written in each phase markdown doc under `## Definition of Done`. @@ -48,7 +62,7 @@ runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions - **Eligibility cooldown** — `manifest.cooldown_until` gates re-checks after a migration was deferred or blocked; `last_touch` records activity only. - **Settle protocol** — `.done` + sha256 handshake confirming an interactive agent is finished. - **Status block** — the driver's end-of-attempt summary written to artifacts. -- **Call role** — `classifier | planner | editor | reviewer` slot filled in a prompt. +- **Call role** — prompt slot recorded in artifacts, including `classifier`, `editor`, dotted planning roles such as `planning.`, `planning.state`, `planning.publish`, and phase roles such as `phase.ready-check` or `phase.execute`. - **Effort budget** — shared nominal tiers `low < medium < high < xhigh`; `--default-effort` is the normal call effort, `--max-allowed-effort` caps target overrides and phase escalation. 
- **Failure snapshot** — per-attempt failure record at `…/projects//failures/-attempt-NNN-retry-NN-.md`. One file per failed attempt; sort to find the latest. @@ -93,9 +107,15 @@ active phase explicitly names `loop.py` in scope. - **Driver owns commits** (`refactor_attempts.py:_finalize_commit()`, called from `loop.py`) — if an agent commits mid-attempt, driver does `git reset --soft head_before` and re-commits with its own message. - **Migration scheduling split** (`migrations.py`, `loop.py`, `phases.py`) — `last_touch` is activity bookkeeping, not the 6-hour retry gate. Deferred/blocked migrations set `cooldown_until`; successful phase completion clears deferral markers so the next ready phase can run immediately. - **Migration tick deferral writes** (`migration_tick.py`) — ready-check deferrals are queued while scanning candidates and saved only when the tick finds no executable phase or blocks for human review. Do not save a deferred manifest before checking later candidates; that dirties the worktree and can make ready-checks reject runnable phases. +- **Migration visibility + consistency gate** (`migration_consistency.py`, `migration_tick.py`, `loop.py`, `review_cli.py`) — candidate scans use `iter_visible_migration_dirs()` so hidden/dotted/internal/symlink dirs are invisible to tick/review list. Before ready-check, `execution-gate` consistency errors block phase execution; `info`/`warning` never block. - **Manifest codec boundary** (`migration_manifest_codec.py`, `migrations.py`) — codec owns legacy `ready_when`, legacy integer `current_phase`, duplicate phase-name rejection, and saved JSON formatting. `load_manifest()` / `save_manifest()` own filesystem and JSON boundary errors. -- **Review CLI boundary** (`cli.py`, `review_cli.py`) — `cli.py` owns parser wiring and run dispatch; migration review internals live in `review_cli.py`, which stays internal and out of package-root `_SUBMODULES`. 
-- **Human-review gating** (`planning.py`, `migration_tick.py`, `review_cli.py`) — migrations with `awaiting_human_review=true` must be invisible to automated migration ticks/ready-checks until `review perform` clears the flag. +- **Planning state codec boundary** (`planning_state.py`, `planning.py`) — `.planning/state.json` is valid only when completed steps replay through the branching planning graph to `next_step`; recorded outputs must be repo-relative files inside the migration directory. User refinement feedback is durable state, and `revision_base_step_count` is the replay marker that lets an unexecuted ready migration reuse `revise` after a terminal ready decision. Persist accepted step stdout after the step is validated; do not add durable fields for failed current-step output. +- **Planning publish transaction** (`planning_publish.py`) — publish copies the complete workspace snapshot to `__transactions__//staged`, validates it, checks same-device and `base_snapshot_id`, moves live to `rollback`, moves staged live, validates live, then deletes rollback. On post-rollback failure, move bad live to `failed` before restoring rollback. Transaction directories are invisible to scheduling/list candidates but visible to `migration doctor --all`. Do not bypass the lock or dirty-live check. +- **One-step planning engine** (`planning.py`) — product planning entry points call `run_next_planning_step()` so one action runs exactly `PlanningState.next_step`, records accepted stdout/state in an off-live workspace, and publishes through `planning_publish.py`. Failed current-step output is never durable resume input. `run_planning` is intentionally not package-exported. +- **Planning resume scheduling** (`migration_tick.py`, `loop.py`, `routing_pipeline.py`) — normal automation runs one eligible `status: planning` step before ready/in-progress phase ticks and before source-target routing. 
Missing or invalid `.planning/state.json` blocks automation with planning failure evidence; `status: planning` must never enter phase ready-check or phase execution. +- **Review CLI boundary** (`cli.py`, `review_cli.py`) — `cli.py` owns parser wiring; staged migration review internals live in `review_cli.py`, publish only through `planning_publish.py`, and stay internal/out of package-root `_SUBMODULES`. Top-level `review perform` is only a compatibility wrapper around this path. +- **Migration CLI boundary** (`cli.py`, `migration_cli.py`) — `cli.py` owns parser wiring only; `migration_cli.py` owns namespace dispatch, read-only list/doctor behavior, and the contained slug/path resolver used by mutation commands. Mutating subcommands delegate their internals to focused modules such as `review_cli.py` or the planning refine entry point. Resolver targets must stay direct visible children of the configured live migrations root and reject symlink, outside, parent-traversal, and ambiguous paths. +- **Human-review gating** (`planning.py`, `migration_tick.py`, `review_cli.py`) — migrations with `awaiting_human_review=true` must be invisible to automated migration ticks/ready-checks until canonical `migration review` clears the flag through staged publish; top-level `review perform` routes to the same compatibility path. `migration refine` may reopen an unexecuted ready migration to planning, but it is user feedback, not review approval. - **Migration terminology split** (`migrations.py`, `planning.py`, `prompts.py`) — manifest `precondition` gates phase start; phase markdown `## Definition of Done` governs completion. - **Run-level baseline validation** (`loop.py`) — `run-once`, `run`, and `--focus-on-live-migrations` run the configured validation command after the clean-worktree check and before routing/refactoring. A red baseline stops as `baseline_failed`, not migration human review. 
- **Phase execution validation gate** (`phases.py`, `prompts.py`, `loop.py`) — a migration phase is complete only after host-side full validation passes. `execute_phase()` retries validation-red attempts from `head_before` up to the effective `--max-attempts` budget, and the phase prompt must include the literal configured validation command plus the phase file's Definition of Done as the completion contract. diff --git a/README.md b/README.md index a22a8d7..7796b17 100644 --- a/README.md +++ b/README.md @@ -122,8 +122,13 @@ continuous-refactoring run \ | `run-once` | Single pass on one resolved target. No retry. If there is a diff and validation passes, it commits locally and prints the diffstat. | | `run` | The loop. Iterates targets, retries on failure, and commits successful targets locally. | | `upgrade` | Checks that the global config manifest is current, rewrites it idempotently, and warns if the global taste file is stale. | -| `review list` | Lists migrations flagged for human review (`awaiting_human_review`). | -| `review perform ` | Starts an interactive agent session to resolve a flagged migration's review. Requires `--with`, `--model`, and `--effort`. | +| `migration list` | Lists visible migrations. Add `--status ` or `--awaiting-review` to filter. | +| `migration doctor ` | Validates one visible migration's consistency. | +| `migration doctor --all` | Validates every visible migration plus internal transaction state. | +| `migration review ` | Starts staged review for a migration awaiting human review. Requires `--with`, `--model`, and `--effort`. | +| `migration refine ` | Records feedback for a planning or unexecuted ready migration and runs one staged planning revision. Requires `--message ` or `--file `, plus `--with`, `--model`, and `--effort`. | + +Legacy `review list` and `review perform ` remain compatibility aliases; prefer `migration list --awaiting-review` and `migration review`. 
## Targeting / Useful flags @@ -146,10 +151,27 @@ If you provide none of `--targets`, `--globs`, `--extensions`, or `--paths`, the - `init --live-migrations-dir PATH` — enables the larger-refactoring workflow for this project. The path is stored repo-relative in the project registry and created if missing. - `init --in-repo-taste [PATH]` — stores this project's taste file in the repo and remembers the repo-relative path. Defaults to `.continuous-refactoring/taste.md`; re-run `init --in-repo-taste ...` to choose a different path. +- `migration list` — shows visible migrations; `--awaiting-review` narrows to human-review handoffs. +- `migration doctor ` / `migration doctor --all` — read-only consistency checks. Doctor reports problems; it does not repair them. +- `migration review --with ... --model ... --effort ...` — resolves an `awaiting_human_review` migration through a staged workspace. +- `migration refine (--message |--file ) --with ... --model ... --effort ...` — adds user feedback to a planning or unexecuted ready migration and resumes planning through the `revise` step when reopening ready work. - `taste --refine` — opens a collaborative editing session for the taste file. The agent keeps refining until you tell it to write, then the session ends automatically after the settled write. - `taste --upgrade` — re-interviews for taste dimensions added since your last version. No-op when already current; use `taste --refine` if you want to rework the doc anyway. - `taste --force` — only applies to `--interview`; it allows a customized taste file to be overwritten after backing it up to `taste.md.bak`. 
+Canonical migration commands: + +```bash +continuous-refactoring migration list +continuous-refactoring migration list --status planning +continuous-refactoring migration list --awaiting-review +continuous-refactoring migration doctor +continuous-refactoring migration doctor --all +continuous-refactoring migration review --with codex --model gpt-5 --effort high +continuous-refactoring migration refine --message "split the risky phase" --with codex --model gpt-5 --effort high +continuous-refactoring migration refine --file feedback.md --with codex --model gpt-5 --effort high +``` + ### Shared `run` / `run-once` flags - `--with`, `--model` — required agent backend/model selection. @@ -218,7 +240,7 @@ This tells the CLI where to store migration artifacts. The path is repo-relative Each `run` / `run-once` tick now checks for eligible migration work before falling back to single-commit cleanups: 1. **Classify** — a classifier agent reads the target and decides: `cohesive-cleanup` (one-shot path) or `needs-plan` (migration path). -2. **Plan** — for `needs-plan` targets, a six-stage planning workflow runs: generate approaches → pick best → expand into phases → review → revise → final review. Artifacts land under `//`. +2. **Plan** — for `needs-plan` targets, each automation action runs exactly one planning step: approaches, pick-best, expand, review, optional revise/review-2, then final-review. Accepted steps update `.planning/state.json`, store stdout under `.planning/stages/`, and publish through a staged transaction. Failed current-step output stays in run artifacts and is not resume input. 3. **Execute** — each phase is a self-contained unit of work. The tick picks the oldest eligible migration, checks whether its current phase precondition is satisfied, and executes it on the current branch. Phase completion is judged against the phase file's `## Definition of Done`; commit message identifies the migration as `migration//.md`. 
### Migration directory layout @@ -227,14 +249,20 @@ Each `run` / `run-once` tick now checks for eligible migration work before falli / / manifest.json # status, phases, wake-up schedule + .planning/ + state.json # durable planning cursor and accepted step refs + stages/ # accepted planning stdout, suffixed on repeats plan.md # the expanded plan approaches/ # candidate approaches considered during planning phase-1-.md # per-phase specification phase-2-.md ... + __transactions__/ # internal staged publish state __intentional_skips__/ # migrations rejected at final review ``` +Do not edit `.planning/` or `__transactions__/` by hand. Use `migration doctor` when the shape looks wrong. + ### Wake-up rules Migrations don't run on every tick. The scheduler now separates **activity** from @@ -268,7 +296,7 @@ Before executing a phase, a ready-check agent verifies that the current phase pr - **ready: yes** — phase executes; on green tests, the phase is marked done, any prior deferral markers are cleared, and the migration advances immediately to the next phase. - **ready: no** — manifest activity is bumped, a retry cooldown is started, and a future `wake_up_on` is recorded when needed; the tick moves on. -- **ready: unverifiable** — the migration is flagged `awaiting_human_review` and put on cooldown. Automated migration ticks skip flagged migrations until review clears the flag. Use `review list` to find it and `review perform --with ... --model ... --effort ...` to resolve it interactively. +- **ready: unverifiable** — the migration is flagged `awaiting_human_review` and put on cooldown. Automated migration ticks skip flagged migrations until review clears the flag. Use `migration list --awaiting-review` to find it and `migration review --with ... --model ... --effort ...` to resolve it interactively. Human-facing migration references use the relative phase spec path, for example `phase-2-failure-report.md`. The manifest cursor stores the phase `name`, not a numeric index. 
diff --git a/docs/plans/001-visible-migration-dirs-and-consistency-foundation.md b/docs/plans/001-visible-migration-dirs-and-consistency-foundation.md new file mode 100644 index 0000000..7089701 --- /dev/null +++ b/docs/plans/001-visible-migration-dirs-and-consistency-foundation.md @@ -0,0 +1,157 @@ +# 001 - Visible Migration Dirs And Consistency Foundation + +## Goal + +Create the shared foundation every later planning-resume change can stand on: + +- one definition of a visible migration directory, +- one structural consistency validator API, +- no accidental scheduling of hidden, dotted, or transaction directories, +- early consistency findings that can later gate `ready` and power `migration doctor`. + +This is the first stacked PR because the transaction publisher and CLI both need the same directory scan and validation vocabulary. + +## Non-goals + +- Do not change planning execution or resume behavior yet. +- Do not add `.planning/state.json` yet. +- Do not add the new `migration` CLI yet. +- Do not make `status: planning` executable. +- Do not repair invalid migrations; this plan only reports structured findings. + +## Current behavior and evidence + +- `migration_tick.enumerate_eligible_manifests()` scans direct children under the live migrations dir, skips names beginning with `__`, and loads `manifest.json` for eligible `ready` / `in-progress` manifests. +- `review_cli.review list` performs its own child scan and also skips only `__*` directories. +- The manifest codec validates JSON shape, duplicate phase names, and cursor references, but no shared validator checks whether `manifest.json`, `plan.md`, phase docs, and future planning state agree. +- Phase execution trusts manifest metadata after ready-check. There is no pre-ready structural gate for stale phase docs or missing plan files. 
+- Existing tests cover codec validation, tick eligibility, and review CLI filtering, but not a common "visible migration dir" helper or doctor-style consistency results. + +## Proposed design + +Add a small internal consistency module or clearly named helpers in `migrations.py`; final placement should follow the code shape discovered during implementation. + +Core concepts: + +- `iter_visible_migration_dirs(live_dir) -> list[Path]` + - includes only direct child directories, + - skips `__*`, + - skips `.*`, + - skips known transaction roots such as `__transactions__`, + - returns deterministic ordering suitable for later sorting by manifest metadata. +- `MigrationConsistencyFinding` + - frozen dataclass, + - fields such as `severity`, `mode`, `code`, `path`, `message`, + - structured enough for tests, CLI output, and failure records. +- `check_migration_consistency(migration_dir, mode)` + - `mode="planning-snapshot"` validates a visible planning snapshot without requiring terminal readiness, + - `mode="ready-publish"` validates a staged candidate before it can publish `status: ready`, + - `mode="execution-gate"` validates an already visible ready/in-progress migration before phase ready-check, + - `mode="doctor"` reports every finding without caller-specific filtering, + - this PR implements the foundation checks only. + +Severity vocabulary: + +- `info`: useful operator context; never blocks. +- `warning`: suspicious but not unsafe; doctor reports it but exits zero unless paired with errors. +- `error`: inconsistent or unsafe state; blocks ready publish, phase execution, and planning resume; makes `migration doctor` exit nonzero. + +Do not invent caller-local interpretations of findings. Later scheduler, publisher, and CLI work should ask the validator for findings, then apply the same severity contract. + +Initial checks: + +- `manifest.json` exists and loads through the existing manifest boundary. +- `manifest.name` matches the directory slug. 
+- phase docs matching `phase-<index>-<name>.md` have unique indexes and names. +- manifest phase file paths are repo-relative and stay inside the migration directory. +- manifest phase files are regular files, not symlink escapes. +- manifest phase files exist when the manifest claims non-empty phases. +- `plan.md` is required for `ready` / `in-progress`, but not for a newly seeded planning snapshot. +- transaction internals are never treated as candidate migrations. + +Do not raise exceptions for normal validation failures. Return findings. Reserve `ContinuousRefactorError` for boundary failures that prevent validation itself. + +## Files/modules likely touched + +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/migration_manifest_codec.py` +- `src/continuous_refactoring/migration_tick.py` +- `src/continuous_refactoring/review_cli.py` +- `src/continuous_refactoring/cli.py` only if command discovery needs the helper +- `tests/test_migrations.py` +- `tests/test_loop_migration_tick.py` +- `tests/test_cli_review.py` +- new `tests/test_migration_consistency.py` + +## Test strategy + +- Add focused unit tests for `iter_visible_migration_dirs()`. +- Add consistency tests for missing manifest, slug mismatch, duplicate phase docs, missing phase doc, and transaction directory exclusion. +- Update tick and review CLI tests so both use the shared visibility rule. +- Keep validation command: `uv run pytest tests/test_migration_consistency.py tests/test_migrations.py tests/test_loop_migration_tick.py tests/test_cli_review.py`, then `uv run pytest`. 
+ +Exact regression tests to add: + +- `tests/test_migration_consistency.py::test_visible_migration_dirs_skip_hidden_dotted_and_transaction_dirs` +- `tests/test_migration_consistency.py::test_consistency_reports_missing_manifest` +- `tests/test_migration_consistency.py::test_consistency_rejects_manifest_slug_mismatch` +- `tests/test_migration_consistency.py::test_consistency_rejects_manifest_phase_symlink_escape` +- `tests/test_migration_consistency.py::test_consistency_reports_duplicate_phase_doc_indexes` +- `tests/test_migration_consistency.py::test_consistency_reports_manifest_phase_missing_doc` +- `tests/test_migration_consistency.py::test_consistency_modes_share_severity_blocking_contract` +- `tests/test_loop_migration_tick.py::test_enumeration_uses_visible_migration_dirs` +- `tests/test_cli_review.py::test_review_list_ignores_hidden_and_transaction_dirs` + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Confirm every migration-dir enumeration site and list which must switch to the shared helper. +2. `[Architect]` Finalize the validator result shape and severity vocabulary. +3. `[Artisan]` Implement visible-dir iteration and the initial consistency validator. +4. `[Test Maven]` Add the regression tests listed above and verify they fail before implementation. +5. `[Critic]` Review for accidental behavior changes in scheduling and review listing. +6. `[Artisan]` Apply Critic fixes without expanding scope into planning state or CLI work. + +## Blocking dependencies + +- No earlier plan dependencies. 
+- Blocks: + - [002-planning-state-schema-and-durable-stage-outputs.md](002-planning-state-schema-and-durable-stage-outputs.md) + - [003-atomic-planning-workspace-publisher.md](003-atomic-planning-workspace-publisher.md) + - [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P001[001 visible dirs + consistency foundation] --> P002[002 planning state schema] + P001 --> P003[003 atomic publisher] + P001 --> P006[006 migration list + doctor] +``` + +## Acceptance criteria + +- All migration directory scans that should ignore hidden/transaction dirs use the shared helper. +- Consistency findings are structured and testable without parsing human prose. +- Validator modes and severities are defined once and are reusable by publisher, scheduler, and CLI callers. +- Existing ready/in-progress phase scheduling behavior is unchanged for normal migration dirs. +- Hidden, dotted, and transaction dirs cannot appear in tick or review candidates. +- No runtime dependency is added. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: a hidden directory someone currently expects to be listed becomes invisible. Roll back by narrowing hidden-dir skipping to only known transaction roots, but keep the helper. +- Risk: validation findings become too broad and block future work. Roll back severity use, not the helper API. +- Risk: duplicate visibility logic survives in one caller. Mitigate with tests that seed hidden dirs in tick and review paths. + +## Open questions + +- Should dotted migration dirs be ignored everywhere or only in automated scheduling? Recommendation: ignore everywhere; hidden should mean hidden. +- Should consistency findings include stable machine codes from the start? Recommendation: yes, because CLI and tests should not parse prose. +- Should symlinked phase docs be allowed? Recommendation: no; migration docs should be regular files inside the migration directory. 
+ +## How later plans may need to adapt if this plan changes + +- If the helper lives outside `migrations.py`, later plans should import that module rather than reintroduce local scans. +- If finding severity names change, plans 004 through 008 must use the final names for ready-gating, doctor output, review, and refine. +- If dotted dirs stay visible, plan 003 must choose transaction directory names that still cannot be scheduled. diff --git a/docs/plans/002-planning-state-schema-and-durable-stage-outputs.md b/docs/plans/002-planning-state-schema-and-durable-stage-outputs.md new file mode 100644 index 0000000..d3ae04b --- /dev/null +++ b/docs/plans/002-planning-state-schema-and-durable-stage-outputs.md @@ -0,0 +1,177 @@ +# 002 - Planning State Schema And Durable Stage Outputs + +## Goal + +Persist planning progress inside each migration directory so a partially planned migration can resume from the last transactionally published snapshot. + +The migration directory becomes the durable source of truth for: + +- current planning step, +- completed planning steps, +- accepted stage outputs needed by later prompts, +- review findings and final decision metadata. + +## Non-goals + +- Do not change the scheduler yet. +- Do not introduce XDG work dirs or transaction publishing yet. +- Do not run planning agents one step at a time yet. +- Do not add the `migration` CLI yet. +- Do not preserve failed current-step output. + +## Current behavior and evidence + +- `planning.run_planning()` creates a fresh `manifest.json` with `status: planning`, runs all stages in one call, then flips the manifest to `ready`, `ready` plus human review, or `skipped`. +- Planning stage artifacts live under the ephemeral run artifacts directory, not inside `migrations/`. +- Later planning prompts read transient outputs from the current run, such as approach docs and previous stdout, rather than a durable planning-state model. +- There is no resume function. 
A migration left in `status: planning` has no durable cursor that tells the next run which stage to continue. +- Tests cover stage ordering and final decisions, but not durable restart after process death. + +## Proposed design + +Add a planning-state model stored under the visible migration snapshot: + +```text +migrations/<slug>/ + manifest.json + .planning/ + state.json + stages/ + approaches.stdout.md + pick-best.stdout.md + review.stdout.md + review-2.stdout.md + final-review.stdout.md +``` + +Recommended model: + +- `PlanningState` + - `schema_version` + - `target` + - `next_step` + - `completed_steps` + - `started_at` + - `updated_at` + - `feedback` + - `review_findings` + - `final_decision` + - `final_reason` +- `CompletedPlanningStep` + - `name` + - `completed_at` + - `outputs` + - optional `agent`, `model`, `effort` + +Step vocabulary: + +```text +approaches -> pick-best -> expand -> review +review(no findings) -> final-review +review(findings) -> revise -> review-2 -> final-review +final-review(approve-auto) -> terminal-ready +final-review(approve-needs-human) -> terminal-ready-awaiting-human +final-review(reject) -> terminal-skipped +``` + +Rules: + +- Store accepted output text needed by later prompts in `.planning/stages/`. +- Store only repo-relative paths in state. +- Store ordered completed-step history with each step's transition outcome. +- Validate history by replaying the planning transition graph, not by simple linear index comparison. +- Reject unknown steps, impossible branches, and cursors that skip required review/revise work at the codec boundary. +- Reject completed steps whose referenced stage output files are missing. +- Permit absent `.planning/state.json` only for legacy ready/done/skipped migrations and newly seeded planning dirs before the first publish. +- Do not store paths to `$TMPDIR` artifacts as resume inputs. 
+ +## Files/modules likely touched + +- new internal module such as `src/continuous_refactoring/planning_state.py` +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/migration_manifest_codec.py` only if manifest validation references planning terminal state +- new `tests/test_planning_state.py` +- `tests/test_planning.py` +- `tests/test_prompts.py` + +## Test strategy + +Use stdlib-only pytest tests. No new dependencies. + +Exact regression tests to add: + +- `tests/test_planning_state.py::test_planning_state_roundtrip_preserves_completed_steps_and_current_step` +- `tests/test_planning_state.py::test_planning_state_defaults_new_plan_to_first_step` +- `tests/test_planning_state.py::test_planning_state_rejects_unknown_current_step` +- `tests/test_planning_state.py::test_planning_state_rejects_completed_step_after_current_step` +- `tests/test_planning_state.py::test_planning_state_rejects_review_to_final_review_when_findings_required_revise` +- `tests/test_planning_state.py::test_planning_state_rejects_revise_without_prior_review_findings` +- `tests/test_planning_state.py::test_planning_state_replays_branching_transition_history` +- `tests/test_planning_state.py::test_planning_state_rejects_missing_artifact_for_completed_step` +- `tests/test_planning_state.py::test_planning_state_atomic_save_preserves_existing_file_on_replace_failure` +- `tests/test_planning_state.py::test_planning_state_snapshot_paths_are_repo_relative` +- `tests/test_planning.py::test_planning_context_reconstructs_from_durable_stage_outputs` +- `tests/test_prompts.py::test_planning_resume_prompt_uses_durable_state_and_keeps_taste` + +Validation command: + +- `uv run pytest tests/test_planning_state.py tests/test_planning.py tests/test_prompts.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. 
`[Scout]` Confirm current planning prompt inputs and which transient outputs must become durable. +2. `[Architect]` Finalize the state schema and transition table. +3. `[Artisan]` Implement the frozen dataclasses, codec, atomic state save, and durable stage-output helpers. +4. `[Test Maven]` Add codec, transition, and restart-context tests. +5. `[Critic]` Review the schema for overfitting to current stage names and for accidental `$TMPDIR` coupling. +6. `[Artisan]` Apply review fixes and update prompt context builders only as needed. + +## Blocking dependencies + +- Depends on [001-visible-migration-dirs-and-consistency-foundation.md](001-visible-migration-dirs-and-consistency-foundation.md) for validation vocabulary and visible directory conventions. +- Blocks: + - [004-resumable-one-step-planning-engine.md](004-resumable-one-step-planning-engine.md) + - [005-planning-before-phase-execution-scheduling.md](005-planning-before-phase-execution-scheduling.md) + - [008-migration-refine.md](008-migration-refine.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P001[001 visible dirs + consistency foundation] --> P002[002 planning state schema] + P002 --> P004[004 one-step planning engine] + P002 --> P005[005 planning scheduler] + P002 --> P008[008 migration refine] +``` + +## Acceptance criteria + +- A migration in `status: planning` can describe its next planning step from `.planning/state.json`. +- Accepted stage outputs needed by later prompts live inside the migration directory. +- State codec rejects impossible or inconsistent planning progress. +- State codec validates branching history by replaying the transition graph. +- Durable prompt context can be rebuilt after deleting run artifacts. +- Failed current-step output has no field in the durable schema. +- `## Taste` remains present in affected planning prompts. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: schema tries to encode too much agent-specific detail. 
Roll back to step names, accepted output paths, and final decisions only. +- Risk: keeping `.planning/` forever feels noisy. Do not remove it in this PR; later docs/CLI can decide presentation. +- Risk: state validation duplicates manifest codec work. Keep planning-state validation local and use manifest validation only for manifest-owned fields. + +## Open questions + +- Should `.planning/` remain after a migration is `done`? Recommendation: yes; it is audit data and future doctor context. +- Should final-review stdout always be stored even for reject? Recommendation: yes, if it is the accepted terminal decision. +- Should state live inside `manifest.json` instead? Recommendation: no; a separate `.planning/state.json` avoids bloating the manifest and keeps planning internals out of phase execution. + +## How later plans may need to adapt if this plan changes + +- If state is embedded in `manifest.json`, plans 004 through 008 must use manifest saves as their state boundary. +- If stage output file names differ, plan 004 prompt reconstruction and plan 006 list display must follow the final helper API. +- If terminal planning state is removed after ready, plans 007 and 008 must reconstruct review/refine context from docs alone. diff --git a/docs/plans/003-atomic-planning-workspace-publisher.md b/docs/plans/003-atomic-planning-workspace-publisher.md new file mode 100644 index 0000000..76a8c09 --- /dev/null +++ b/docs/plans/003-atomic-planning-workspace-publisher.md @@ -0,0 +1,170 @@ +# 003 - Atomic Planning Workspace Publisher + +## Goal + +Add the workspace and publish layer that lets planning agents work off to the side, then publishes a coherent `migrations/` snapshot only after a planning step completes. + +This is the mechanical safety layer for the product requirement: agents and humans should see migration docs plus planning state in sync. 
The guarantee is transactional best-effort replacement with rollback, not a portable atomic swap of two non-empty directories. + +## Non-goals + +- Do not change planning stage logic yet. +- Do not add scheduler behavior yet. +- Do not add the new CLI yet. +- Do not claim a true portable atomic swap of two non-empty directories; stdlib/APFS experiments show that is not available. +- Do not publish failed current-step output. + +## Current behavior and evidence + +- Current planning writes directly under the live migration directory. +- `save_manifest()` atomically replaces one JSON file, but docs, approach files, and future `.planning/` state are multi-file directory state. +- Experimenter validated that `os.replace(src_dir, non_empty_dst_dir)` fails on APFS with `ENOTEMPTY`. +- Experimenter validated that cross-device replacement fails with `EXDEV`. +- A safe same-filesystem publish requires moving the existing live dir aside, moving staged into place, validating, then deleting rollback. +- Git sees replacement as normal modifications/deletions/untracked files, and rollback works while the backup directory remains. + +## Proposed design + +Introduce a planning workspace and publish transaction. + +Workspace: + +```text +<xdg-state-dir>/planning/<project>/<slug>/work/ +``` + +Transaction under the repo live migrations filesystem: + +```text +migrations/__transactions__/<txn-id>/ + staged/ + rollback/ + failed/ +``` + +Publish choreography: + +1. Acquire a mutation lock for the live migrations dir. +2. Capture `base_snapshot_id`, a deterministic digest of the live migration tree excluding transaction internals. +3. Build or copy the complete candidate snapshot in the XDG work dir. +4. Validate the candidate in the work dir for the caller's intended mode. +5. Copy the complete candidate to `migrations/__transactions__/<txn-id>/staged`. +6. Validate the staged snapshot again before touching live state. +7. Verify staged and live dir are on the same device. +8. 
Under the lock, recompute the current live digest and compare it with `base_snapshot_id`. +9. If the digest differs, block publish, keep staged diagnostics, and do not move the live dir. +10. If `migrations/<slug>/` exists, move it to `rollback`. +11. Move `staged` to `migrations/<slug>/`. +12. Validate the newly visible live snapshot as a sanity check. +13. Delete `rollback` only after validation passes. +14. If any step after rollback move fails, restore rollback to live and preserve diagnostic paths if restoration fails. + +Locking: + +- Use a simple lock file or directory under the live migrations dir or project state. +- Hold the lock for publish operations and later for review/refine mutations. +- Surface the lock path, owner pid when available, created timestamp, and operation name in errors. +- Do not silently break locks in this PR. + +Dirty worktree policy: + +- Before copying live into a work dir, check `git status --porcelain -- migrations/<slug>/` and refuse if tracked or untracked user changes exist under the target migration dir. +- Ignore known transaction roots when checking the live migration target. +- Under the publish lock, rely on `base_snapshot_id` to catch committed or separately published changes that happened after the work dir was copied. +- Return a blocked result that names the dirty paths and tells the operator to commit, discard, or inspect with `migration doctor`. 
+ +## Files/modules likely touched + +- new internal module such as `src/continuous_refactoring/planning_publish.py` +- `src/continuous_refactoring/config.py` for XDG project planning work path helpers +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/git.py` if dirty checks need shared helpers +- `src/continuous_refactoring/artifacts.py` if publish paths are logged +- new `tests/test_planning_publish.py` +- `tests/test_planning.py` +- `tests/test_git.py` only if dirty-check helpers are added there + +## Test strategy + +Exact regression tests to add: + +- `tests/test_planning_publish.py::test_publish_creates_new_live_migration_from_staged_snapshot` +- `tests/test_planning_publish.py::test_publish_replaces_existing_non_empty_live_dir_with_backup_transaction` +- `tests/test_planning_publish.py::test_publish_requires_same_device_final_staging` +- `tests/test_planning_publish.py::test_staged_validation_failure_leaves_live_snapshot_unchanged` +- `tests/test_planning_publish.py::test_publish_rejects_stale_base_snapshot` +- `tests/test_planning_publish.py::test_publish_cleans_backup_after_success` +- `tests/test_planning_publish.py::test_publish_restores_rollback_when_live_replace_fails` +- `tests/test_planning_publish.py::test_publish_reports_live_rollback_staged_and_failed_paths_when_rollback_fails` +- `tests/test_planning_publish.py::test_publish_refuses_dirty_live_migration_dir` +- `tests/test_planning_publish.py::test_lock_rejects_concurrent_mutation_and_reports_lock_path` +- `tests/test_planning_publish.py::test_transaction_dirs_are_left_for_doctor_when_cleanup_fails` + +Use temp git repos for dirty-worktree behavior. Inject failures with monkeypatch around copy/replace/remove helpers rather than relying on OS accidents. + +Validation command: + +- `uv run pytest tests/test_planning_publish.py tests/test_migration_consistency.py tests/test_git.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. 
`[Experimenter]` Reconfirm the desired transaction choreography against the target OS using temp dirs only. +2. `[Architect]` Define the publisher API: inputs, result type, failure type, and lock behavior. +3. `[Artisan]` Implement XDG work path helpers, same-device staged copy, publish, rollback, cleanup, and lock. +4. `[Test Maven]` Add failure-injection tests for every publish boundary. +5. `[Critic]` Review for user-data overwrite risk and stale-lock failure modes. +6. `[Artisan]` Apply fixes without coupling the publisher to planning stage semantics. + +## Blocking dependencies + +- Depends on [001-visible-migration-dirs-and-consistency-foundation.md](001-visible-migration-dirs-and-consistency-foundation.md) for transaction directory invisibility. +- Blocks: + - [004-resumable-one-step-planning-engine.md](004-resumable-one-step-planning-engine.md) + - [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md) + - [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md) + - [008-migration-refine.md](008-migration-refine.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P001[001 visible dirs + consistency foundation] --> P003[003 atomic publisher] + P003 --> P004[004 one-step planning engine] + P003 --> P006[006 list + doctor] + P003 --> P007[007 staged review] + P003 --> P008[008 refine] +``` + +## Acceptance criteria + +- Planning candidates can be built outside the live migration directory. +- Final publish source is copied under the live migrations dir before rename. +- Replacing a non-empty migration dir works through backup/rollback choreography. +- Staged validation succeeds before the live path is moved. +- A stale base snapshot blocks publish before the live path is moved. +- Failed publish restores the previous live snapshot whenever rollback is possible. +- Transaction internals remain invisible to migration enumeration. +- Concurrent mutation attempts fail clearly. 
+- User edits in a live migration dir are not overwritten silently. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: brief missing-path window between moving live to rollback and staged to live. Mitigate with pre-validation, CAS, lock, and immediate rollback; call this out in implementation docs. +- Risk: stale locks block automation. Roll back by making lock errors actionable; do not auto-break locks until doctor repair is designed. +- Risk: dirty-worktree detection is too conservative. Roll back to a clear blocked result rather than overwrite. +- Risk: cleanup failure leaves `__transactions__`. Plan 006 doctor reports these paths. + +## Open questions + +- Should lock files live under XDG state or under `migrations/__transactions__`? Recommendation: under the live migrations dir so the lock is on the same filesystem and easy for doctor to report. +- Should `migration doctor` later repair stale locks? Recommendation: yes, but only with explicit repair design; plan 006 reports lock presence/age first. +- Should publish fsync directories? Recommendation: consider it during implementation, but keep the public API independent of platform quirks. + +## How later plans may need to adapt if this plan changes + +- If the publisher API returns exceptions instead of structured results, plans 004 and 006 must translate those into route/failure records. +- If dirty checks are deferred, plan 004 must add a blocked outcome before running resume against a user-edited migration. +- If transaction paths differ, plan 006 doctor must inspect the final path convention. +- If `base_snapshot_id` changes shape, plans 004, 007, and 008 must pass the final token through review/refine/planning publish calls. 
diff --git a/docs/plans/004-resumable-one-step-planning-engine.md b/docs/plans/004-resumable-one-step-planning-engine.md new file mode 100644 index 0000000..9350789 --- /dev/null +++ b/docs/plans/004-resumable-one-step-planning-engine.md @@ -0,0 +1,175 @@ +# 004 - Resumable One-Step Planning Engine + +## Goal + +Refactor planning so each automation iteration can complete exactly one accepted planning step, publish a coherent snapshot, and resume later from the durable state. + +This closes the core product gap: migrations left in `status: planning` can continue planning instead of being stranded. + +## Non-goals + +- Do not wire planning into the run scheduler yet. +- Do not add the new `migration` CLI yet. +- Do not change phase execution semantics. +- Do not replay partial failed step output. +- Do not collapse source-target planning into a terminal planning loop. + +## Current behavior and evidence + +- `planning.run_planning()` initializes a new planning manifest and runs `approaches`, `pick-best`, `expand`, `review`, optional `revise` / `review-2`, and final review in one call. +- Stage outputs are passed through the same process and artifact tree. They are not a durable resume contract. +- If `review-2` still has findings, planning raises without a durable cursor for the next run. +- If the process dies after writing some docs but before final decision, the live migration dir can contain partial output with no reliable current-step state. +- Existing tests assert stage order, revise behavior, final-decision parsing, and phase discovery refresh timing. + +## Proposed design + +Split planning into a step engine: + +- `start_or_resume_planning(migration_dir | seed_context) -> PlanningSnapshot` +- `run_next_planning_step(snapshot, agent_settings, artifacts) -> PlanningStepResult` +- `publish_planning_step(result) -> RouteRecord-like outcome` + +Each invocation: + +1. Reads the published live snapshot. +2. 
Copies it to the XDG work dir through the publisher/workspace helpers. +3. Runs exactly `PlanningState.next_step` against the work dir. +4. Stores accepted stage output in `.planning/stages/`. +5. Advances `.planning/state.json`. +6. Runs consistency validation for the new mode. +7. Publishes the whole migration directory through plan 003's publisher. + +First source-target behavior: + +- `routing_pipeline` should seed a new planning snapshot and run exactly one planning step as the action. +- The first action should not run all planning stages to terminal through a compatibility wrapper. +- Any terminal-loop helper kept for transitional unit tests must not be used by `run`, `run-once`, or source-target routing. + +Step behavior: + +- `approaches`: writes approach docs and stores accepted stdout. +- `pick-best`: stores selected approach output. +- `expand`: writes `plan.md` and `phase-*.md`, then refreshes manifest phase metadata. +- `review`: stores findings; no findings advances to `final-review`; findings advances to `revise`. +- `revise`: updates docs and stores accepted stdout. +- `review-2`: findings fail without publish; no findings advances to `final-review`. +- `final-review`: parses `final-decision: approve-auto|approve-needs-human|reject - reason`. + +Terminal mapping: + +- `approve-auto`: publish `status: ready`, `awaiting_human_review: false`. +- `approve-needs-human`: publish `status: ready`, `awaiting_human_review: true`, and `human_review_reason`. +- `reject`: publish `status: skipped` and the intentional-skip doc. + +Ready transition gate: + +- Before publishing `status: ready`, run the consistency validator in `ready-publish` mode. +- Reject missing `plan.md`, missing phase docs, stale manifest phase metadata, invalid current phase, missing `## Precondition`, or missing `## Definition of Done`. +- Do not require fresh test validation here; host-side validation remains in phase execution. 
+ +Compatibility: + +- Existing tests may keep a private helper that loops steps until terminal while they are ported. +- Product entry points use the one-step engine immediately. +- Plan 009 removes or narrows any leftover wrapper after scheduler and CLI integration. + +## Files/modules likely touched + +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/planning_state.py` +- `src/continuous_refactoring/planning_publish.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/routing_pipeline.py` +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/failure_report.py` if new call roles are introduced +- `tests/test_planning.py` +- `tests/test_planning_state.py` +- `tests/test_planning_publish.py` +- `tests/test_prompts.py` +- `tests/test_run.py` + +## Test strategy + +Exact regression tests to add or modify: + +- `tests/test_planning.py::test_planning_publishes_initial_manifest_and_state_atomically` +- `tests/test_planning.py::test_successful_step_publishes_docs_and_state_together` +- `tests/test_planning.py::test_failed_step_does_not_publish_partial_docs_or_state` +- `tests/test_planning.py::test_resume_skips_completed_steps` +- `tests/test_planning.py::test_resume_reruns_failed_current_step_from_last_published_state` +- `tests/test_planning.py::test_resume_discards_failed_current_step_outputs_before_rerun` +- `tests/test_planning.py::test_resume_prompt_sees_published_docs_and_state_in_sync` +- `tests/test_run.py::test_source_target_planning_runs_only_one_step_as_first_action` +- `tests/test_planning.py::test_revise_path_records_review_findings_as_planning_state` +- `tests/test_planning.py::test_review_two_findings_fail_without_publish` +- `tests/test_planning.py::test_final_approval_clears_execution_blockers_but_keeps_planning_audit_state` +- `tests/test_planning.py::test_final_ready_rejects_inconsistent_manifest_docs_before_publish` + +Validation command: + +- `uv run pytest 
tests/test_planning.py tests/test_planning_state.py tests/test_planning_publish.py tests/test_prompts.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Map the current `run_planning()` stages to the new step engine boundaries. +2. `[Architect]` Specify the one-step result type and compatibility wrapper rules. +3. `[Artisan]` Extract step execution without changing prompt contracts except durable work-dir paths. +4. `[Artisan]` Add resume behavior using `.planning/state.json` and durable stage outputs. +5. `[Test Maven]` Add no-publish-on-failure and resume-from-every-step tests. +6. `[Critic]` Review for hidden replay of failed output, mixed live/work-dir reads, and ready-gate gaps. +7. `[Artisan]` Apply fixes and remove transitional code only if nothing still depends on it. + +## Blocking dependencies + +- Depends on [002-planning-state-schema-and-durable-stage-outputs.md](002-planning-state-schema-and-durable-stage-outputs.md). +- Depends on [003-atomic-planning-workspace-publisher.md](003-atomic-planning-workspace-publisher.md). +- Blocks: + - [005-planning-before-phase-execution-scheduling.md](005-planning-before-phase-execution-scheduling.md) + - [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md) + - [008-migration-refine.md](008-migration-refine.md) + - [009-hardening-compatibility-and-docs.md](009-hardening-compatibility-and-docs.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P002[002 planning state schema] --> P004[004 one-step planning engine] + P003[003 atomic publisher] --> P004 + P004 --> P005[005 planning scheduler] + P004 --> P007[007 migration review] + P004 --> P008[008 migration refine] + P004 --> P009[009 hardening + docs] +``` + +## Acceptance criteria + +- One call can run and publish exactly one accepted planning step. +- Source-target routing creates and publishes only the first accepted planning step. 
+- A crash or failure during a step leaves the live migration dir unchanged. +- The next run reruns the same failed step from the last published state. +- Completed accepted steps are not rerun. +- Agents read planning inputs from the staged work dir copied from the published snapshot. +- Final `ready` status cannot publish unless consistency validation passes. +- `status: planning` remains non-executable by phase logic. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: extraction changes prompt behavior. Preserve existing prompt tests and add staged-work-dir checks. +- Risk: compatibility wrapper hides the new one-step behavior. Keep wrappers out of product paths and schedule cleanup in plan 009. +- Risk: final-review rejection writes skip docs inconsistently. Publish skipped snapshots through the same transaction path. +- Risk: ready validation blocks valid legacy migrations. Only apply strict terminal planning checks to snapshots with `.planning/state.json`. + +## Open questions + +- Should normal source-target planning still run to terminal in one command? Recommendation: no; one step per action is easier to recover and review. +- Should every accepted planning step create a driver commit? Recommendation: yes once plan 005 wires scheduler action accounting. +- Should failed work dirs be retained for debugging? Recommendation: only as artifact evidence, never as resume input. + +## How later plans may need to adapt if this plan changes + +- If the engine keeps any terminal-loop helper, plan 005 must prove product paths still consume one accepted planning step per action. +- If ready validation remains partial, plan 005 must add a stronger pre-ready-check gate before phase execution. +- If review/refine need a different step API, plans 007 and 008 must use that API instead of bypassing it. 
diff --git a/docs/plans/005-planning-before-phase-execution-scheduling.md b/docs/plans/005-planning-before-phase-execution-scheduling.md new file mode 100644 index 0000000..986a25c --- /dev/null +++ b/docs/plans/005-planning-before-phase-execution-scheduling.md @@ -0,0 +1,164 @@ +# 005 - Planning Before Phase Execution Scheduling + +## Goal + +Teach `run`, `run-once`, and `--focus-on-live-migrations` to spend iterations completing `status: planning` migrations before executing ready phases or selecting new source targets. + +This makes mid-planning migrations recover naturally through normal automation. + +## Non-goals + +- Do not change the planning step engine. +- Do not add the new CLI. +- Do not make `status: planning` visible to phase-ready or phase-execute paths. +- Do not change host-side phase validation. +- Do not publish branch updates; the driver still creates local commits only. + +## Current behavior and evidence + +- `migration_tick.enumerate_eligible_manifests()` only considers `ready` and `in-progress` manifests. +- Automated ticks skip `awaiting_human_review` and defer no/over-budget phases, but there is no planning-resume tick. +- `run_loop()` probes migrations before source routing, but only for phase execution. +- Focused live migrations loop repeats migration ticks until no work remains, but planning manifests are invisible. +- `run-once` uses the same route path, so a stranded `status: planning` migration is not completed later. + +## Proposed design + +Add a dedicated planning tick that runs before the existing phase tick. + +Ordering: + +1. Baseline validation remains first. +2. `try_planning_tick()` runs. +3. If a planning step publishes a snapshot, commit it and consume the action/iteration. +4. If no planning migration is runnable, run existing `try_migration_tick()` for ready/in-progress phases. +5. If neither planning nor phase work is runnable, continue to source target selection. 
+ +Eligibility: + +- visible migration dirs only, +- `manifest.status == "planning"`, +- not `awaiting_human_review`, +- not cooling down, +- sorted by `created_at` like current phase ticks. + +Planning candidates and runnable planning are separate: + +- Enumerate all visible `status: planning` manifests. +- Missing or invalid `.planning/state.json` is a blocked planning candidate, not invisible work. +- A blocked planning candidate returns `blocked`, writes the appropriate failure/transition record, and prevents source routing from proceeding as if nothing is wrong. +- Only valid planning state is runnable. + +Results: + +- accepted planning step -> `commit` with a planning-step label, +- blocked planning state -> `blocked` with validator findings and failure snapshot support, +- failed agent or invalid step -> `abandon` or existing retry semantics, using call role `planner` or a more specific planning role if introduced, +- no runnable planning -> `not-routed`. + +Readiness gate before phase execution: + +- Before a ready/in-progress migration reaches phase ready-check, run consistency validation in execution mode. +- If consistency fails, return `blocked` and do not call `check_phase_ready()`. + +Action accounting: + +- A published planning step counts as one action. +- A deferred or non-runnable planning migration should not consume source action budget. +- Existing phase tick behavior remains the fallback, not a sibling race. 
+ +## Files/modules likely touched + +- `src/continuous_refactoring/migration_tick.py` +- `src/continuous_refactoring/loop.py` +- `src/continuous_refactoring/routing_pipeline.py` +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/failure_report.py` +- `src/continuous_refactoring/artifacts.py` +- `tests/test_loop_migration_tick.py` +- `tests/test_run.py` +- `tests/test_run_once.py` +- `tests/test_focus_on_live_migrations.py` +- `tests/test_failure_report.py` + +## Test strategy + +Exact regression tests to add or modify: + +- `tests/test_loop_migration_tick.py::test_enumerate_eligible_planning_manifests_includes_planning_migrations` +- `tests/test_loop_migration_tick.py::test_missing_planning_state_blocks_before_ready_phase_or_source_routing` +- `tests/test_loop_migration_tick.py::test_invalid_planning_state_blocks_before_ready_phase_or_source_routing` +- `tests/test_loop_migration_tick.py::test_try_migration_tick_completes_planning_before_ready_phase` +- `tests/test_loop_migration_tick.py::test_try_migration_tick_does_not_call_ready_check_for_planning_status` +- `tests/test_loop_migration_tick.py::test_try_migration_tick_blocks_ready_phase_when_consistency_validation_fails` +- `tests/test_run.py::test_run_loop_resumes_planning_before_source_target` +- `tests/test_run.py::test_run_loop_counts_completed_planning_as_action` +- `tests/test_run.py::test_run_loop_persists_planning_resume_failure_snapshot` +- `tests/test_run_once.py::test_run_once_resumes_planning_before_classification` +- `tests/test_run_once.py::test_run_once_raises_when_planning_resume_blocks` +- `tests/test_focus_on_live_migrations.py::test_focused_loop_completes_mid_planning_before_ready_phase` +- `tests/test_focus_on_live_migrations.py::test_focused_loop_stops_when_only_blocked_planning_remains` + +Validation command: + +- `uv run pytest tests/test_loop_migration_tick.py tests/test_run.py tests/test_run_once.py tests/test_focus_on_live_migrations.py 
tests/test_failure_report.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Map every migration tick call site and action-budget branch. +2. `[Architect]` Define planning tick result records and commit-message labels. +3. `[Artisan]` Implement planning eligibility and `try_planning_tick()`. +4. `[Artisan]` Wire ordering into `run`, `run-once`, and focused live migrations. +5. `[Test Maven]` Add traps proving planning runs before ready phase and source routing. +6. `[Critic]` Review for starvation, budget accounting errors, and human-review regressions. +7. `[Artisan]` Apply review fixes and keep phase execution untouched except the consistency gate. + +## Blocking dependencies + +- Depends on [004-resumable-one-step-planning-engine.md](004-resumable-one-step-planning-engine.md). +- Blocks [009-hardening-compatibility-and-docs.md](009-hardening-compatibility-and-docs.md). +- Runs in parallel with [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md) and [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md) only if all use stable APIs from plan 004. + +## Mermaid dependency visualization + +```mermaid +graph TD + P004[004 one-step planning engine] --> P005[005 planning scheduler] + P005 --> P009[009 hardening + docs] + P004 --> P006[006 list + doctor] + P004 --> P007[007 migration review] + P004 --> P008[008 migration refine] + P006 -. API stable enough .-> P005 +``` + +## Acceptance criteria + +- `run` completes an eligible planning step before a ready phase. +- `run-once` completes an eligible planning step before classification/source routing. +- `--focus-on-live-migrations` completes planning steps before phase execution. +- `status: planning` never enters phase ready-check or phase execution. +- Ready/in-progress migrations fail closed when consistency validation fails. +- Missing or invalid planning state blocks automation instead of disappearing. 
+- Human-review gating remains intact. +- Action accounting matches existing run-loop semantics. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: planning migrations starve ready phases if many exist. Roll back by making one planning step consume one action, preserving operator control. +- Risk: focused loop never terminates on blocked planning. Mitigate with blocked/deferred result handling matching current phase behavior. +- Risk: failure snapshots mislabel planning failures. Add explicit call-role tests. +- Risk: ready consistency gate blocks legacy migrations. Gate legacy tolerance through the validator mode defined in plan 001/004. + +## Open questions + +- Should planning steps have effort metadata like phases? Recommendation: not initially; use the configured planning call effort and add effort metadata later if needed. +- Should a planning step commit message include the step name? Recommendation: yes for auditability. +- Should `run-once` execute more than one planning step if no source target work remains? Recommendation: no; one accepted step per action. + +## How later plans may need to adapt if this plan changes + +- If planning tick result types differ, plan 008 `refine` should reuse the final result shape for failures. +- If consistency validation lives entirely in phase tick, plan 009 docs must describe that ready status alone is not sufficient for execution. +- If one action can run multiple planning steps, plan 009 must update docs and failure snapshot expectations. 
diff --git a/docs/plans/006-migration-list-and-doctor.md b/docs/plans/006-migration-list-and-doctor.md new file mode 100644 index 0000000..2e9f580 --- /dev/null +++ b/docs/plans/006-migration-list-and-doctor.md @@ -0,0 +1,149 @@ +# 006 - Migration List And Doctor + +## Goal + +Add the read-only migration CLI foundation: + +```text +continuous-refactoring migration list +continuous-refactoring migration doctor <slug-or-path> +continuous-refactoring migration doctor --all +``` + +This gives operators visibility into planning state, ready state, and consistency drift before mutating review/refine commands land. + +## Non-goals + +- Do not add `migration review` yet. +- Do not add `migration refine` yet. +- Do not remove the existing top-level `review` command. +- Do not repair transaction directories or locks; report them first. +- Do not let path arguments resolve outside the configured live migrations root. + +## Current behavior and evidence + +- Current CLI has top-level `review list` and `review perform`, not the requested `migration ...` namespace. +- There is no CLI to list all migrations across statuses. +- There is no CLI to validate one migration or all migrations for doc/manifest/state drift. +- Visible-dir iteration intentionally skips transaction roots, so doctor needs an explicit scan for transaction and lock artifacts. + +## Proposed design + +Parser shape: + +```text +continuous-refactoring migration list [--status planning|ready|in-progress|skipped|done] [--awaiting-review] +continuous-refactoring migration doctor <slug-or-path> +continuous-refactoring migration doctor --all +``` + +Slug/path resolver: + +- Accept a slug under the configured live migrations dir. +- Accept a path only if its resolved real path is inside the configured live migrations root. +- Reject symlink escapes, parent traversal, outside directories, and ambiguous slug/path collisions. +- Use one resolver for all future migration subcommands.
+ +`migration list`: + +- Shows slug, status, planning next step or current phase, awaiting-review flag, last touch, cooldown, and reason when present. +- Includes planning, ready, in-progress, skipped, and done by default. +- Uses the shared visible-dir iterator. +- Marks missing/invalid planning state as blocked in display instead of hiding it. + +`migration doctor`: + +- Runs the shared consistency validator against one migration or all visible migrations. +- Scans the transaction root and lock path explicitly in addition to visible migrations. +- Reports transaction leftovers, lock presence/age, missing docs, stale manifest phase metadata, incomplete planning state, and ready-gate failures. +- Exits nonzero if any `error` severity finding exists. +- Does not repair anything in this plan. + +## Files/modules likely touched + +- `src/continuous_refactoring/cli.py` +- new internal module such as `src/continuous_refactoring/migration_cli.py` +- `src/continuous_refactoring/review_cli.py` only for shared context/resolver reuse +- `src/continuous_refactoring/migrations.py` +- `src/continuous_refactoring/planning_publish.py` +- `tests/test_cli_migrations.py` +- `tests/test_cli_review.py` +- `tests/test_migration_consistency.py` + +## Test strategy + +Exact regression tests to add: + +- `tests/test_cli_migrations.py::test_migration_parser_accepts_list_and_doctor` +- `tests/test_cli_migrations.py::test_migration_parser_accepts_doctor_all` +- `tests/test_cli_migrations.py::test_migration_list_includes_planning_ready_review_and_done_statuses` +- `tests/test_cli_migrations.py::test_migration_list_marks_mid_planning_current_step` +- `tests/test_cli_migrations.py::test_migration_list_marks_invalid_planning_state_as_blocked` +- `tests/test_cli_migrations.py::test_migration_resolver_accepts_slug_or_path_inside_live_root` +- `tests/test_cli_migrations.py::test_migration_resolver_rejects_outside_path_and_symlink_escape` +- 
`tests/test_cli_migrations.py::test_migration_doctor_checks_one_migration_by_name` +- `tests/test_cli_migrations.py::test_migration_doctor_all_checks_every_live_migration` +- `tests/test_cli_migrations.py::test_migration_doctor_reports_transaction_root_and_lock_presence` +- `tests/test_cli_migrations.py::test_migration_doctor_exits_nonzero_on_error_findings` + +Validation command: + +- `uv run pytest tests/test_cli_migrations.py tests/test_migration_consistency.py tests/test_cli_review.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Map current parser wiring and review CLI context helpers that can be reused. +2. `[Architect]` Finalize slug/path resolution, output columns, and exit-code behavior. +3. `[Artisan]` Add the `migration` parser, shared resolver, `list`, and read-only `doctor`. +4. `[Test Maven]` Add parser, resolver, list, and doctor tests. +5. `[Critic]` Review path containment and whether doctor can actually see hidden transaction/lock roots. +6. `[Artisan]` Apply review fixes without adding repair or mutation behavior. + +## Blocking dependencies + +- Depends on [001-visible-migration-dirs-and-consistency-foundation.md](001-visible-migration-dirs-and-consistency-foundation.md). +- Depends on [003-atomic-planning-workspace-publisher.md](003-atomic-planning-workspace-publisher.md) for transaction/lock path conventions. 
+- Blocks: + - [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md) + - [008-migration-refine.md](008-migration-refine.md) + - [009-hardening-compatibility-and-docs.md](009-hardening-compatibility-and-docs.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P001[001 foundation] --> P006[006 list + doctor] + P003[003 publisher paths] --> P006 + P006 --> P007[007 staged review] + P006 --> P008[008 refine] + P006 --> P009[009 hardening + docs] +``` + +## Acceptance criteria + +- `continuous-refactoring migration list` parses and displays all visible migration statuses. +- `continuous-refactoring migration doctor <slug-or-path>` validates one contained migration. +- `continuous-refactoring migration doctor --all` validates all visible migrations and reports transaction/lock roots. +- Resolver rejects outside paths, symlink escapes, and ambiguous targets. +- Doctor uses stable finding codes and severity semantics from plan 001. +- No migration mutation happens in this plan. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: doctor output becomes too verbose. Keep machine codes and concise messages; leave formatting polish for docs. +- Risk: path containment blocks a useful external inspection case. Keep mutation safety first; external inspection can be a future explicit flag. +- Risk: transaction roots move in plan 003. Use path helpers from the publisher module rather than hard-coded strings where possible. + +## Open questions + +- Should doctor have JSON output? Recommendation: not in this first PR unless tests need it; stable codes are enough. +- Should lock age be called stale? Recommendation: report age only; repair/staleness policy belongs in a later repair plan. +- Should `migration list` include hidden transaction counts? Recommendation: no; doctor owns diagnostics.
+ +## How later plans may need to adapt if this plan changes + +- If resolver semantics change, plans 007 and 008 must use the final resolver without bypasses. +- If doctor does not report lock/transaction roots, plan 009 must avoid documenting that capability. +- If `migration list` output changes, README examples in plan 009 must follow the tested output. diff --git a/docs/plans/007-migration-review-staged-publish.md b/docs/plans/007-migration-review-staged-publish.md new file mode 100644 index 0000000..8171ed4 --- /dev/null +++ b/docs/plans/007-migration-review-staged-publish.md @@ -0,0 +1,140 @@ +# 007 - Migration Review With Staged Publish + +## Goal + +Move human-review mutation under the new migration namespace and through the staged workspace publisher: + +```text +continuous-refactoring migration review <slug-or-path> +``` + +The old top-level `review` command should remain as a compatibility wrapper unless explicitly removed in a later cleanup. + +## Non-goals + +- Do not add `migration refine` yet. +- Do not add doctor repair behavior. +- Do not let review agents mutate the live migration directory directly. +- Do not review migrations outside the configured live migrations root. +- Do not change phase execution semantics. + +## Current behavior and evidence + +- `review_cli.review perform` currently asks an interactive agent to update the live migration directory, then reloads the manifest to check whether `awaiting_human_review` was cleared. +- That direct live mutation violates the desired docs/state sync guarantee and bypasses the transaction publisher. +- Current review tests monkeypatch agent behavior and assert flag clearing, missing migration errors, and still-flagged failures. + +## Proposed design + +Parser shape: + +```text +continuous-refactoring migration review <slug-or-path> --with codex|claude --model <model> --effort <effort> +``` + +Behavior: + +- Resolve `<slug-or-path>` through the contained resolver from plan 006. +- Require `awaiting_human_review=true`.
+- Capture the live snapshot digest before copying to work. +- Copy the live snapshot to an XDG work dir. +- Run the review prompt against the work dir only. +- Validate the candidate with the consistency validator. +- Refuse to publish if `awaiting_human_review` remains true or `human_review_reason` is still set. +- Publish through the plan 003 publisher with `base_snapshot_id` compare-before-publish. +- Preserve existing top-level `review perform` as a compatibility wrapper to this implementation. + +Prompt contract: + +- Present the human-review reason verbatim. +- Tell the agent that the work dir is the only writable target. +- Tell the agent not to mutate `migrations/` directly. +- Keep `## Taste` injection. + +## Files/modules likely touched + +- `src/continuous_refactoring/cli.py` +- `src/continuous_refactoring/review_cli.py` +- `src/continuous_refactoring/migration_cli.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/planning_publish.py` +- `tests/test_cli_migrations.py` +- `tests/test_cli_review.py` +- `tests/test_prompts.py` + +## Test strategy + +Exact regression tests to add or modify: + +- `tests/test_cli_migrations.py::test_migration_review_accepts_slug_or_path_inside_live_root` +- `tests/test_cli_migrations.py::test_migration_review_rejects_outside_path_and_symlink_escape` +- `tests/test_cli_migrations.py::test_migration_review_rejects_missing_or_not_flagged_migration` +- `tests/test_cli_migrations.py::test_migration_review_runs_agent_against_work_dir` +- `tests/test_cli_migrations.py::test_migration_review_failure_leaves_live_snapshot_unchanged` +- `tests/test_cli_migrations.py::test_migration_review_rejects_stale_base_snapshot` +- `tests/test_cli_migrations.py::test_migration_review_refuses_publish_when_review_flag_remains` +- `tests/test_cli_review.py::test_top_level_review_perform_routes_to_migration_review_compatibility_path` +- `tests/test_prompts.py::test_review_prompt_names_work_dir_and_forbids_live_dir_mutation` 
+ +Validation command: + +- `uv run pytest tests/test_cli_migrations.py tests/test_cli_review.py tests/test_prompts.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Map old review CLI behavior and tests that must remain compatible. +2. `[Architect]` Define review result handling and compatibility wrapper behavior. +3. `[Artisan]` Add `migration review` parser and staged review implementation. +4. `[Artisan]` Route top-level `review perform` through the new staged implementation. +5. `[Test Maven]` Add no-live-change, stale-base, and compatibility tests. +6. `[Critic]` Review for direct live mutation escape hatches and path containment. +7. `[Artisan]` Apply review fixes. + +## Blocking dependencies + +- Depends on [003-atomic-planning-workspace-publisher.md](003-atomic-planning-workspace-publisher.md). +- Depends on [004-resumable-one-step-planning-engine.md](004-resumable-one-step-planning-engine.md) for validation/state expectations. +- Depends on [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md) for parser namespace and resolver. +- Blocks: + - [008-migration-refine.md](008-migration-refine.md) + - [009-hardening-compatibility-and-docs.md](009-hardening-compatibility-and-docs.md) + +## Mermaid dependency visualization + +```mermaid +graph TD + P003[003 publisher] --> P007[007 staged review] + P004[004 planning engine] --> P007 + P006[006 list + doctor] --> P007 + P007 --> P008[008 refine] + P007 --> P009[009 hardening + docs] +``` + +## Acceptance criteria + +- `migration review <slug-or-path>` parses and dispatches. +- Review agents write only to a staged work dir. +- Failed review leaves the live migration snapshot unchanged. +- Stale base snapshots block publish. +- Review cannot target paths outside the configured live migrations root. +- The compatibility top-level `review perform` path still works or fails with an intentional compatibility message. +- `uv run pytest` passes.
+ +## Risks and rollback + +- Risk: compatibility wrapper diverges from the canonical command. Mitigate by routing to one implementation. +- Risk: review prompt still implies live mutation. Add prompt tests. +- Risk: stale-base rejection surprises users. Error should point to `migration doctor` and suggest rerunning review. + +## Open questions + +- Should `migration review` support read-only preview? Recommendation: not in this PR. +- Should top-level `review list` be aliased to `migration list --awaiting-review`? Recommendation: optional; keep behavior stable unless tests make it easy. +- Should review clear stale transaction directories? Recommendation: no; doctor reports, future repair handles. + +## How later plans may need to adapt if this plan changes + +- If compatibility aliasing is deferred, plan 009 must document the old and new commands honestly. +- If review result handling changes, plan 008 refine should reuse the final mutation result shape. +- If prompt wording changes, plan 009 prompt audits must check the final contract. diff --git a/docs/plans/008-migration-refine.md b/docs/plans/008-migration-refine.md new file mode 100644 index 0000000..e878af8 --- /dev/null +++ b/docs/plans/008-migration-refine.md @@ -0,0 +1,139 @@ +# 008 - Migration Refine + +## Goal + +Add a controlled refinement command for planning feedback: + +```text +continuous-refactoring migration refine <slug-or-path> +``` + +Refine should reopen or continue planning through the same durable planning state and staged publish pipeline, without rewriting migrations that have already executed work. + +## Non-goals + +- Do not support refining migrations with completed phases. +- Do not support arbitrary external migration paths. +- Do not bypass the one-step planning engine. +- Do not add transaction or lock repair behavior. +- Do not make `refine` a substitute for `review`; human-review clearing stays in plan 007. + +## Current behavior and evidence + +- There is no migration refinement CLI.
+- Current planning has a revise stage driven by automated review findings, but no user-feedback entry point. +- Ready migrations can be awaiting human review, but current review flow is about clearing review gates, not changing the plan with explicit feedback. + +## Proposed design + +Parser shape: + +```text +continuous-refactoring migration refine <slug-or-path> (--message <text>|--file <path>) --with codex|claude --model <model> --effort <effort> +``` + +Eligibility: + +- Allowed for `status: planning`. +- Allowed for `status: ready` only when no phase has `done=true` and `current_phase` points to the first phase. +- Refuse `in-progress`, `done`, `skipped`, or any migration with completed phases. +- Target must resolve inside the configured live migrations root through plan 006's resolver. + +State transition: + +- Append user feedback to planning state. +- For `status: planning`, keep the current planning cursor unless the state machine defines a refinement branch. +- For unexecuted `status: ready`, move back to `status: planning`, set `next_step` to a refinement/revise step, and preserve prior final-review output as audit history. +- Run exactly one planning/refinement step through the plan 004 engine. +- Publish through the plan 003 publisher with `base_snapshot_id` protection. + +The exact step name can be `refine` or reuse `revise`; choose during implementation based on the final state-machine vocabulary from plan 002. Do not invent a second feedback mechanism outside planning state.
+ +## Files/modules likely touched + +- `src/continuous_refactoring/cli.py` +- `src/continuous_refactoring/migration_cli.py` +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/planning_state.py` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/planning_publish.py` +- `tests/test_cli_migrations.py` +- `tests/test_planning.py` +- `tests/test_planning_state.py` +- `tests/test_prompts.py` + +## Test strategy + +Exact regression tests to add: + +- `tests/test_cli_migrations.py::test_migration_refine_requires_message_or_file` +- `tests/test_cli_migrations.py::test_migration_refine_rejects_outside_path_and_symlink_escape` +- `tests/test_cli_migrations.py::test_migration_refine_resumes_from_current_planning_state` +- `tests/test_cli_migrations.py::test_migration_refine_reopens_unexecuted_ready_migration_to_planning` +- `tests/test_cli_migrations.py::test_migration_refine_refuses_migration_with_completed_phase` +- `tests/test_cli_migrations.py::test_migration_refine_failure_leaves_live_snapshot_unchanged` +- `tests/test_cli_migrations.py::test_migration_refine_rejects_stale_base_snapshot` +- `tests/test_planning_state.py::test_planning_state_records_user_refinement_feedback` +- `tests/test_prompts.py::test_refine_prompt_names_work_dir_and_keeps_taste` + +Validation command: + +- `uv run pytest tests/test_cli_migrations.py tests/test_planning.py tests/test_planning_state.py tests/test_prompts.py` +- then `uv run pytest` + +## Numbered task breakdown with agent assignments + +1. `[Architect]` Decide whether refinement is a new step or a reuse of `revise`, based on the final transition graph. +2. `[Scout]` Identify ready/in-progress predicates and phase-completion helpers to avoid rewriting executed migrations. +3. `[Artisan]` Add parser behavior, feedback persistence, and refine eligibility checks. +4. `[Artisan]` Route refine through the one-step engine and staged publisher. +5. 
`[Test Maven]` Add eligibility, stale-base, and no-live-change tests. +6. `[Critic]` Review for unsafe rewrites of executed migrations and confusing UX. +7. `[Artisan]` Apply review fixes. + +## Blocking dependencies + +- Depends on [002-planning-state-schema-and-durable-stage-outputs.md](002-planning-state-schema-and-durable-stage-outputs.md). +- Depends on [004-resumable-one-step-planning-engine.md](004-resumable-one-step-planning-engine.md). +- Depends on [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md). +- Depends on [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md) for shared mutation UX and compatibility behavior. +- Blocks [009-hardening-compatibility-and-docs.md](009-hardening-compatibility-and-docs.md). + +## Mermaid dependency visualization + +```mermaid +graph TD + P002[002 planning state] --> P008[008 refine] + P004[004 one-step engine] --> P008 + P006[006 list + doctor] --> P008 + P007[007 staged review] --> P008 + P008 --> P009[009 hardening + docs] +``` + +## Acceptance criteria + +- `migration refine <slug-or-path>` parses and dispatches. +- Refine records user feedback in planning state. +- Refine runs through the one-step engine and staged publisher. +- Refine refuses any migration with completed phase work. +- Failed refine leaves the live migration snapshot unchanged. +- Stale base snapshots block publish. +- `uv run pytest` passes. + +## Risks and rollback + +- Risk: ready-to-planning reopen semantics are confusing. Keep eligibility narrow and output explicit. +- Risk: refine duplicates review behavior. Keep review for gate clearing, refine for user feedback. +- Risk: phase completion predicate is wrong. Reuse existing manifest phase state instead of local string checks. + +## Open questions + +- Should refine use a distinct `refine` step or reuse `revise`? Recommendation: decide from the final state-machine API; prefer fewer step names if readable.
+- Should refine be allowed for `ready` awaiting human review? Recommendation: yes only if no phase has executed; it should return to `planning`. +- Should refine auto-run final review? Recommendation: no; one accepted step per action remains the rule. + +## How later plans may need to adapt if this plan changes + +- If refine is deferred, plan 009 docs must omit it or mark it as future work. +- If refine cannot reopen ready migrations, plan 009 should document planning-only refine. +- If refinement step naming changes, AGENTS.md should record the final vocabulary. diff --git a/docs/plans/009-hardening-compatibility-and-docs.md b/docs/plans/009-hardening-compatibility-and-docs.md new file mode 100644 index 0000000..7807876 --- /dev/null +++ b/docs/plans/009-hardening-compatibility-and-docs.md @@ -0,0 +1,144 @@ +# 009 - Hardening, Compatibility Cleanup, And Docs + +## Goal + +Finish the stack by removing transitional seams, documenting the new planning-resume model, and proving the whole flow works end to end. + +This plan should land after the product behavior exists. + +## Non-goals + +- Do not introduce new migration semantics. +- Do not add broad `doctor --repair` behavior unless a separate plan approves it. +- Do not structurally refactor `loop.py` beyond cleanup made safe by earlier plans. +- Do not change release versions or changelog entries manually. +- Do not remove compatibility commands without clear tests and docs. + +## Current behavior and evidence + +- README currently documents the existing migration flow, not resumable planning snapshots. +- AGENTS.md currently describes `status: planning` and human-review gating, but not `.planning/state.json`, transaction directories, base snapshot checks, or the new `migration` CLI. +- Prompt tests enforce `## Taste` injection across prompt templates. +- There may be transitional wrappers after plan 004 and compatibility aliases after plan 007. 
+- Failure snapshots know call roles like classifier/planner/editor/reviewer, but planning-step failure wording may need tightening. + +## Proposed design + +Clean up after the stack: + +- Remove or narrow transitional planning wrappers kept only for staged rollout. +- Audit all planning, review, and refine prompts: + - name the staged work dir, + - forbid direct live-dir mutation, + - preserve `## Taste`, + - explain that failed current-step output is not resume input. +- Update README CLI docs: + - `migration list`, + - `migration review <slug-or-path>`, + - `migration refine <slug-or-path>`, + - `migration doctor <slug-or-path>`, + - `migration doctor --all`. +- Update AGENTS.md invariants: + - `.planning/state.json` is durable planning state, + - `.planning/stages/` stores accepted step outputs, + - planning publishes use base snapshot comparison before replacing live dirs, + - `migrations/__transactions__/` is internal and invisible to scheduling but visible to doctor, + - `status: planning` is eligible only for planning ticks, never phase execution, + - invalid planning state blocks automation, + - ready migrations must pass consistency validation before phase execution. +- Update failure snapshot wording and event labels for planning-step failures. +- Add focused end-to-end tests that cover source-target one-step creation, focused planning completion, and phase execution after `ready`. +- Re-check assumptions from plans 001 through 008 before editing README or AGENTS.md.
+ +## Files/modules likely touched + +- `README.md` +- `AGENTS.md` +- `src/continuous_refactoring/prompts.py` +- `src/continuous_refactoring/planning.py` +- `src/continuous_refactoring/cli.py` +- `src/continuous_refactoring/review_cli.py` +- `src/continuous_refactoring/failure_report.py` +- `tests/test_prompts.py` +- `tests/test_run.py` +- `tests/test_focus_on_live_migrations.py` +- `tests/test_failure_report.py` +- `tests/test_cli_migrations.py` + +## Test strategy + +Exact regression tests to add or modify: + +- `tests/test_prompts.py::test_planning_prompts_name_staged_work_dir_and_keep_taste` +- `tests/test_prompts.py::test_review_and_refine_prompts_forbid_live_dir_mutation` +- `tests/test_failure_report.py::test_planning_step_failure_snapshot_names_step_and_resume_behavior` +- `tests/test_run.py::test_e2e_source_target_creates_only_first_planning_step` +- `tests/test_run.py::test_e2e_source_target_then_focused_run_resumes_planning_to_ready` +- `tests/test_focus_on_live_migrations.py::test_e2e_focused_run_completes_planning_before_phase_execution` +- `tests/test_cli_migrations.py::test_documented_migration_commands_match_parser` + +Validation command: + +- targeted prompt/CLI/failure/run tests, +- then full `uv run pytest`. + +## Numbered task breakdown with agent assignments + +1. `[Scout]` Identify transitional wrappers, compatibility paths, and docs that now disagree with behavior. +2. `[Architect]` Decide which compatibility paths stay and which can be removed safely. +3. `[Artisan]` Update README, AGENTS.md, prompts, and failure wording. +4. `[Artisan]` Remove obsolete wrappers or mark compatibility aliases intentionally. +5. `[Test Maven]` Add end-to-end and prompt contract tests. +6. `[Critic]` Review docs for stale invariants, unsafe operator advice, and CLI mismatch. +7. `[Artisan]` Apply review fixes. + +## Blocking dependencies + +- Depends on [005-planning-before-phase-execution-scheduling.md](005-planning-before-phase-execution-scheduling.md). 
+- Depends on [006-migration-list-and-doctor.md](006-migration-list-and-doctor.md). +- Depends on [007-migration-review-staged-publish.md](007-migration-review-staged-publish.md). +- Depends on [008-migration-refine.md](008-migration-refine.md). +- Should also re-check assumptions from plans 001 through 008 before editing AGENTS.md. + +## Mermaid dependency visualization + +```mermaid +graph TD + P001[001 foundation] --> P009[009 hardening + docs] + P002[002 planning state] --> P009 + P003[003 publisher] --> P009 + P004[004 engine] --> P009 + P005[005 scheduler] --> P009 + P006[006 list + doctor] --> P009 + P007[007 review] --> P009 + P008[008 refine] --> P009 +``` + +## Acceptance criteria + +- README documents the new migration planning workflow and CLI accurately. +- AGENTS.md contains the new load-bearing invariants and removes stale ones. +- Prompt tests prove `## Taste` remains injected. +- Review/refine prompts no longer imply live-dir mutation. +- Failure snapshots for planning-step failures explain that the current step will rerun from the last atomic state. +- Any transitional wrapper left in place is documented and tested as compatibility. +- Full `uv run pytest` passes. + +## Risks and rollback + +- Risk: docs overpromise behavior not delivered by earlier plans. Mitigate by checking parser/tests before writing docs. +- Risk: removing compatibility paths breaks users. Keep aliases unless there is a clear deprecation decision. +- Risk: AGENTS.md becomes too large. Add only load-bearing invariants and remove obsolete wording in the same PR. +- Risk: end-to-end tests become brittle. Use existing fake-agent idioms and avoid real agent integration. + +## Open questions + +- Should old top-level `review` be deprecated in README or just kept undocumented? Recommendation: document `migration review` as canonical and mention top-level `review` only as compatibility if still present. +- Should `.planning/` be user-facing in README? 
Recommendation: mention it as durable audit state, not as something users edit by hand. +- Should docs describe manual transaction recovery? Recommendation: only after `doctor --repair` exists; until then tell users to run `migration doctor`. + +## How later plans may need to adapt if this plan changes + +- If earlier plans defer `refine`, docs must omit it or mark it experimental instead of pretending it exists. +- If doctor remains read-only, later repair work should get its own numbered plan. +- If compatibility aliases stay long-term, future cleanup plans should treat them as supported surface, not dead code. diff --git a/src/continuous_refactoring/cli.py b/src/continuous_refactoring/cli.py index 872caf8..a019cef 100644 --- a/src/continuous_refactoring/cli.py +++ b/src/continuous_refactoring/cli.py @@ -30,6 +30,8 @@ run_migrations_focused_loop, run_once, ) +from continuous_refactoring.migration_cli import handle_migration +from continuous_refactoring.migrations import MIGRATION_STATUSES from continuous_refactoring.review_cli import handle_review _PACKAGE_DISTRIBUTION = "continuous-refactoring" @@ -277,6 +279,81 @@ def _add_review_parser(subparsers: argparse._SubParsersAction) -> None: perform_parser.add_argument("--effort", required=True, help="Effort level.") +def _add_migration_parser(subparsers: argparse._SubParsersAction) -> None: + migration_parser = subparsers.add_parser( + "migration", + help="Inspect live migrations.", + ) + migration_parser.set_defaults(handler=handle_migration) + migration_sub = migration_parser.add_subparsers(dest="migration_command") + + list_parser = migration_sub.add_parser( + "list", + help="List visible migrations.", + ) + list_parser.add_argument( + "--status", + choices=MIGRATION_STATUSES, + default=None, + help="Only show migrations with this status.", + ) + list_parser.add_argument( + "--awaiting-review", + action="store_true", + help="Only show migrations awaiting human review.", + ) + + doctor_parser = 
migration_sub.add_parser( + "doctor", + help="Validate migration consistency.", + ) + doctor_parser.add_argument( + "target", + nargs="?", + help="Migration slug or contained path.", + ) + doctor_parser.add_argument( + "--all", + action="store_true", + help="Validate every visible migration and transaction state.", + ) + + review_parser = migration_sub.add_parser( + "review", + help="Perform staged review on a flagged migration.", + ) + review_parser.add_argument("target", help="Migration slug or contained path.") + review_parser.add_argument( + "--with", dest="agent", choices=("codex", "claude"), required=True, + help="Agent backend.", + ) + review_parser.add_argument("--model", required=True, help="Model name.") + review_parser.add_argument( + "--effort", choices=EFFORT_TIERS, required=True, help="Effort level." + ) + + refine_parser = migration_sub.add_parser( + "refine", + help="Refine a planning migration with user feedback.", + ) + refine_parser.add_argument("target", help="Migration slug or contained path.") + feedback_group = refine_parser.add_mutually_exclusive_group(required=True) + feedback_group.add_argument("--message", help="Refinement feedback text.") + feedback_group.add_argument( + "--file", + type=Path, + help="Path to a UTF-8 file containing refinement feedback.", + ) + refine_parser.add_argument( + "--with", dest="agent", choices=("codex", "claude"), required=True, + help="Agent backend.", + ) + refine_parser.add_argument("--model", required=True, help="Model name.") + refine_parser.add_argument( + "--effort", choices=EFFORT_TIERS, required=True, help="Effort level." 
+ ) + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Continuous refactoring CLI for AI coding agents.", @@ -297,6 +374,7 @@ def build_parser() -> argparse.ArgumentParser: help="Verify and upgrade global configuration.", ) upgrade_parser.set_defaults(handler=_handle_upgrade) + _add_migration_parser(subparsers) _add_review_parser(subparsers) return parser diff --git a/src/continuous_refactoring/failure_report.py b/src/continuous_refactoring/failure_report.py index 3e0e0a2..2ffab49 100644 --- a/src/continuous_refactoring/failure_report.py +++ b/src/continuous_refactoring/failure_report.py @@ -243,6 +243,13 @@ def write( def _next_step_text(record: DecisionRecord) -> str: + planning_step = _planning_step(record) + if planning_step is not None: + return ( + f"Rerun planning step `{planning_step}` from the last published " + ".planning/state.json; failed current-step output and partial " + "work are artifact evidence only, not resume input." + ) if record.decision == "retry": focus = f" Focus: {record.next_retry_focus}" if record.next_retry_focus else "" return f"Retry the same target on the next attempt.{focus}" @@ -253,6 +260,16 @@ def _next_step_text(record: DecisionRecord) -> str: return "Commit the validated result and continue normally." +def _planning_step(record: DecisionRecord) -> str | None: + if record.failure_kind != "planning-step-failed": + return None + prefix = "planning." 
+ if not record.call_role.startswith(prefix): + return None + step = record.call_role.removeprefix(prefix) + return step or None + + def effective_record( record: DecisionRecord, *, @@ -342,16 +359,25 @@ def persist_decision( validation_command=validation_command, record=record, ) + planning_step = _planning_step(record) + log_fields: dict[str, object] = {} + if planning_step is not None: + log_fields["planning_step"] = planning_step artifacts.log( "WARN", f"failure snapshot written: {reason_doc}", - event="failure_doc_written", + event=( + "planning_step_failure_doc_written" + if planning_step is not None + else "failure_doc_written" + ), attempt=attempt, retry=retry, target=record.target, call_role=record.call_role, phase_reached=record.phase_reached, reason_doc_path=str(reason_doc), + **log_fields, ) _log_transition_from_record( artifacts, diff --git a/src/continuous_refactoring/loop.py b/src/continuous_refactoring/loop.py index b884e74..bc1c0ca 100644 --- a/src/continuous_refactoring/loop.py +++ b/src/continuous_refactoring/loop.py @@ -224,9 +224,31 @@ def _action_banner(action_index: int, action_limit: int | None) -> str: def _print_migration_probe(live_dir: Path, effort_budget: EffortBudget) -> None: + preflight = migration_tick._first_unloadable_visible_manifest(live_dir) + if preflight is not None: + migration_dir, _findings = preflight + print(f"Examining migration: migration/{migration_dir.name}") + return + + now = datetime.now(timezone.utc) + planning_candidates = migration_tick.enumerate_eligible_planning_manifests( + live_dir, + now, + ) + if planning_candidates: + if len(planning_candidates) > 1: + print( + f"Examining planning migrations: " + f"{len(planning_candidates)} eligible" + ) + return + manifest, _manifest_path = planning_candidates[0] + print(f"Examining planning migration: migration/{manifest.name}") + return + candidates = migration_tick.enumerate_eligible_manifests( live_dir, - datetime.now(timezone.utc), + now, effort_budget, ) if not 
candidates: @@ -244,6 +266,7 @@ def _print_migration_probe(live_dir: Path, effort_budget: EffortBudget) -> None: class _MigrationProbeArtifacts: def __init__(self, artifacts: RunArtifacts, action_index: int) -> None: self._artifacts = artifacts + self.run_id = artifacts.run_id self.root = artifacts.root / "migration-probes" / f"action-{action_index:03d}" def attempt_dir(self, attempt: int, retry: int = 1) -> Path: @@ -426,6 +449,15 @@ def run_once(args: argparse.Namespace) -> int: final_status = "completed" return 0 if route_result.outcome in {"abandon", "blocked"}: + if route_result.decision_record is not None: + persist_decision( + repo_root, + artifacts, + attempt=1, + retry=route_result.decision_record.retry_used, + validation_command=args.validation_command, + record=route_result.decision_record, + ) final_status = "migration_failed" raise ContinuousRefactorError( route_result.decision_record.summary @@ -650,6 +682,58 @@ def run_loop(args: argparse.Namespace) -> int: if live_dir is not None: _print_migration_probe(live_dir, base_effort_budget) migration_artifacts = _MigrationProbeArtifacts(artifacts, action_index) + planning_outcome, planning_record = migration_tick.try_planning_tick( + live_dir, + taste, + repo_root, + migration_artifacts, + agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + attempt=action_index, + finalize_commit=_finalize_commit, + ) + + if planning_outcome in {"commit", "abandon", "blocked"}: + artifacts.mark_attempt_started(action_index) + if planning_record is not None: + persist_decision( + repo_root, + artifacts, + attempt=action_index, + retry=planning_record.retry_used, + validation_command=args.validation_command, + record=planning_record, + ) + actions_completed += 1 + if planning_outcome == "commit": + consecutive_failures = 0 + else: + if planning_record is not None: + print( + "Planning 
def _focus_eligible_planning_manifests(
    live_dir: Path, now: datetime,
) -> list[tuple[MigrationManifest, Path]]:
    """Return eligible planning manifests that are not awaiting human review.

    Candidates come from ``migration_tick.enumerate_eligible_planning_manifests``
    and keep the order in which that function yields them.
    """
    runnable: list[tuple[MigrationManifest, Path]] = []
    candidates = migration_tick.enumerate_eligible_planning_manifests(
        live_dir,
        now,
    )
    for pair in candidates:
        # pair[0] is the manifest; skip the ones flagged for human review.
        if pair[0].awaiting_human_review:
            continue
        runnable.append(pair)
    return runnable
blocked." @@ -1029,25 +1145,53 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: iteration += 1 artifacts.mark_attempt_started(iteration) - names = ", ".join(_eligible_phase_path_labels(repo_root, eligible)) + names = ( + f"{preflight[0].name}/manifest.json" + if preflight is not None + else ( + ", ".join( + _eligible_planning_path_labels(repo_root, planning_eligible) + ) + if planning_eligible + else ", ".join( + _eligible_phase_path_labels(repo_root, phase_eligible) + ) + ) + ) print(f"\n── Migration tick {iteration} (eligible: {names}) ──") - outcome, record = migration_tick.try_migration_tick( - live_dir, - taste, - repo_root, - artifacts, - agent=args.agent, - model=args.model, - effort=base_effort_budget.default_effort, - effort_budget=base_effort_budget, - timeout=timeout, - commit_message_prefix=args.commit_message_prefix, - validation_command=args.validation_command, - max_attempts=max_attempts_effective, - attempt=iteration, - finalize_commit=_finalize_commit, - ) + if planning_eligible: + outcome, record = migration_tick.try_planning_tick( + live_dir, + taste, + repo_root, + artifacts, + agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + attempt=iteration, + finalize_commit=_finalize_commit, + ) + else: + outcome, record = migration_tick.try_migration_tick( + live_dir, + taste, + repo_root, + artifacts, + agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + validation_command=args.validation_command, + max_attempts=max_attempts_effective, + attempt=iteration, + finalize_commit=_finalize_commit, + ) if record is not None and outcome != "not-routed": persist_decision( diff --git a/src/continuous_refactoring/migration_cli.py 
@dataclass(frozen=True)
class MigrationCliContext:
    """Immutable bundle of project paths used by the ``migration`` subcommands.

    Built by ``_resolve_context`` from the project resolved for the current
    working directory.
    """

    # Resolved path of the project's registered entry (``project.entry.path``).
    repo_root: Path
    # Live migrations directory returned by ``resolve_live_migrations_dir``.
    live_dir: Path
    # The project's state directory (``project.project_dir``).
    project_state_dir: Path
def handle_migration_doctor(args: argparse.Namespace) -> None:
    """Run consistency checks for one migration or for every visible one.

    Exactly one of ``--all`` or a positional ``target`` must be supplied.
    Each finding is printed to stdout as one tab-separated row.

    Raises:
        SystemExit: with code 2 on a usage error or when the target cannot
            be resolved, and with code 1 when any finding is blocking.
    """
    context = _resolve_context(error_code=2)
    target: str | None = getattr(args, "target", None)
    all_targets = bool(getattr(args, "all", False))
    # Supplying both selectors, or neither, is a usage error.
    if all_targets == bool(target):
        print(
            "Error: migration doctor requires exactly one of --all or <target>.",
            file=sys.stderr,
        )
        raise SystemExit(2)

    if all_targets:
        findings = _doctor_all(context)
    else:
        # Narrowing only: the check above guarantees a non-empty target here.
        assert target is not None
        try:
            migration_target = resolve_migration_target(
                live_dir=context.live_dir,
                repo_root=context.repo_root,
                value=target,
            )
        except ContinuousRefactorError as error:
            print(f"Error: {error}", file=sys.stderr)
            raise SystemExit(2) from error
        findings = _doctor_migration(context, migration_target)

    for slug, finding in findings:
        print(_format_doctor_finding(slug, finding))
    if has_blocking_consistency_findings(finding for _, finding in findings):
        raise SystemExit(1)
handle_staged_migration_review, + ) + + try: + taste = load_taste(resolve_project(context.repo_root)) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + handle_staged_migration_review( + StagedReviewRequest( + repo_root=context.repo_root, + live_dir=context.live_dir, + target=target, + project_state_dir=context.project_state_dir, + agent=args.agent, + model=args.model, + effort=args.effort, + taste=taste, + ) + ) + + +def handle_migration_refine(args: argparse.Namespace) -> None: + context = _resolve_context(error_code=2) + feedback_text, feedback_source = _read_refine_feedback(args) + try: + target = resolve_migration_target( + live_dir=context.live_dir, + repo_root=context.repo_root, + value=args.target, + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(2) from error + + from continuous_refactoring.config import load_taste + from continuous_refactoring.planning import ( + PlanningRefineRequest, + run_refine_planning_step, + ) + + try: + taste = load_taste(resolve_project(context.repo_root)) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + + artifacts = create_run_artifacts( + context.repo_root, + agent=args.agent, + model=args.model, + effort=args.effort, + test_command="migration refine", + ) + try: + result = run_refine_planning_step( + PlanningRefineRequest( + migration_name=target.slug, + feedback_text=feedback_text, + feedback_source=feedback_source, + taste=taste, + repo_root=context.repo_root, + live_dir=context.live_dir, + artifacts=artifacts, + agent=args.agent, + model=args.model, + effort=args.effort, + ) + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(_refine_error_code(str(error))) from error + + if result.status != "published": + print( + f"Error: 
{_refine_publish_error_message(result.reason, target.slug)}", + file=sys.stderr, + ) + raise SystemExit(1) + print(f"Refined {target.slug}: {result.reason}") + + +def resolve_migration_target( + *, + live_dir: Path, + repo_root: Path, + value: str, +) -> MigrationTarget: + live_root = live_dir.resolve() + slug_target = _slug_target(live_root, value) + path_target = _path_target( + live_root, + repo_root.resolve(), + value, + reject_symlink=slug_target is None, + ) + + if ( + slug_target is not None + and path_target is not None + and slug_target.path.resolve() != path_target.path.resolve() + ): + raise ContinuousRefactorError( + f"Migration target {value!r} is ambiguous between " + f"{slug_target.path} and {path_target.path}." + ) + if slug_target is not None: + return slug_target + if path_target is not None: + return path_target + if _looks_like_path(value): + _raise_invalid_path_target(live_root, repo_root.resolve(), value) + raise ContinuousRefactorError(f"Migration {value!r} does not exist.") + + +def _read_refine_feedback(args: argparse.Namespace) -> tuple[str, FeedbackSource]: + if args.message is not None: + text = str(args.message) + source: FeedbackSource = "message" + else: + try: + path = args.file + text = path.read_text(encoding="utf-8") + except OSError as error: + print( + f"Error: could not read refinement feedback file: {error}", + file=sys.stderr, + ) + raise SystemExit(2) from error + source = "file" + if not text.strip(): + print("Error: refinement feedback must not be empty.", file=sys.stderr) + raise SystemExit(2) + return text, source + + +def _refine_publish_error_message(reason: str, slug: str) -> str: + if "stale base snapshot" not in reason: + return reason + return ( + f"{reason}\n" + "Live migration changed while refine was running. " + f"Run `continuous-refactoring migration doctor {slug}` if unsure, then " + f"rerun `continuous-refactoring migration refine {slug} ...`." 
+ ) + + +def _refine_error_code(message: str) -> int: + usage_fragments = ( + "cannot be refined", + "only planning or unexecuted ready migrations", + "already advanced", + "missing .planning/state.json", + "Cannot reopen planning state", + "Planning state is terminal", + ) + return 2 if any(fragment in message for fragment in usage_fragments) else 1 + + +@dataclass(frozen=True) +class _ListRow: + slug: str + status: str + cursor: str + awaiting_review: str + last_touch: str + cooldown: str + reason: str + + def format(self) -> str: + return "\t".join( + ( + self.slug, + self.status, + self.cursor, + self.awaiting_review, + self.last_touch, + self.cooldown, + self.reason, + ) + ) + + +def _resolve_context(*, error_code: int) -> MigrationCliContext: + try: + project = resolve_project(Path.cwd().resolve()) + except ContinuousRefactorError: + print( + "Error: project not initialized; no live-migrations-dir available.", + file=sys.stderr, + ) + raise SystemExit(error_code) + try: + live_dir = resolve_live_migrations_dir(project) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(error_code) + if live_dir is None: + print( + "Error: no live-migrations-dir configured for this project.", + file=sys.stderr, + ) + raise SystemExit(error_code) + return MigrationCliContext( + repo_root=Path(project.entry.path).resolve(), + live_dir=live_dir, + project_state_dir=project.project_dir, + ) + + +def _list_row( + context: MigrationCliContext, + migration_dir: Path, +) -> _ListRow | None: + manifest_path = migration_dir / "manifest.json" + if not manifest_path.exists(): + return _ListRow( + slug=migration_dir.name, + status="invalid-manifest", + cursor="blocked", + awaiting_review="no", + last_touch=_MISSING_TEXT, + cooldown=_MISSING_TEXT, + reason="missing-manifest", + ) + try: + manifest = load_migration_manifest(manifest_path) + except ContinuousRefactorError as error: + return _ListRow( + slug=migration_dir.name, + 
status="invalid-manifest", + cursor="blocked", + awaiting_review="no", + last_touch=_MISSING_TEXT, + cooldown=_MISSING_TEXT, + reason=f"invalid-manifest: {_single_line(str(error))}", + ) + + cursor, cursor_reason = _cursor_text(context, migration_dir, manifest) + return _ListRow( + slug=migration_dir.name, + status=manifest.status, + cursor=cursor, + awaiting_review="yes" if manifest.awaiting_human_review else "no", + last_touch=manifest.last_touch, + cooldown=manifest.cooldown_until or _MISSING_TEXT, + reason=_reason_text(manifest, cursor_reason), + ) + + +def _cursor_text( + context: MigrationCliContext, + migration_dir: Path, + manifest: MigrationManifest, +) -> tuple[str, str | None]: + if manifest.status == "planning": + return _planning_cursor(context, migration_dir) + if manifest.status in ("ready", "in-progress"): + if not manifest.current_phase: + return _MISSING_TEXT, None + try: + phase = resolve_current_phase(manifest) + except ContinuousRefactorError: + return "blocked", "invalid-current-phase" + return phase_file_reference(phase), None + return _MISSING_TEXT, None + + +def _planning_cursor( + context: MigrationCliContext, + migration_dir: Path, +) -> tuple[str, str | None]: + state_path = planning_state_path(migration_dir) + if not state_path.exists(): + return "planning:blocked", "planning-state-missing" + try: + state = load_planning_state( + context.repo_root, + state_path, + published_migration_root=migration_dir, + ) + except ContinuousRefactorError: + return "planning:blocked", "planning-state-invalid" + return f"planning:{state.next_step}", None + + +def _reason_text(manifest: MigrationManifest, cursor_reason: str | None) -> str: + if cursor_reason is not None: + return cursor_reason + if manifest.human_review_reason: + return manifest.human_review_reason + return _MISSING_TEXT + + +def _doctor_all( + context: MigrationCliContext, +) -> list[tuple[str, MigrationConsistencyFinding]]: + findings: list[tuple[str, MigrationConsistencyFinding]] = [] 
def _doctor_migration(
    context: MigrationCliContext,
    target: MigrationTarget,
) -> list[tuple[str, MigrationConsistencyFinding]]:
    """Collect doctor findings for one migration, each paired with its slug.

    Consistency findings (checked in ``"doctor"`` mode) come first, followed
    by the planning-state findings for the same directory.
    """
    consistency = check_migration_consistency(target.path, mode="doctor")
    planning = _planning_state_findings(context, target.path)
    labelled: list[tuple[str, MigrationConsistencyFinding]] = []
    for finding in (*consistency, *planning):
        labelled.append((target.slug, finding))
    return labelled
transaction root is not a directory.", + ), + ) + ] + + findings: list[tuple[str, MigrationConsistencyFinding]] = [] + lock_path = publish_lock_path(live_dir) + if lock_path.exists(): + findings.append( + ( + "__transactions__", + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="publish-lock-present", + path=lock_path, + message=_lock_message(lock_path), + ), + ) + ) + + for child in sorted(transaction_root.iterdir()): + if child == lock_path: + continue + if child.is_dir(): + findings.append( + ( + "__transactions__", + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="transaction-leftover", + path=child, + message="Planning transaction directory is still present.", + ), + ) + ) + return findings + + +def _format_doctor_finding( + slug: str, + finding: MigrationConsistencyFinding, +) -> str: + return "\t".join( + ( + slug, + finding.severity, + finding.code, + str(finding.path), + finding.message, + ) + ) + + +def _slug_target(live_root: Path, value: str) -> MigrationTarget | None: + if not _safe_slug(value): + return None + path = live_root / value + if not path.is_dir() or path.is_symlink(): + return None + return MigrationTarget(slug=value, path=path) + + +def _path_target( + live_root: Path, + repo_root: Path, + value: str, + *, + reject_symlink: bool, +) -> MigrationTarget | None: + if not _should_consider_path(repo_root, value): + return None + _require_no_parent_traversal(value) + path = _raw_path(repo_root, value) + if reject_symlink and path.is_symlink(): + raise ContinuousRefactorError( + f"Migration path must not be a symlink: {path}" + ) + resolved = path.resolve() + if not resolved.exists(): + return None + _require_contained_visible_child(live_root, resolved, original=path) + return MigrationTarget(slug=resolved.name, path=resolved) + + +def _raise_invalid_path_target(live_root: Path, repo_root: Path, value: str) -> None: + _require_no_parent_traversal(value) + path = _raw_path(repo_root, value) + if 
path.is_symlink(): + raise ContinuousRefactorError( + f"Migration path must not be a symlink: {path}" + ) + resolved = path.resolve() + _require_contained_visible_child(live_root, resolved, original=path) + if not resolved.is_dir(): + raise ContinuousRefactorError(f"Migration path is not a directory: {path}") + + +def _require_contained_visible_child( + live_root: Path, + resolved: Path, + *, + original: Path, +) -> None: + try: + relative = resolved.relative_to(live_root) + except ValueError as error: + raise ContinuousRefactorError( + f"Migration path must stay inside live migrations dir: {original}" + ) from error + if len(relative.parts) != 1: + raise ContinuousRefactorError( + f"Migration path must identify a direct migration directory: {original}" + ) + if not _safe_slug(relative.parts[0]): + raise ContinuousRefactorError( + f"Migration path targets a hidden or internal directory: {original}" + ) + if not resolved.is_dir(): + raise ContinuousRefactorError(f"Migration path is not a directory: {original}") + + +def _safe_slug(value: str) -> bool: + return ( + value != "" + and Path(value).name == value + and not value.startswith(".") + and not value.startswith("__") + ) + + +def _should_consider_path(repo_root: Path, value: str) -> bool: + return _looks_like_path(value) or _raw_path(repo_root, value).exists() + + +def _looks_like_path(value: str) -> bool: + path = Path(value) + return path.is_absolute() or len(path.parts) > 1 or value.startswith(".") + + +def _require_no_parent_traversal(value: str) -> None: + if ".." 
in Path(value).parts: + raise ContinuousRefactorError( + f"Migration path must not contain parent traversal: {value}" + ) + + +def _raw_path(repo_root: Path, value: str) -> Path: + path = Path(value) + if path.is_absolute(): + return path + return repo_root / path + + +def _lock_message(lock_path: Path) -> str: + details = _lock_metadata(lock_path) + try: + mtime = datetime.fromtimestamp(lock_path.stat().st_mtime).astimezone() + except OSError: + mtime_text = "unknown" + else: + mtime_text = mtime.isoformat(timespec="seconds") + suffix = f"; {details}" if details else "" + return f"Planning publish lock is present; mtime={mtime_text}{suffix}." + + +def _lock_metadata(lock_path: Path) -> str: + owner_path = lock_path / "owner.json" + try: + raw = json.loads(owner_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return "" + if not isinstance(raw, dict): + return "" + parts = [ + f"{key}={raw[key]}" + for key in ("pid", "operation", "created_at") + if key in raw + ] + return ", ".join(parts) + + +def _single_line(value: str) -> str: + return " ".join(value.split()) diff --git a/src/continuous_refactoring/migration_consistency.py b/src/continuous_refactoring/migration_consistency.py new file mode 100644 index 0000000..84f3337 --- /dev/null +++ b/src/continuous_refactoring/migration_consistency.py @@ -0,0 +1,449 @@ +from __future__ import annotations + +import re +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.migrations import MigrationManifest, load_manifest + +__all__ = [ + "CONSISTENCY_MODES", + "CONSISTENCY_SEVERITIES", + "ConsistencyMode", + "ConsistencySeverity", + "MigrationConsistencyFinding", + "check_migration_consistency", + "has_blocking_consistency_findings", + "iter_visible_migration_dirs", +] + +ConsistencyMode = Literal[ + "planning-snapshot", + 
"ready-publish", + "execution-gate", + "doctor", +] +ConsistencySeverity = Literal["info", "warning", "error"] + +CONSISTENCY_MODES: tuple[ConsistencyMode, ...] = ( + "planning-snapshot", + "ready-publish", + "execution-gate", + "doctor", +) +CONSISTENCY_SEVERITIES: tuple[ConsistencySeverity, ...] = ( + "info", + "warning", + "error", +) + +_PHASE_DOC_RE = re.compile(r"^phase-(?P\d+)-(?P.+)\.md$") +_INTERNAL_MIGRATION_DIR_NAMES = frozenset( + { + "__intentional_skips__", + "__transactions__", + } +) + + +@dataclass(frozen=True) +class MigrationConsistencyFinding: + severity: ConsistencySeverity + mode: ConsistencyMode + code: str + path: Path + message: str + + +def iter_visible_migration_dirs(live_dir: Path) -> list[Path]: + if not live_dir.is_dir(): + return [] + return [ + child + for child in sorted(live_dir.iterdir()) + if ( + child.is_dir() + and not child.is_symlink() + and _is_visible_migration_dir_name(child.name) + ) + ] + + +def check_migration_consistency( + migration_dir: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + manifest_path = migration_dir / "manifest.json" + findings: list[MigrationConsistencyFinding] = [] + if not manifest_path.exists(): + return [ + _finding( + mode, + "error", + "missing-manifest", + manifest_path, + "Migration manifest is missing.", + ) + ] + + try: + manifest = load_manifest(manifest_path) + except ContinuousRefactorError as error: + return [ + _finding( + mode, + "error", + "invalid-manifest", + manifest_path, + str(error), + ) + ] + + if manifest.name != migration_dir.name: + findings.append( + _finding( + mode, + "error", + "manifest-slug-mismatch", + manifest_path, + ( + f"Manifest name {manifest.name!r} does not match " + f"directory slug {migration_dir.name!r}." 
+ ), + ) + ) + + findings.extend(_phase_doc_duplicate_findings(migration_dir, mode)) + findings.extend(_plan_findings(migration_dir, manifest, mode)) + findings.extend(_manifest_phase_file_findings(migration_dir, manifest, mode)) + findings.extend(_manifest_phase_metadata_findings(migration_dir, manifest, mode)) + return findings + + +def has_blocking_consistency_findings( + findings: Iterable[MigrationConsistencyFinding], +) -> bool: + return any(finding.severity == "error" for finding in findings) + + +def _is_visible_migration_dir_name(name: str) -> bool: + return ( + not name.startswith(".") + and not name.startswith("__") + and name not in _INTERNAL_MIGRATION_DIR_NAMES + ) + + +def _finding( + mode: ConsistencyMode, + severity: ConsistencySeverity, + code: str, + path: Path, + message: str, +) -> MigrationConsistencyFinding: + return MigrationConsistencyFinding( + severity=severity, + mode=mode, + code=code, + path=path, + message=message, + ) + + +def _phase_doc_duplicate_findings( + migration_dir: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings: list[MigrationConsistencyFinding] = [] + by_index: dict[int, Path] = {} + by_name: dict[str, Path] = {} + for path in _phase_doc_paths(migration_dir): + match = _PHASE_DOC_RE.match(path.name) + if match is None: + continue + phase_index = int(match.group("index")) + phase_name = match.group("name") + if phase_index in by_index: + findings.append( + _finding( + mode, + "error", + "duplicate-phase-doc-index", + path, + ( + f"Phase doc index {phase_index} is duplicated by " + f"{by_index[phase_index].name!r} and {path.name!r}." + ), + ) + ) + else: + by_index[phase_index] = path + if phase_name in by_name: + findings.append( + _finding( + mode, + "error", + "duplicate-phase-doc-name", + path, + ( + f"Phase doc name {phase_name!r} is duplicated by " + f"{by_name[phase_name].name!r} and {path.name!r}." 
+ ), + ) + ) + else: + by_name[phase_name] = path + return findings + + +def _phase_doc_paths(migration_dir: Path) -> list[Path]: + try: + return [ + child + for child in sorted(migration_dir.iterdir()) + if _PHASE_DOC_RE.match(child.name) is not None + ] + except OSError as error: + raise ContinuousRefactorError( + f"Could not scan migration directory {migration_dir}: {error}" + ) from error + + +def _plan_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _requires_plan(manifest, mode): + return [] + plan_path = migration_dir / "plan.md" + if plan_path.exists(): + return [] + return [ + _finding( + mode, + "error", + "missing-plan", + plan_path, + "Ready and in-progress migrations require plan.md.", + ) + ] + + +def _requires_plan(manifest: MigrationManifest, mode: ConsistencyMode) -> bool: + return mode == "ready-publish" or ( + mode in ("doctor", "execution-gate") + and manifest.status in ("ready", "in-progress") + ) + + +def _manifest_phase_file_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings: list[MigrationConsistencyFinding] = [] + for phase in manifest.phases: + ref = Path(phase.file) + if _invalid_phase_file_reference(ref): + findings.append( + _finding( + mode, + "error", + "invalid-phase-file-reference", + migration_dir / phase.file, + f"Phase file reference {phase.file!r} must stay inside the migration directory.", + ) + ) + continue + + phase_path = migration_dir / ref + if phase_path.is_symlink(): + findings.extend(_symlink_phase_file_findings(migration_dir, phase_path, mode)) + continue + + if not phase_path.exists(): + findings.append( + _finding( + mode, + "error", + "missing-phase-file", + phase_path, + f"Manifest phase file {phase.file!r} is missing.", + ) + ) + continue + + if not _is_inside(phase_path.resolve(), migration_dir.resolve()): + findings.append( + 
_finding( + mode, + "error", + "phase-file-escapes-migration", + phase_path, + f"Manifest phase file {phase.file!r} resolves outside the migration directory.", + ) + ) + continue + + if not phase_path.is_file(): + findings.append( + _finding( + mode, + "error", + "phase-file-not-regular", + phase_path, + f"Manifest phase file {phase.file!r} is not a regular file.", + ) + ) + return findings + + +def _manifest_phase_metadata_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _requires_ready_publish_metadata(manifest, mode): + return [] + + findings: list[MigrationConsistencyFinding] = [] + phase_names = {phase.name for phase in manifest.phases} + if not phase_names: + findings.append( + _finding( + mode, + "error", + "missing-manifest-phases", + migration_dir / "manifest.json", + "Ready migrations require at least one manifest phase.", + ) + ) + elif manifest.current_phase not in phase_names: + findings.append( + _finding( + mode, + "error", + "invalid-current-phase", + migration_dir / "manifest.json", + ( + f"Current phase {manifest.current_phase!r} does not match " + "any manifest phase." 
+ ), + ) + ) + + doc_phase_names = { + match.group("name") + for path in _phase_doc_paths(migration_dir) + if (match := _PHASE_DOC_RE.match(path.name)) is not None + } + for doc_phase_name in sorted(doc_phase_names - phase_names): + findings.append( + _finding( + mode, + "error", + "phase-doc-not-in-manifest", + migration_dir / f"phase-*-{doc_phase_name}.md", + f"Phase doc {doc_phase_name!r} is not represented in manifest phases.", + ) + ) + + for phase in manifest.phases: + phase_path = migration_dir / phase.file + if phase_path.is_file() and not phase_path.is_symlink(): + findings.extend(_phase_doc_contract_findings(phase_path, mode)) + return findings + + +def _requires_ready_publish_metadata( + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> bool: + return mode == "ready-publish" or ( + mode == "doctor" and manifest.status in ("ready", "in-progress") + ) + + +def _phase_doc_contract_findings( + phase_path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + try: + content = phase_path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not read phase doc {phase_path}: {error}" + ) from error + + findings: list[MigrationConsistencyFinding] = [] + if not re.search(r"^##\s+Precondition\s*$", content, re.IGNORECASE | re.MULTILINE): + findings.append( + _finding( + mode, + "error", + "missing-phase-precondition", + phase_path, + "Phase docs require a ## Precondition section before ready publish.", + ) + ) + if not re.search( + r"^##\s+Definition of Done\s*$", + content, + re.IGNORECASE | re.MULTILINE, + ): + findings.append( + _finding( + mode, + "error", + "missing-phase-definition-of-done", + phase_path, + "Phase docs require a ## Definition of Done section before ready publish.", + ) + ) + return findings + + +def _invalid_phase_file_reference(ref: Path) -> bool: + return ( + str(ref) in ("", ".") + or ref.is_absolute() + or ".." 
in ref.parts + ) + + +def _symlink_phase_file_findings( + migration_dir: Path, + phase_path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _is_inside(phase_path.resolve(), migration_dir.resolve()): + return [ + _finding( + mode, + "error", + "phase-file-escapes-migration", + phase_path, + f"Phase file symlink {phase_path.name!r} resolves outside the migration directory.", + ) + ] + return [ + _finding( + mode, + "error", + "phase-file-not-regular", + phase_path, + f"Phase file {phase_path.name!r} must be a regular file, not a symlink.", + ) + ] + + +def _is_inside(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + except ValueError: + return False + return True diff --git a/src/continuous_refactoring/migration_tick.py b/src/continuous_refactoring/migration_tick.py index ff403e2..dc1d45c 100644 --- a/src/continuous_refactoring/migration_tick.py +++ b/src/continuous_refactoring/migration_tick.py @@ -42,11 +42,23 @@ resolve_current_phase, save_manifest, ) +from continuous_refactoring.migration_consistency import ( + MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) from continuous_refactoring.phases import ( ReadyVerdict, check_phase_ready, execute_phase, ) +from continuous_refactoring.planning import PlanningStepResult, run_next_planning_step +from continuous_refactoring.planning_state import ( + PlanningState, + load_planning_state, + planning_state_path, +) _BASELINE_VALIDATION_UNCERTAINTY_PHRASES = ( @@ -59,6 +71,18 @@ "tests pass now", ) +_EXECUTABLE_PLANNING_STEPS = frozenset( + { + "approaches", + "pick-best", + "expand", + "review", + "revise", + "review-2", + "final-review", + } +) + class _FinalizeCommit(Protocol): def __call__( @@ -94,6 +118,19 @@ def enumerate_eligible_manifests( return candidates +def enumerate_eligible_planning_manifests( + live_dir: Path, + now: datetime, +) -> list[tuple[MigrationManifest, Path]]: + 
candidates = [ + (manifest, manifest_path) + for manifest, manifest_path in _iter_candidate_manifests(live_dir) + if _is_planning_candidate(manifest, now) + ] + candidates.sort(key=lambda pair: datetime.fromisoformat(pair[0].created_at)) + return candidates + + def _cooling_effort_candidates( live_dir: Path, now: datetime, @@ -110,12 +147,8 @@ def _cooling_effort_candidates( def _iter_candidate_manifests( live_dir: Path, ) -> list[tuple[MigrationManifest, Path]]: - if not live_dir.is_dir(): - return [] candidates: list[tuple[MigrationManifest, Path]] = [] - for entry in sorted(live_dir.iterdir()): - if not entry.is_dir() or entry.name.startswith("__"): - continue + for entry in iter_visible_migration_dirs(live_dir): manifest_path = entry / "manifest.json" if not manifest_path.exists(): continue @@ -132,6 +165,14 @@ def _is_normally_eligible(manifest: MigrationManifest, now: datetime) -> bool: ) +def _is_planning_candidate(manifest: MigrationManifest, now: datetime) -> bool: + return ( + manifest.status == "planning" + and not manifest.awaiting_human_review + and eligible_now(manifest, now) + ) + + def _can_ignore_effort_cooldown( manifest: MigrationManifest, now: datetime, @@ -158,6 +199,22 @@ def _is_phase_candidate(manifest: MigrationManifest) -> bool: ) +def _first_unloadable_visible_manifest( + live_dir: Path, +) -> tuple[Path, list[MigrationConsistencyFinding]] | None: + for migration_dir in iter_visible_migration_dirs(live_dir): + if not (migration_dir / "manifest.json").exists(): + continue + findings = check_migration_consistency(migration_dir, mode="execution-gate") + invalid_findings = [ + finding for finding in findings + if finding.severity == "error" and finding.code == "invalid-manifest" + ] + if invalid_findings: + return migration_dir, invalid_findings + return None + + def try_migration_tick( live_dir: Path, taste: str, @@ -177,6 +234,14 @@ def try_migration_tick( ) -> tuple[RouteOutcome, DecisionRecord | None]: resolved_budget = effort_budget or 
resolve_effort_budget(effort, None) now = datetime.now(timezone.utc) + preflight = _first_unloadable_visible_manifest(live_dir) + if preflight is not None: + migration_dir, consistency_findings = preflight + return "abandon", _consistency_failure_record( + consistency_findings, + repo_root, + migration_dir.name, + ) candidates = enumerate_eligible_manifests(live_dir, now, resolved_budget) deferred_record: DecisionRecord | None = None pending_defers: list[tuple[MigrationManifest, Path]] = [] @@ -184,6 +249,21 @@ def try_migration_tick( for manifest, manifest_path in candidates: phase = resolve_current_phase(manifest) target_label = _target_label(manifest, phase) + try: + consistency_findings = check_migration_consistency( + manifest_path.parent, mode="execution-gate", + ) + except ContinuousRefactorError as error: + return "abandon", _consistency_error_record( + str(error), + repo_root, + target_label, + failure_kind=error_failure_kind(str(error)), + ) + if has_blocking_consistency_findings(consistency_findings): + return "abandon", _consistency_failure_record( + consistency_findings, repo_root, target_label, + ) if ( phase.required_effort is not None and effort_exceeds( @@ -301,6 +381,282 @@ def try_migration_tick( return "not-routed", deferred_record +def try_planning_tick( + live_dir: Path, + taste: str, + repo_root: Path, + artifacts: RunArtifacts, + *, + agent: str, + model: str, + effort: str, + timeout: int | None, + commit_message_prefix: str, + attempt: int, + finalize_commit: _FinalizeCommit, + effort_budget: EffortBudget | None = None, + effort_metadata: dict[str, object] | None = None, +) -> tuple[RouteOutcome, DecisionRecord | None]: + now = datetime.now(timezone.utc) + preflight = _first_unloadable_visible_manifest(live_dir) + if preflight is not None: + migration_dir, consistency_findings = preflight + return "abandon", _consistency_failure_record( + consistency_findings, + repo_root, + migration_dir.name, + ) + + candidates = 
enumerate_eligible_planning_manifests(live_dir, now) + for manifest, manifest_path in candidates: + migration_dir = manifest_path.parent + try: + consistency_findings = check_migration_consistency( + migration_dir, + mode="planning-snapshot", + ) + except ContinuousRefactorError as error: + return "blocked", _planning_state_record( + str(error), + repo_root, + migration_dir.name, + failure_kind=error_failure_kind(str(error)), + ) + if has_blocking_consistency_findings(consistency_findings): + return "blocked", _planning_consistency_record( + consistency_findings, + repo_root, + migration_dir.name, + ) + state_result = _load_planning_resume_state( + migration_dir, + repo_root, + ) + if isinstance(state_result, DecisionRecord): + return "blocked", state_result + state = state_result + step = state.next_step + if step not in _EXECUTABLE_PLANNING_STEPS: + return "blocked", _planning_state_record( + ( + f"Planning migration has terminal next_step {step!r} " + "while manifest status is still planning" + ), + repo_root, + manifest.name, + failure_kind="planning-state-invalid", + ) + head_before = get_head_sha(repo_root) + try: + result = run_next_planning_step( + manifest.name, + state.target, + taste, + repo_root, + live_dir, + artifacts, + attempt=attempt, + retry=1, + agent=agent, + model=model, + effort=effort, + effort_budget=effort_budget, + effort_metadata=effort_metadata, + timeout=timeout, + ) + except ContinuousRefactorError as error: + return "abandon", _planning_error_record( + str(error), + repo_root, + manifest.name, + call_role=_planning_call_role(step), + failure_kind=error_failure_kind(str(error)), + ) + + outcome = _planning_route_outcome(result) + if outcome == "commit": + finalize_commit( + repo_root, + head_before, + build_commit_message( + ( + f"{commit_message_prefix}: planning/" + f"{manifest.name}/{result.step}" + ), + why=sanitize_text(result.reason, repo_root) or result.reason, + ), + artifacts=artifacts, + attempt=attempt, + phase="planning", + 
) + print( + "Planning: " + f"{_describe_planning_outcome(result)} — " + f"{manifest.name}/{result.step}: {result.reason}" + ) + return "commit", _planning_commit_record(result, repo_root) + if outcome == "blocked": + return "blocked", _planning_blocked_record(result, repo_root) + return "abandon", _planning_failed_record(result, repo_root) + + return "not-routed", None + + +def _planning_consistency_record( + findings: list[MigrationConsistencyFinding], + repo_root: Path, + migration_name: str, +) -> DecisionRecord: + error_findings = [finding for finding in findings if finding.severity == "error"] + codes = ", ".join(sorted({finding.code for finding in error_findings})) + message = ( + error_findings[0].message + if error_findings + else "planning snapshot consistency failed" + ) + return _planning_state_record( + f"Planning snapshot consistency failed ({codes}): {message}", + repo_root, + migration_name, + failure_kind="planning-consistency-error", + ) + + +def _load_planning_resume_state( + migration_dir: Path, + repo_root: Path, +) -> PlanningState | DecisionRecord: + state_path = planning_state_path(migration_dir) + if not state_path.exists(): + return _planning_state_record( + f"Planning migration is missing {state_path.relative_to(migration_dir)}", + repo_root, + migration_dir.name, + failure_kind="planning-state-missing", + ) + try: + return load_planning_state( + repo_root, + state_path, + published_migration_root=migration_dir, + ) + except ContinuousRefactorError as error: + return _planning_state_record( + str(error), + repo_root, + migration_dir.name, + failure_kind="planning-state-invalid", + ) + + +def _planning_route_outcome(result: PlanningStepResult) -> RouteOutcome: + if result.status == "published": + return "commit" + if result.status == "blocked": + return "blocked" + return "abandon" + + +def _planning_call_role(step: object) -> str: + if step in _EXECUTABLE_PLANNING_STEPS: + return f"planning.{step}" + return "planning.resume" + + +def 
_describe_planning_outcome(result: PlanningStepResult) -> str: + if result.terminal_outcome is None: + return f"{result.step} accepted" + if result.terminal_outcome.status == "ready": + return "queued for execution" + if result.terminal_outcome.status == "awaiting_human_review": + return "awaiting human review" + return result.terminal_outcome.status.replace("_", " ") + + +def _planning_state_record( + message: str, + repo_root: Path, + migration_name: str, + *, + failure_kind: str, +) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=migration_name, + call_role="planning.state", + phase_reached="planning.state", + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + ) + + +def _planning_error_record( + message: str, + repo_root: Path, + migration_name: str, + *, + call_role: str, + failure_kind: str, +) -> DecisionRecord: + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + ) + + +def _planning_commit_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + call_role = f"planning.{result.step}" + return DecisionRecord( + decision="commit", + retry_recommendation="none", + target=result.migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind="none", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + +def _planning_blocked_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=result.migration_name, + call_role="planning.publish", + phase_reached="planning.publish", + failure_kind="planning-publish-blocked", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + 
+def _planning_failed_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + call_role = f"planning.{result.step}" + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=result.migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind="planning-step-failed", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + def _target_label(manifest: MigrationManifest, phase: PhaseSpec) -> str: return f"{manifest.name} {phase_file_reference(phase)} ({phase.name})" @@ -382,6 +738,45 @@ def _ready_check_failure_record( ) +def _consistency_failure_record( + findings: list[MigrationConsistencyFinding], + repo_root: Path, + target_label: str, +) -> DecisionRecord: + error_findings = [finding for finding in findings if finding.severity == "error"] + codes = ", ".join(sorted({finding.code for finding in error_findings})) + message = ( + error_findings[0].message + if error_findings + else "migration consistency failed" + ) + summary = f"Migration consistency failed ({codes}): {message}" + return _consistency_error_record( + summary, + repo_root, + target_label, + failure_kind="migration-consistency-error", + ) + + +def _consistency_error_record( + message: str, + repo_root: Path, + target_label: str, + *, + failure_kind: str, +) -> DecisionRecord: + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=target_label, + call_role="phase.execution-gate", + phase_reached="phase.execution-gate", + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + ) + + def _phase_failure_record( outcome: ExecutePhaseOutcome, repo_root: Path, target_label: str, ) -> DecisionRecord: diff --git a/src/continuous_refactoring/planning.py b/src/continuous_refactoring/planning.py index 4ac44e0..4e0ebec 100644 --- a/src/continuous_refactoring/planning.py +++ b/src/continuous_refactoring/planning.py @@ -1,6 +1,8 @@ from __future__ 
import annotations import re +import shutil +import uuid from dataclasses import dataclass, replace from pathlib import Path from typing import TYPE_CHECKING, Callable, Literal @@ -10,20 +12,52 @@ from continuous_refactoring.agent import maybe_run_agent from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp +from continuous_refactoring.config import resolve_project from continuous_refactoring.effort import EffortBudget, require_effort_tier from continuous_refactoring.migrations import ( MigrationManifest, PhaseSpec, approaches_dir, intentional_skips_dir, + load_manifest, migration_root, save_manifest, ) +from continuous_refactoring.planning_publish import ( + PlanningPublishError, + PlanningPublishRequest, + PlanningPublishResult, + capture_live_snapshot, + prepare_planning_workspace, + publish_planning_workspace, +) +from continuous_refactoring.planning_state import ( + FeedbackSource, + PlanningCursor, + PlanningState, + PlanningStep, + append_planning_feedback, + complete_planning_step, + load_planning_state, + new_planning_state, + planning_state_path, + planning_step_stdout, + reopen_planning_for_revise, + save_planning_state, + write_planning_stage_stdout, +) from continuous_refactoring.prompts import PlanningStage, compose_planning_prompt -__all__ = ["PlanningOutcome", "run_planning"] +__all__ = [ + "PlanningOutcome", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", +] PlanningStatus = Literal["ready", "awaiting_human_review", "skipped"] +PlanningStepStatus = Literal["published", "blocked", "failed"] _FINAL_DECISION_RE = re.compile( r"^final-decision:\s*(approve-auto|approve-needs-human|reject)(?:\s*[—-]\s*(.+))?$", @@ -47,6 +81,36 @@ class PlanningOutcome: reason: str +@dataclass(frozen=True) +class PlanningStepResult: + status: PlanningStepStatus + migration_name: str + step: PlanningStep + next_step: PlanningCursor + reason: str + terminal_outcome: PlanningOutcome | 
None = None + publish_result: PlanningPublishResult | None = None + + +@dataclass(frozen=True) +class PlanningRefineRequest: + migration_name: str + feedback_text: str + feedback_source: FeedbackSource + taste: str + repo_root: Path + live_dir: Path + artifacts: RunArtifacts + agent: str + model: str + effort: str + timeout: int | None = None + attempt: int = 1 + retry: int = 1 + effort_budget: EffortBudget | None = None + effort_metadata: dict[str, object] | None = None + + @dataclass(frozen=True) class _PhaseMetadata: precondition: str @@ -345,8 +409,38 @@ def _run_stage( # --------------------------------------------------------------------------- -def _build_context(target: str, mig_relative: Path, extra: str = "") -> str: - parts = [f"Target: {target}", f"Migration directory: {mig_relative}"] +def _build_context( + target: str, + mig_relative: Path, + extra: str = "", + *, + work_dir: Path | None = None, + live_mig_root: Path | None = None, +) -> str: + parts = [ + f"Target: {target}", + f"Migration directory: {mig_relative}", + "Read and write all migration planning artifacts inside that directory.", + ] + if work_dir is not None: + live_dir = live_mig_root or work_dir + parts.extend( + [ + f"Staged work dir: {work_dir}", + f"Work dir: {work_dir}", + f"Live migration dir: {live_dir}", + "The staged work dir is the planning workspace; successful " + "steps are atomically published by the harness.", + "Writable target: staged work dir only.", + "Writable target: work dir only.", + "The live migration directory is read-only reference material.", + "Do not mutate the live migration directory.", + "Resume input is the last published .planning/state.json plus " + "accepted stdout under .planning/stages/.", + "failed current-step output, stdout/stderr, and partial work " + "are run artifacts only; they are not resume input.", + ] + ) if extra: parts.append(extra) return "\n\n".join(parts) @@ -360,6 +454,13 @@ def _join_nonempty(*parts: str) -> str: return 
"\n\n".join(part for part in parts if part) +def _display_migration_path(repo_root: Path, mig_root: Path) -> Path: + try: + return mig_root.relative_to(repo_root) + except ValueError: + return mig_root + + def _read_approach_listing(live_dir: Path, migration_name: str) -> str: app_dir = approaches_dir(live_dir, migration_name) if not app_dir.exists(): @@ -370,6 +471,130 @@ def _read_approach_listing(live_dir: Path, migration_name: str) -> str: ) +def _durable_stdout_context( + title: str, + state: PlanningState, + repo_root: Path, + mig_root: Path, + step: str, + *, + published_migration_root: Path | None = None, +) -> str: + stdout_ref, stdout = planning_step_stdout( + state, + repo_root, + step, + state_path=planning_state_path(mig_root), + published_migration_root=published_migration_root, + ) + return f"{title} (from {stdout_ref}):\n{stdout}" + + +def _build_durable_planning_context( + *, + repo_root: Path, + live_dir: Path, + migration_name: str, + state: PlanningState, + extra_context: str = "", + published_migration_root: Path | None = None, +) -> str: + mig_root = migration_root(live_dir, migration_name) + mig_relative = _display_migration_path(repo_root, mig_root) + plan_path = mig_root / "plan.md" + + if state.next_step == "approaches": + step_context = "" + elif state.next_step == "pick-best": + step_context = f"Approaches:\n{_read_approach_listing(live_dir, migration_name)}" + elif state.next_step == "expand": + step_context = _durable_stdout_context( + "Chosen approach", + state, + repo_root, + mig_root, + "pick-best", + published_migration_root=published_migration_root, + ) + elif state.next_step == "review": + step_context = f"Plan:\n{_read_plan_text(plan_path)}" + elif state.next_step == "revise": + if state.revision_base_step_count is None: + step_context = _durable_stdout_context( + "Review findings to address", + state, + repo_root, + mig_root, + "review", + published_migration_root=published_migration_root, + ) + else: + step_context = 
_latest_feedback_context(state) + elif state.next_step == "review-2": + step_context = f"Plan (revised):\n{_read_plan_text(plan_path)}" + elif state.next_step == "final-review": + step_context = f"Plan:\n{_read_plan_text(plan_path)}" + else: + raise ContinuousRefactorError( + f"Planning state is terminal; no prompt context for {state.next_step!r}" + ) + + return _build_context( + state.target, + mig_relative, + _join_nonempty(extra_context, step_context), + work_dir=mig_root, + live_mig_root=published_migration_root, + ) + + +def _latest_feedback_context(state: PlanningState) -> str: + if not state.feedback: + raise ContinuousRefactorError("Planning refinement requires user feedback") + return f"User refinement feedback to address:\n{state.feedback[-1].text}" + + +def _record_completed_planning_step( + state: PlanningState, + *, + repo_root: Path, + mig_root: Path, + published_migration_root: Path | None = None, + stage_label: str, + outcome: str, + stdout: str, + agent: str, + model: str, + effort: str, + final_reason: str | None = None, +) -> PlanningState: + outputs = write_planning_stage_stdout( + repo_root, + mig_root, + stage_label, + stdout, + published_migration_root=published_migration_root, + ) + updated = complete_planning_step( + state, + stage_label, + outcome, + outputs, + completed_at=iso_timestamp(), + agent=agent, + model=model, + effort=effort, + final_reason=final_reason, + ) + save_planning_state( + updated, + planning_state_path(mig_root), + repo_root=repo_root, + published_migration_root=published_migration_root, + ) + return updated + + def _run_pipeline_stage( spec: _PlanningStageSpec, state: _PlanningStageState, @@ -432,11 +657,11 @@ def _refresh_manifest( # --------------------------------------------------------------------------- -# Main workflow +# One-step workflow # --------------------------------------------------------------------------- -def run_planning( +def run_next_planning_step( migration_name: str, target: str, taste: str, 
@@ -453,11 +678,233 @@ def run_planning( effort_budget: EffortBudget | None = None, effort_metadata: dict[str, object] | None = None, extra_context: str = "", -) -> PlanningOutcome: - mig_root = migration_root(live_dir, migration_name) - mig_root.mkdir(parents=True, exist_ok=True) - manifest_path = mig_root / "manifest.json" - mig_relative = mig_root.relative_to(repo_root) +) -> PlanningStepResult: + live_mig_root = migration_root(live_dir, migration_name) + base_snapshot_id = capture_live_snapshot(repo_root, live_dir, migration_name) + workspace_root = _prepare_step_workspace( + repo_root, + artifacts, + migration_name, + live_mig_root, + ) + manifest, state = _load_or_seed_step_snapshot( + workspace_root, + live_mig_root, + migration_name=migration_name, + target=target, + repo_root=repo_root, + ) + if state.next_step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + + step = state.next_step + manifest, state, terminal_outcome = _execute_step_in_workspace( + manifest, + state, + migration_name=migration_name, + taste=taste, + repo_root=repo_root, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + artifacts=artifacts, + attempt=attempt, + retry=retry, + agent=agent, + model=model, + effort=effort, + timeout=timeout, + effort_budget=effort_budget, + effort_metadata=effort_metadata, + extra_context=extra_context, + ) + + validation_mode = "ready-publish" if manifest.status == "ready" else "planning-snapshot" + try: + publish_result = publish_planning_workspace( + PlanningPublishRequest( + repo_root=repo_root, + live_migrations_dir=live_dir, + slug=migration_name, + workspace_dir=workspace_root, + base_snapshot_id=base_snapshot_id, + validation_mode=validation_mode, + operation=f"planning.{step}", + ) + ) + except PlanningPublishError as error: + return PlanningStepResult( + status=error.result.status, + migration_name=migration_name, + step=step, + 
next_step=state.next_step, + reason=error.result.reason, + terminal_outcome=None, + publish_result=error.result, + ) + + return PlanningStepResult( + status="published", + migration_name=migration_name, + step=step, + next_step=state.next_step, + reason=_planning_step_reason(step, state, terminal_outcome), + terminal_outcome=terminal_outcome, + publish_result=publish_result, + ) + + +def run_refine_planning_step(request: PlanningRefineRequest) -> PlanningStepResult: + live_mig_root = migration_root(request.live_dir, request.migration_name) + base_snapshot_id = capture_live_snapshot( + request.repo_root, + request.live_dir, + request.migration_name, + ) + workspace_root = _prepare_step_workspace( + request.repo_root, + request.artifacts, + request.migration_name, + live_mig_root, + ) + manifest, state = _load_refine_snapshot( + workspace_root, + live_mig_root, + repo_root=request.repo_root, + migration_name=request.migration_name, + ) + manifest, state = _prepare_refine_state( + manifest, + state, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + repo_root=request.repo_root, + feedback_text=request.feedback_text, + feedback_source=request.feedback_source, + ) + if state.next_step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + + step = state.next_step + manifest, state, terminal_outcome = _execute_step_in_workspace( + manifest, + state, + migration_name=request.migration_name, + taste=request.taste, + repo_root=request.repo_root, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + artifacts=request.artifacts, + attempt=request.attempt, + retry=request.retry, + agent=request.agent, + model=request.model, + effort=request.effort, + timeout=request.timeout, + effort_budget=request.effort_budget, + effort_metadata=request.effort_metadata, + extra_context=_user_feedback_context(request.feedback_text), + ) + + validation_mode = "ready-publish" if 
manifest.status == "ready" else "planning-snapshot" + try: + publish_result = publish_planning_workspace( + PlanningPublishRequest( + repo_root=request.repo_root, + live_migrations_dir=request.live_dir, + slug=request.migration_name, + workspace_dir=workspace_root, + base_snapshot_id=base_snapshot_id, + validation_mode=validation_mode, + operation=f"migration.refine.{step}", + ) + ) + except PlanningPublishError as error: + return PlanningStepResult( + status=error.result.status, + migration_name=request.migration_name, + step=step, + next_step=state.next_step, + reason=error.result.reason, + terminal_outcome=None, + publish_result=error.result, + ) + + return PlanningStepResult( + status="published", + migration_name=request.migration_name, + step=step, + next_step=state.next_step, + reason=_planning_step_reason(step, state, terminal_outcome), + terminal_outcome=terminal_outcome, + publish_result=publish_result, + ) + + +_STEP_PROMPT_STAGES: dict[str, PlanningStage] = { + "approaches": "approaches", + "pick-best": "pick-best", + "expand": "expand", + "review": "review", + "revise": "expand", + "review-2": "review", + "final-review": "final-review", +} + + +def _prepare_step_workspace( + repo_root: Path, + artifacts: RunArtifacts, + migration_name: str, + live_mig_root: Path, +) -> Path: + project_state_dir = _planning_project_state_dir(repo_root, artifacts) + workspace = prepare_planning_workspace( + project_state_dir, + migration_name, + f"{artifacts.run_id}-{uuid.uuid4().hex}", + ) + if live_mig_root.exists(): + shutil.copytree(live_mig_root, workspace.root, dirs_exist_ok=True) + return workspace.root + + +def _planning_project_state_dir(repo_root: Path, artifacts: RunArtifacts) -> Path: + try: + return resolve_project(repo_root).project_dir + except ContinuousRefactorError: + return artifacts.root / "project-state" + + +def _load_or_seed_step_snapshot( + workspace_root: Path, + live_mig_root: Path, + *, + migration_name: str, + target: str, + repo_root: Path, 
+) -> tuple[MigrationManifest, PlanningState]: + manifest_path = workspace_root / "manifest.json" + state_path = planning_state_path(workspace_root) + if manifest_path.exists(): + manifest = load_manifest(manifest_path) + if manifest.status != "planning": + raise ContinuousRefactorError( + f"Planning snapshot {migration_name!r} is not in planning status" + ) + if not state_path.exists(): + raise ContinuousRefactorError( + f"Planning snapshot {migration_name!r} is missing .planning/state.json" + ) + state = load_planning_state( + repo_root, + state_path, + published_migration_root=live_mig_root, + ) + return manifest, state now = iso_timestamp() manifest = MigrationManifest( @@ -471,154 +918,258 @@ def run_planning( phases=(), ) save_manifest(manifest, manifest_path) + state = new_planning_state(target, now=now) + save_planning_state( + state, + state_path, + repo_root=repo_root, + published_migration_root=live_mig_root, + ) + return manifest, state + + +def _load_refine_snapshot( + workspace_root: Path, + live_mig_root: Path, + *, + repo_root: Path, + migration_name: str, +) -> tuple[MigrationManifest, PlanningState]: + manifest_path = workspace_root / "manifest.json" + state_path = planning_state_path(workspace_root) + if not manifest_path.exists(): + raise ContinuousRefactorError(f"Migration {migration_name!r} has no manifest") + if not state_path.exists(): + raise ContinuousRefactorError( + f"Migration {migration_name!r} is missing .planning/state.json" + ) + manifest = load_manifest(manifest_path) + state = load_planning_state( + repo_root, + state_path, + published_migration_root=live_mig_root, + ) + return manifest, state + + +def _prepare_refine_state( + manifest: MigrationManifest, + state: PlanningState, + *, + workspace_root: Path, + live_mig_root: Path, + repo_root: Path, + feedback_text: str, + feedback_source: FeedbackSource, +) -> tuple[MigrationManifest, PlanningState]: + _require_refine_eligible(manifest) + state = append_planning_feedback(state, 
feedback_text, feedback_source) + if manifest.status == "ready": + state = reopen_planning_for_revise(state) + manifest = _refresh_manifest( + manifest, + workspace_root / "manifest.json", + status="planning", + awaiting_human_review=False, + human_review_reason=None, + cooldown_until=None, + current_phase=manifest.phases[0].name, + ) + elif state.next_step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + save_planning_state( + state, + planning_state_path(workspace_root), + repo_root=repo_root, + published_migration_root=live_mig_root, + ) + return manifest, state - agent_kw = dict( + +def _require_refine_eligible(manifest: MigrationManifest) -> None: + if any(phase.done for phase in manifest.phases): + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has completed phase work and cannot be refined" + ) + if manifest.status == "planning": + return + if manifest.status != "ready": + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has status {manifest.status!r}; " + "only planning or unexecuted ready migrations can be refined" + ) + if not manifest.phases: + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has no phases and cannot be refined" + ) + first_phase = manifest.phases[0] + if manifest.current_phase != first_phase.name: + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has already advanced past its first phase" + ) + + +def _user_feedback_context(text: str) -> str: + return f"User refinement feedback:\n{text}" + + +def _execute_step_in_workspace( + manifest: MigrationManifest, + state: PlanningState, + *, + migration_name: str, + taste: str, + repo_root: Path, + workspace_root: Path, + live_mig_root: Path, + artifacts: RunArtifacts, + attempt: int, + retry: int, + agent: str, + model: str, + effort: str, + timeout: int | None, + effort_budget: EffortBudget | None, + effort_metadata: dict[str, object] | None, + 
extra_context: str, +) -> tuple[MigrationManifest, PlanningState, PlanningOutcome | None]: + step = state.next_step + if step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError(f"Planning step {step!r} cannot be executed") + prompt_stage = _STEP_PROMPT_STAGES[step] + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=workspace_root.parent, + migration_name=migration_name, + state=state, + extra_context=extra_context, + published_migration_root=live_mig_root, + ) + stdout = _run_stage( + prompt_stage, + migration_name, + state.target, + taste, + context, + repo_root, + artifacts, + attempt=attempt, + retry=retry, agent=agent, model=model, effort=effort, timeout=timeout, effort_metadata=effort_metadata, effort_budget=effort_budget, + stage_label=step, ) - plan_path = mig_root / "plan.md" - state = _PlanningStageState(extra_context=extra_context) - always_run_stages = ( - _PlanningStageSpec( - prompt_stage="approaches", - stage_label="approaches", - build_context=lambda current: _build_context( - target, mig_relative, current.extra_context - ), - ), - _PlanningStageSpec( - prompt_stage="pick-best", - stage_label="pick-best", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty( - current.extra_context, - f"Approaches:\n{current.approach_listing}", - ), - ), - ), - _PlanningStageSpec( - prompt_stage="expand", - stage_label="expand", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty( - current.extra_context, - f"Chosen approach:\n{current.pick_stdout}", - ), - ), - refresh_phase_listing=True, - ), - _PlanningStageSpec( - prompt_stage="review", - stage_label="review", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty(current.extra_context, f"Plan:\n{_read_plan_text(plan_path)}"), - ), - ), - ) - for spec in always_run_stages: - manifest, _ = _run_pipeline_stage( - spec, - state, - manifest, - manifest_path, - 
migration_name=migration_name, - target=target, - taste=taste, - repo_root=repo_root, - artifacts=artifacts, - mig_root=mig_root, - live_dir=live_dir, - attempt=attempt, - retry=retry, - agent_kw=agent_kw, - ) - review_stdout = state.review_stdout - - # Stage 5: revise + review again (only if first review had findings) - if _review_has_findings(review_stdout): - _run_stage( - "expand", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty( - extra_context, - f"Review findings to address:\n{review_stdout}", - ), - ), - repo_root, - artifacts, - attempt=attempt, - retry=retry, - stage_label="revise", - **agent_kw, - ) - manifest = _refresh_manifest(manifest, manifest_path, mig_root=mig_root) - - review_two_stdout = _run_stage( - "review", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty( - extra_context, - f"Plan (revised):\n{_read_plan_text(plan_path)}", - ), - ), - repo_root, - artifacts, - attempt=attempt, - retry=retry, - stage_label="review-2", - **agent_kw, - ) - _require_review_clear(review_two_stdout, "review-2") - manifest = _refresh_manifest(manifest, manifest_path) - # Stage 6: final-review - final_stdout = _run_stage( - "final-review", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty(extra_context, f"Plan:\n{_read_plan_text(plan_path)}"), - ), - repo_root, artifacts, attempt=attempt, retry=retry, **agent_kw, + outcome, final_reason = _step_outcome(step, stdout) + manifest = _refresh_manifest( + manifest, + workspace_root / "manifest.json", + mig_root=workspace_root if step in ("expand", "revise") else None, ) + state = _record_completed_planning_step( + state, + repo_root=repo_root, + mig_root=workspace_root, + published_migration_root=live_mig_root, + stage_label=step, + outcome=outcome, + stdout=stdout, + agent=agent, + model=model, + effort=effort, + final_reason=final_reason, + ) + terminal_outcome = _terminal_outcome(state) + if 
terminal_outcome is None: + return manifest, state, None + manifest = _apply_terminal_manifest_state( + manifest, + workspace_root / "manifest.json", + workspace_root=workspace_root, + live_dir=workspace_root.parent, + migration_name=migration_name, + target=state.target, + outcome=terminal_outcome, + ) + return manifest, state, terminal_outcome + + +def _step_outcome(step: PlanningStep, stdout: str) -> tuple[str, str | None]: + if step == "review": + return ("findings" if _review_has_findings(stdout) else "clear"), None + if step == "review-2": + _require_review_clear(stdout, "review-2") + return "clear", None + if step == "final-review": + try: + return _parse_final_decision(stdout) + except ContinuousRefactorError as error: + raise ContinuousRefactorError( + f"planning.final-review failed: {error}" + ) from error + return "completed", None + + +def _terminal_outcome(state: PlanningState) -> PlanningOutcome | None: + if state.next_step == "terminal-ready": + return PlanningOutcome(status="ready", reason=state.final_reason or "ready") + if state.next_step == "terminal-ready-awaiting-human": + return PlanningOutcome( + status="awaiting_human_review", + reason=state.final_reason or "awaiting human review", + ) + if state.next_step == "terminal-skipped": + return PlanningOutcome(status="skipped", reason=state.final_reason or "skipped") + return None - try: - decision, reason = _parse_final_decision(final_stdout) - except ContinuousRefactorError as error: - raise ContinuousRefactorError( - f"planning.final-review failed: {error}" - ) from error - manifest = _refresh_manifest(manifest, manifest_path) - - if decision == "approve-auto": - manifest = _refresh_manifest(manifest, manifest_path, status="ready") - return PlanningOutcome(status="ready", reason=reason) - if decision == "approve-needs-human": - manifest = _refresh_manifest( +def _apply_terminal_manifest_state( + manifest: MigrationManifest, + manifest_path: Path, + *, + workspace_root: Path, + live_dir: Path, + 
migration_name: str, + target: str, + outcome: PlanningOutcome, +) -> MigrationManifest: + if outcome.status == "ready": + return _refresh_manifest( + manifest, + manifest_path, + status="ready", + awaiting_human_review=False, + human_review_reason=None, + ) + if outcome.status == "awaiting_human_review": + return _refresh_manifest( manifest, manifest_path, status="ready", awaiting_human_review=True, - human_review_reason=reason, + human_review_reason=outcome.reason, ) - return PlanningOutcome(status="awaiting_human_review", reason=reason) - # reject - manifest = _refresh_manifest(manifest, manifest_path, status="skipped") - _write_skip_file(live_dir, migration_name, target, reason) - return PlanningOutcome(status="skipped", reason=reason) + (workspace_root / "intentional-skip.md").write_text( + f"# Intentional Skip: {migration_name}\n\n" + f"## Target\n{target}\n\n" + f"## Blocker Reason\n{outcome.reason}\n", + encoding="utf-8", + ) + return _refresh_manifest(manifest, manifest_path, status="skipped") + + +def _planning_step_reason( + step: PlanningStep, + state: PlanningState, + terminal_outcome: PlanningOutcome | None, +) -> str: + if terminal_outcome is not None: + return terminal_outcome.reason + return f"planning.{step} accepted; next step: {state.next_step}" diff --git a/src/continuous_refactoring/planning_publish.py b/src/continuous_refactoring/planning_publish.py new file mode 100644 index 0000000..576a67d --- /dev/null +++ b/src/continuous_refactoring/planning_publish.py @@ -0,0 +1,681 @@ +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import stat +import uuid +from dataclasses import dataclass, replace +from pathlib import Path +from typing import Literal + +from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp +from continuous_refactoring.git import run_command +from continuous_refactoring.migration_consistency import ( + ConsistencyMode, + MigrationConsistencyFinding, + 
check_migration_consistency, + has_blocking_consistency_findings, +) + +__all__ = [ + "PlanningPublishError", + "PlanningPublishRequest", + "PlanningPublishResult", + "PlanningWorkspace", + "capture_live_snapshot", + "prepare_planning_workspace", + "publish_lock_path", + "publish_planning_workspace", + "snapshot_tree_digest", +] + +PublishStatus = Literal["published", "blocked", "failed"] + +_TRANSACTIONS_DIR_NAME = "__transactions__" +_LOCK_DIR_NAME = ".lock" +_LOCK_OWNER_FILE = "owner.json" +_DIGEST_VERSION = b"continuous-refactoring-tree-v1\n" +_MISSING_TREE_DIGEST_INPUT = b"missing\n" +_FS_ERRORS = (OSError, shutil.Error) + + +@dataclass(frozen=True) +class PlanningWorkspace: + root: Path + slug: str + run_id: str + + +@dataclass(frozen=True) +class PlanningPublishRequest: + repo_root: Path + live_migrations_dir: Path + slug: str + workspace_dir: Path + base_snapshot_id: str + validation_mode: ConsistencyMode = "ready-publish" + operation: str = "planning-publish" + now: str | None = None + + +@dataclass(frozen=True) +class PlanningPublishResult: + status: PublishStatus + reason: str + snapshot_id: str | None + live_dir: Path + transaction_dir: Path | None + staged_dir: Path | None + rollback_dir: Path | None + failed_dir: Path | None + findings: tuple[MigrationConsistencyFinding, ...] = () + dirty_paths: tuple[str, ...] 
= () + lock_path: Path | None = None + cleanup_error: str | None = None + + +class PlanningPublishError(ContinuousRefactorError): + def __init__(self, result: PlanningPublishResult) -> None: + self.result = result + super().__init__(_result_message(result)) + + +@dataclass(frozen=True) +class _TransactionPaths: + transaction_dir: Path + staged_dir: Path + rollback_dir: Path + failed_dir: Path + + +@dataclass(frozen=True) +class _PublishLock: + path: Path + + +def prepare_planning_workspace( + project_state_dir: Path, + slug: str, + run_id: str, +) -> PlanningWorkspace: + _require_safe_segment(slug, field="slug") + _require_safe_segment(run_id, field="run_id") + root = project_state_dir / "planning" / slug / run_id / "work" / slug + if root.exists() and any(root.iterdir()): + raise ContinuousRefactorError(f"Planning workspace is not empty: {root}") + root.mkdir(parents=True, exist_ok=True) + return PlanningWorkspace(root=root, slug=slug, run_id=run_id) + + +def capture_live_snapshot( + repo_root: Path, + live_migrations_dir: Path, + slug: str, +) -> str: + live_dir = _live_migration_dir(live_migrations_dir, slug) + dirty_paths = _dirty_live_migration_paths(repo_root, live_dir) + if dirty_paths: + _raise_result( + _blocked_result( + "dirty live migration directory; commit, discard, or inspect with migration doctor", + live_dir=live_dir, + dirty_paths=dirty_paths, + ) + ) + return snapshot_tree_digest(live_dir) + + +def publish_lock_path(live_migrations_dir: Path) -> Path: + return live_migrations_dir / _TRANSACTIONS_DIR_NAME / _LOCK_DIR_NAME + + +def snapshot_tree_digest(path: Path) -> str: + digest = hashlib.sha256() + digest.update(_DIGEST_VERSION) + if not path.exists(): + digest.update(_MISSING_TREE_DIGEST_INPUT) + return digest.hexdigest() + if path.is_symlink() or not path.is_dir(): + raise ContinuousRefactorError(f"Snapshot root must be a directory: {path}") + + root = path.resolve() + for child in sorted(path.rglob("*"), key=lambda item: _relative_name(path, 
item)): + if child.is_symlink(): + raise ContinuousRefactorError(f"Snapshot contains symlink: {child}") + try: + child_stat = child.stat() + except OSError as error: + raise ContinuousRefactorError( + f"Could not stat snapshot path {child}: {error}" + ) from error + rel = child.resolve().relative_to(root).as_posix() + mode = stat.S_IMODE(child_stat.st_mode) + if child.is_dir(): + digest.update(f"D {rel} {mode:o}\0".encode("utf-8")) + continue + if child.is_file(): + digest.update(f"F {rel} {mode:o} {child_stat.st_size}\0".encode("utf-8")) + try: + digest.update(child.read_bytes()) + except OSError as error: + raise ContinuousRefactorError( + f"Could not read snapshot path {child}: {error}" + ) from error + digest.update(b"\0") + continue + raise ContinuousRefactorError(f"Snapshot contains unsupported path: {child}") + return digest.hexdigest() + + +def publish_planning_workspace( + request: PlanningPublishRequest, +) -> PlanningPublishResult: + _validate_request(request) + live_migrations_dir = request.live_migrations_dir + live_dir = _live_migration_dir(live_migrations_dir, request.slug) + live_migrations_dir.mkdir(parents=True, exist_ok=True) + + lock = _acquire_publish_lock( + live_migrations_dir, + operation=request.operation, + now=request.now, + live_dir=live_dir, + ) + try: + result = _publish_planning_workspace_locked(request, live_dir) + except PlanningPublishError as error: + release_error = _release_publish_lock(lock.path) + if release_error is not None: + _raise_result(_with_cleanup_error(error.result, release_error)) + raise + except Exception as error: + release_error = _release_publish_lock(lock.path) + if release_error is not None: + raise ContinuousRefactorError( + f"{error}\n{release_error}" + ) from error + raise + + release_error = _release_publish_lock(lock.path) + if release_error is not None: + return _with_cleanup_error(result, release_error) + return result + + +def _publish_planning_workspace_locked( + request: PlanningPublishRequest, + 
live_dir: Path, +) -> PlanningPublishResult: + live_migrations_dir = request.live_migrations_dir + dirty_paths = _dirty_live_migration_paths(request.repo_root, live_dir) + if dirty_paths: + _raise_result( + _blocked_result( + "dirty live migration directory; commit, discard, or inspect with migration doctor", + live_dir=live_dir, + dirty_paths=dirty_paths, + ) + ) + + try: + _validate_snapshot(request.workspace_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + _raise_result( + _blocked_result( + f"workspace validation failed: {error}", + live_dir=live_dir, + ) + ) + + tx_paths = _prepare_transaction_paths(live_migrations_dir) + try: + _copy_tree(request.workspace_dir, tx_paths.staged_dir) + except _FS_ERRORS as error: + _raise_result( + _failed_result( + f"could not copy workspace to staged transaction path: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _validate_snapshot(tx_paths.staged_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + _raise_result( + _blocked_result( + f"staged validation failed: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + if not _same_device(tx_paths.staged_dir, live_migrations_dir): + _raise_result( + _blocked_result( + "staged publish source must be on the same filesystem as the live migrations dir", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + current_snapshot_id = snapshot_tree_digest(live_dir) + if current_snapshot_id != request.base_snapshot_id: + _raise_result( + _blocked_result( + "stale base snapshot: base_snapshot_id does not match current live snapshot " + f"(base_snapshot_id={request.base_snapshot_id}, current_snapshot_id={current_snapshot_id})", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + return _publish_staged_snapshot(request, live_dir, tx_paths) + + +def _publish_staged_snapshot( + request: PlanningPublishRequest, + live_dir: Path, + tx_paths: _TransactionPaths, +) -> PlanningPublishResult: + 
rollback_exists = False + try: + if live_dir.exists(): + _move_path(live_dir, tx_paths.rollback_dir) + rollback_exists = True + except OSError as error: + _raise_result( + _failed_result( + f"could not move live migration to rollback: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _move_path(tx_paths.staged_dir, live_dir) + except OSError as error: + restore_error = _restore_rollback( + live_dir, + tx_paths, + move_live_to_failed=live_dir.exists(), + ) + _raise_result( + _failed_result( + _with_restore_context( + f"could not install staged migration: {error}", + restore_error, + ), + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _validate_snapshot(live_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + restore_error = _restore_rollback(live_dir, tx_paths, move_live_to_failed=True) + _raise_result( + _failed_result( + _with_restore_context( + f"live snapshot validation failed after publish: {error}", + restore_error, + ), + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + cleanup_error = _cleanup_rollback(tx_paths.rollback_dir) if rollback_exists else None + if cleanup_error is None: + _remove_empty_dir(tx_paths.transaction_dir) + return PlanningPublishResult( + status="published", + reason="published", + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir, + staged_dir=tx_paths.staged_dir, + rollback_dir=tx_paths.rollback_dir, + failed_dir=tx_paths.failed_dir, + cleanup_error=cleanup_error, + ) + + +def _validate_request(request: PlanningPublishRequest) -> None: + _require_safe_segment(request.slug, field="slug") + if request.workspace_dir.name != request.slug: + raise ContinuousRefactorError( + "Planning workspace snapshot directory must be named for the migration " + f"slug {request.slug!r}: {request.workspace_dir}" + ) + workspace = request.workspace_dir.resolve() + live_root = request.live_migrations_dir.resolve() + try: + 
workspace.relative_to(live_root) + except ValueError: + pass + else: + raise ContinuousRefactorError( + f"Planning workspace must be outside live migrations dir: {request.workspace_dir}" + ) + if not request.base_snapshot_id: + raise ContinuousRefactorError("base_snapshot_id is required") + + +def _require_safe_segment(value: str, *, field: str) -> None: + if ( + not value + or Path(value).name != value + or value.startswith(".") + or value.startswith("__") + ): + raise ContinuousRefactorError( + f"Planning publish {field} is not a safe path segment: {value!r}" + ) + + +def _live_migration_dir(live_migrations_dir: Path, slug: str) -> Path: + _require_safe_segment(slug, field="slug") + return live_migrations_dir / slug + + +def _prepare_transaction_paths(live_migrations_dir: Path) -> _TransactionPaths: + token = _new_transaction_token() + transaction_dir = live_migrations_dir / _TRANSACTIONS_DIR_NAME / token + staged_dir = transaction_dir / "staged" + rollback_dir = transaction_dir / "rollback" + failed_dir = transaction_dir / "failed" + try: + transaction_dir.mkdir(parents=True, exist_ok=False) + except OSError as error: + raise ContinuousRefactorError( + f"Could not create planning transaction directory {transaction_dir}: {error}" + ) from error + return _TransactionPaths( + transaction_dir=transaction_dir, + staged_dir=staged_dir, + rollback_dir=rollback_dir, + failed_dir=failed_dir, + ) + + +def _acquire_publish_lock( + live_migrations_dir: Path, + *, + operation: str, + now: str | None, + live_dir: Path, +) -> _PublishLock: + lock_path = publish_lock_path(live_migrations_dir) + try: + lock_path.parent.mkdir(parents=True, exist_ok=True) + lock_path.mkdir() + except FileExistsError: + _raise_result(_lock_conflict_result(lock_path, live_dir)) + except OSError as error: + raise ContinuousRefactorError( + f"Could not acquire planning publish lock {lock_path}: {error}" + ) from error + + metadata = { + "pid": os.getpid(), + "operation": operation, + "created_at": now 
or iso_timestamp(), + } + try: + (lock_path / _LOCK_OWNER_FILE).write_text( + json.dumps(metadata, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + except OSError as error: + shutil.rmtree(lock_path, ignore_errors=True) + raise ContinuousRefactorError( + f"Could not write planning publish lock metadata {lock_path}: {error}" + ) from error + + return _PublishLock(lock_path) + + +def _release_publish_lock(lock_path: Path) -> str | None: + try: + _remove_tree(lock_path) + except _FS_ERRORS as error: + return f"could not release planning publish lock {lock_path}: {error}" + _remove_empty_dir(lock_path.parent) + return None + + +def _lock_conflict_result(lock_path: Path, live_dir: Path) -> PlanningPublishResult: + metadata = _read_lock_metadata(lock_path) + detail = ", ".join( + f"{key}={metadata[key]}" + for key in ("pid", "operation", "created_at") + if key in metadata + ) + suffix = f" ({detail})" if detail else "" + return PlanningPublishResult( + status="blocked", + reason=f"concurrent mutation lock is active at {lock_path}{suffix}", + snapshot_id=None, + live_dir=live_dir, + transaction_dir=None, + staged_dir=None, + rollback_dir=None, + failed_dir=None, + lock_path=lock_path, + ) + + +def _read_lock_metadata(lock_path: Path) -> dict[str, object]: + try: + raw = json.loads((lock_path / _LOCK_OWNER_FILE).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + if not isinstance(raw, dict): + return {} + return {key: value for key, value in raw.items() if isinstance(key, str)} + + +def _validate_snapshot(path: Path, mode: ConsistencyMode = "ready-publish") -> None: + if not path.is_dir(): + raise ContinuousRefactorError(f"Migration snapshot is not a directory: {path}") + snapshot_tree_digest(path) + findings = _publish_validation_findings(path, mode) + if not has_blocking_consistency_findings(findings): + return + details = "; ".join( + f"{finding.code}: {finding.path}: {finding.message}" + for finding in findings + if 
finding.severity == "error" + ) + raise ContinuousRefactorError(f"migration snapshot is inconsistent: {details}") + + +def _publish_validation_findings( + path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings = check_migration_consistency(path, mode=mode) + if not _is_transaction_staged_snapshot(path): + return findings + return [ + finding + for finding in findings + if finding.code != "manifest-slug-mismatch" + ] + + +def _is_transaction_staged_snapshot(path: Path) -> bool: + return path.name == "staged" and _TRANSACTIONS_DIR_NAME in path.parts + + +def _dirty_live_migration_paths(repo_root: Path, live_dir: Path) -> tuple[str, ...]: + pathspec = _repo_relative(live_dir, repo_root) + result = run_command( + ["git", "status", "--porcelain", "--ignored=matching", "--", pathspec], + cwd=repo_root, + check=False, + ) + if result.returncode != 0: + raise ContinuousRefactorError( + "Could not inspect live migration git status.\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + return tuple( + line[3:] if len(line) > 3 else line + for line in result.stdout.splitlines() + if line.strip() + ) + + +def _repo_relative(path: Path, repo_root: Path) -> str: + try: + return path.resolve().relative_to(repo_root.resolve()).as_posix() + except ValueError as error: + raise ContinuousRefactorError( + f"Live migration path must stay inside repository: {path}" + ) from error + + +def _copy_tree(source: Path, destination: Path) -> None: + shutil.copytree(source, destination) + + +def _move_path(source: Path, destination: Path) -> None: + source.replace(destination) + + +def _remove_tree(path: Path) -> None: + shutil.rmtree(path) + + +def _same_device(source: Path, target_root: Path) -> bool: + return source.stat().st_dev == target_root.stat().st_dev + + +def _new_transaction_token() -> str: + return uuid.uuid4().hex + + +def _restore_rollback( + live_dir: Path, + tx_paths: _TransactionPaths, + *, + move_live_to_failed: bool = False, 
+) -> str | None: + try: + if move_live_to_failed and live_dir.exists(): + _move_path(live_dir, tx_paths.failed_dir) + if tx_paths.rollback_dir.exists(): + _move_path(tx_paths.rollback_dir, live_dir) + return None + return "rollback snapshot is unavailable" + except OSError as error: + return f"rollback restore failed: {error}" + + +def _with_restore_context(message: str, restore_error: str | None) -> str: + if restore_error is None: + return f"{message}; previous live snapshot was restored" + return f"{message}; {restore_error}" + + +def _cleanup_rollback(rollback_dir: Path) -> str | None: + if not rollback_dir.exists(): + return None + try: + _remove_tree(rollback_dir) + except _FS_ERRORS as error: + return f"could not remove rollback transaction directory {rollback_dir}: {error}" + return None + + +def _remove_empty_dir(path: Path) -> None: + try: + path.rmdir() + except OSError: + return + + +def _blocked_result( + reason: str, + *, + live_dir: Path, + tx_paths: _TransactionPaths | None = None, + dirty_paths: tuple[str, ...] 
= (), +) -> PlanningPublishResult: + return PlanningPublishResult( + status="blocked", + reason=reason, + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir if tx_paths is not None else None, + staged_dir=tx_paths.staged_dir if tx_paths is not None else None, + rollback_dir=tx_paths.rollback_dir if tx_paths is not None else None, + failed_dir=tx_paths.failed_dir if tx_paths is not None else None, + dirty_paths=dirty_paths, + ) + + +def _failed_result( + reason: str, + *, + live_dir: Path, + tx_paths: _TransactionPaths, +) -> PlanningPublishResult: + return PlanningPublishResult( + status="failed", + reason=reason, + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir, + staged_dir=tx_paths.staged_dir, + rollback_dir=tx_paths.rollback_dir, + failed_dir=tx_paths.failed_dir, + lock_path=publish_lock_path(live_dir.parent), + ) + + +def _with_cleanup_error( + result: PlanningPublishResult, + cleanup_error: str, +) -> PlanningPublishResult: + combined = ( + cleanup_error + if result.cleanup_error is None + else f"{result.cleanup_error}; {cleanup_error}" + ) + return replace(result, cleanup_error=combined) + + +def _raise_result(result: PlanningPublishResult) -> None: + raise PlanningPublishError(result) + + +def _result_message(result: PlanningPublishResult) -> str: + lines = [result.reason] + if result.dirty_paths: + lines.append("dirty paths:") + lines.extend(f"- {path}" for path in result.dirty_paths) + if result.lock_path is not None: + lines.append(f"lock={result.lock_path}") + if result.status == "failed": + if result.live_dir is not None: + lines.append(f"live={result.live_dir}") + if result.rollback_dir is not None: + lines.append(f"rollback={result.rollback_dir}") + if result.staged_dir is not None: + lines.append(f"staged={result.staged_dir}") + if result.failed_dir is not None: + lines.append(f"failed={result.failed_dir}") + if result.cleanup_error 
is not None: + lines.append(f"cleanup_error={result.cleanup_error}") + return "\n".join(lines) + + +def _relative_name(root: Path, path: Path) -> str: + return path.relative_to(root).as_posix() diff --git a/src/continuous_refactoring/planning_state.py b/src/continuous_refactoring/planning_state.py new file mode 100644 index 0000000..e56eb35 --- /dev/null +++ b/src/continuous_refactoring/planning_state.py @@ -0,0 +1,995 @@ +from __future__ import annotations + +import json +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Literal, cast, get_args + +from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp + +__all__ = [ + "CompletedPlanningStep", + "FeedbackSource", + "FinalPlanningDecision", + "PlanningCursor", + "PlanningState", + "PlanningStep", + "PlanningStepOutcome", + "UserPlanningFeedback", + "append_planning_feedback", + "complete_planning_step", + "initial_planning_state", + "load_planning_state", + "new_planning_state", + "planning_stage_stdout_path", + "planning_state_path", + "planning_step_stdout", + "reopen_planning_for_revise", + "replay_planning_state", + "save_planning_state", + "validate_planning_state", + "write_planning_stage_stdout", +] + +SCHEMA_VERSION = 1 + +PlanningStep = Literal[ + "approaches", + "pick-best", + "expand", + "review", + "revise", + "review-2", + "final-review", +] +TerminalPlanningCursor = Literal[ + "terminal-ready", + "terminal-ready-awaiting-human", + "terminal-skipped", +] +PlanningCursor = PlanningStep | TerminalPlanningCursor +FinalPlanningDecision = Literal["approve-auto", "approve-needs-human", "reject"] +PlanningStepOutcome = Literal[ + "completed", + "clear", + "findings", + "approve-auto", + "approve-needs-human", + "reject", +] +FeedbackSource = Literal["message", "file"] + +_PLANNING_STEPS: tuple[str, ...] = cast(tuple[str, ...], get_args(PlanningStep)) +_TERMINAL_CURSORS: tuple[str, ...] 
= cast( + tuple[str, ...], get_args(TerminalPlanningCursor) +) +_PLANNING_CURSORS: tuple[str, ...] = (*_PLANNING_STEPS, *_TERMINAL_CURSORS) +_FINAL_DECISIONS: tuple[str, ...] = cast( + tuple[str, ...], get_args(FinalPlanningDecision) +) +_STEP_OUTCOMES: tuple[str, ...] = cast(tuple[str, ...], get_args(PlanningStepOutcome)) + +_COMPLETED_OUTCOME = "completed" +_TERMINAL_BY_DECISION: dict[str, TerminalPlanningCursor] = { + "approve-auto": "terminal-ready", + "approve-needs-human": "terminal-ready-awaiting-human", + "reject": "terminal-skipped", +} + + +@dataclass(frozen=True) +class CompletedPlanningStep: + name: PlanningStep + completed_at: str + outcome: PlanningStepOutcome + outputs: dict[str, str] + agent: str | None = None + model: str | None = None + effort: str | None = None + + def to_payload(self) -> dict[str, object]: + payload: dict[str, object] = { + "name": self.name, + "completed_at": self.completed_at, + "outcome": self.outcome, + "outputs": dict(self.outputs), + } + if self.agent is not None: + payload["agent"] = self.agent + if self.model is not None: + payload["model"] = self.model + if self.effort is not None: + payload["effort"] = self.effort + return payload + + +@dataclass(frozen=True) +class UserPlanningFeedback: + received_at: str + source: FeedbackSource + text: str + + def to_payload(self) -> dict[str, object]: + return { + "received_at": self.received_at, + "source": self.source, + "text": self.text, + } + + +@dataclass(frozen=True) +class PlanningState: + schema_version: int + target: str + next_step: PlanningCursor + completed_steps: tuple[CompletedPlanningStep, ...] + started_at: str + updated_at: str + feedback: tuple[UserPlanningFeedback, ...] 
+ review_findings: str | None + final_decision: FinalPlanningDecision | None + final_reason: str | None + revision_base_step_count: int | None = None + + +@dataclass(frozen=True) +class _ReplayResult: + next_step: PlanningCursor + review_findings: str | None + final_decision: FinalPlanningDecision | None + + +def planning_state_path(mig_root: Path) -> Path: + return mig_root / ".planning" / "state.json" + + +def planning_stage_stdout_path(mig_root: Path, step: str) -> Path: + _require_step(step) + return mig_root / ".planning" / "stages" / f"{step}.stdout.md" + + +def new_planning_state(target: str, *, now: str | None = None) -> PlanningState: + timestamp = now or iso_timestamp() + return PlanningState( + schema_version=SCHEMA_VERSION, + target=target, + next_step="approaches", + completed_steps=(), + started_at=timestamp, + updated_at=timestamp, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + revision_base_step_count=None, + ) + + +def initial_planning_state(target: str, *, now: str | None = None) -> PlanningState: + return new_planning_state(target, now=now) + + +def complete_planning_step( + state: PlanningState, + step: str, + outcome: str, + outputs: dict[str, str], + *, + completed_at: str | None = None, + agent: str | None = None, + model: str | None = None, + effort: str | None = None, + final_reason: str | None = None, +) -> PlanningState: + step_name = _require_step(step) + step_outcome = _require_outcome(outcome) + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Planning state next_step {state.next_step!r} does not match " + f"replayed cursor {replay.next_step!r}" + ) + _validate_replay_metadata(state, replay) + if state.next_step != step_name: + raise ContinuousRefactorError( + f"Cannot complete planning step {step_name!r}; " + f"current step is {state.next_step!r}" + ) + completed = CompletedPlanningStep( + name=step_name, + completed_at=completed_at or 
iso_timestamp(), + outcome=step_outcome, + outputs=dict(outputs), + agent=agent, + model=model, + effort=effort, + ) + _validate_output_refs_syntax(completed) + updated_steps = (*state.completed_steps, completed) + updated = PlanningState( + schema_version=state.schema_version, + target=state.target, + next_step=state.next_step, + completed_steps=updated_steps, + started_at=state.started_at, + updated_at=completed.completed_at, + feedback=state.feedback, + review_findings=state.review_findings, + final_decision=state.final_decision, + final_reason=state.final_reason, + revision_base_step_count=state.revision_base_step_count, + ) + replay = _replay_details(updated) + return PlanningState( + schema_version=updated.schema_version, + target=updated.target, + next_step=replay.next_step, + completed_steps=updated.completed_steps, + started_at=updated.started_at, + updated_at=updated.updated_at, + feedback=updated.feedback, + review_findings=replay.review_findings, + final_decision=replay.final_decision, + final_reason=_next_final_reason( + state.final_reason, + replay.final_decision, + final_reason, + ), + revision_base_step_count=updated.revision_base_step_count, + ) + + +def append_planning_feedback( + state: PlanningState, + text: str, + source: FeedbackSource, + *, + now: str | None = None, +) -> PlanningState: + feedback_source = _require_feedback_source(source, field="source") + feedback = UserPlanningFeedback( + received_at=now or iso_timestamp(), + source=feedback_source, + text=text, + ) + updated = PlanningState( + schema_version=state.schema_version, + target=state.target, + next_step=state.next_step, + completed_steps=state.completed_steps, + started_at=state.started_at, + updated_at=feedback.received_at, + feedback=(*state.feedback, feedback), + review_findings=state.review_findings, + final_decision=state.final_decision, + final_reason=state.final_reason, + revision_base_step_count=state.revision_base_step_count, + ) + _validate_replay_metadata(updated, 
_replay_details(updated)) + return updated + + +def reopen_planning_for_revise( + state: PlanningState, + *, + now: str | None = None, +) -> PlanningState: + replay = _replay_details(state) + if replay.next_step not in ("terminal-ready", "terminal-ready-awaiting-human"): + raise ContinuousRefactorError( + f"Cannot reopen planning state at {replay.next_step!r} for revise" + ) + updated = PlanningState( + schema_version=state.schema_version, + target=state.target, + next_step="revise", + completed_steps=state.completed_steps, + started_at=state.started_at, + updated_at=now or iso_timestamp(), + feedback=state.feedback, + review_findings=None, + final_decision=None, + final_reason=None, + revision_base_step_count=len(state.completed_steps), + ) + _validate_replay_metadata(updated, _replay_details(updated)) + return updated + + +def replay_planning_state(state: PlanningState) -> PlanningCursor: + return _replay_details(state).next_step + + +def validate_planning_state( + state: PlanningState, + repo_root: Path, + *, + state_path: Path | None = None, + published_migration_root: Path | None = None, +) -> None: + if state.schema_version != SCHEMA_VERSION: + raise ContinuousRefactorError( + f"Unsupported planning state schema_version: {state.schema_version!r}" + ) + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Planning state next_step {state.next_step!r} does not match " + f"replayed cursor {replay.next_step!r}" + ) + _validate_replay_metadata(state, replay) + migration_root = state_path.parent.parent if state_path is not None else None + _validate_output_paths( + state, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + + +def _validate_replay_metadata(state: PlanningState, replay: _ReplayResult) -> None: + if state.review_findings != replay.review_findings: + raise ContinuousRefactorError( + "Planning state review_findings does not match replayed history" + ) + if 
state.final_decision != replay.final_decision: + raise ContinuousRefactorError( + "Planning state final_decision does not match replayed history" + ) + if replay.final_decision is None and state.final_reason is not None: + raise ContinuousRefactorError( + "Planning state final_reason requires a final-review decision" + ) + if replay.final_decision is not None and not state.final_reason: + raise ContinuousRefactorError( + "Planning state terminal final-review requires final_reason" + ) + + +def load_planning_state( + repo_root: Path, + path: Path, + *, + published_migration_root: Path | None = None, +) -> PlanningState: + try: + content = path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not load planning state {path}: {error}" + ) from error + try: + raw = json.loads(content) + except json.JSONDecodeError as error: + raise ContinuousRefactorError( + f"Could not parse planning state {path}: {error}" + ) from error + state = _decode_state_payload(raw) + validate_planning_state( + state, + repo_root, + state_path=path, + published_migration_root=published_migration_root, + ) + return state + + +def save_planning_state( + state: PlanningState, + path: Path, + *, + repo_root: Path, + published_migration_root: Path | None = None, +) -> None: + validate_planning_state( + state, + repo_root, + state_path=path, + published_migration_root=published_migration_root, + ) + content = _encode_state_payload(state) + try: + path.parent.mkdir(parents=True, exist_ok=True) + except OSError as error: + raise ContinuousRefactorError( + f"Could not save planning state {path}: {error}" + ) from error + + tmp_path: Path | None = None + try: + with tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", dir=path.parent, suffix=".tmp", delete=False + ) as tmp: + tmp_path = Path(tmp.name) + tmp.write(content) + except OSError as error: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + 
f"Could not save planning state {path}: {error}" + ) from error + + try: + tmp_path.replace(path) + except OSError as error: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning state {path}: {error}" + ) from error + + +def write_planning_stage_stdout( + repo_root: Path, + mig_root: Path, + step: str, + stdout: str, + *, + published_migration_root: Path | None = None, +) -> dict[str, str]: + path = _next_planning_stage_stdout_path(mig_root, step) + _write_text_atomic(path, stdout) + if published_migration_root is None: + ref_path = path + else: + ref_path = published_migration_root / path.relative_to(mig_root) + return {"stdout": _repo_relative(ref_path, repo_root)} + + +def planning_step_stdout( + state: PlanningState, + repo_root: Path, + step: str, + *, + state_path: Path, + published_migration_root: Path | None = None, +) -> tuple[str, str]: + validate_planning_state( + state, + repo_root, + state_path=state_path, + published_migration_root=published_migration_root, + ) + step_name = _require_step(step) + migration_root = state_path.parent.parent + for completed in reversed(state.completed_steps): + if completed.name != step_name: + continue + stdout_ref = completed.outputs.get("stdout") + if stdout_ref is None: + break + path = _output_path_for_ref( + stdout_ref, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + try: + return stdout_ref, path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not read planning output {stdout_ref}: {error}" + ) from error + raise ContinuousRefactorError( + f"Planning state has no accepted stdout output for step {step_name!r}" + ) + + +def _next_planning_stage_stdout_path(mig_root: Path, step: str) -> Path: + base = planning_stage_stdout_path(mig_root, step) + if not base.exists(): + return base + index = 2 + while True: + candidate = base.with_name(f"{step}-{index}.stdout.md") + if not 
candidate.exists(): + return candidate + index += 1 + + +def _replay_details(state: PlanningState) -> _ReplayResult: + expected: PlanningCursor = "approaches" + review_findings: str | None = None + final_decision: FinalPlanningDecision | None = None + + _validate_revision_base_step_count(state) + for index, completed in enumerate(state.completed_steps): + if state.revision_base_step_count == index: + expected, review_findings, final_decision = _reopen_cursor(expected) + if expected not in _PLANNING_STEPS: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} appears after terminal cursor {expected!r}" + ) + if completed.name != expected: + raise ContinuousRefactorError( + f"Completed planning step {completed.name!r} is invalid: " + f"expected {expected}" + ) + expected, review_findings, final_decision = _advance_cursor( + completed, + review_findings=review_findings, + final_decision=final_decision, + ) + + if state.revision_base_step_count == len(state.completed_steps): + expected, review_findings, final_decision = _reopen_cursor(expected) + + return _ReplayResult( + next_step=expected, + review_findings=review_findings, + final_decision=final_decision, + ) + + +def _validate_revision_base_step_count(state: PlanningState) -> None: + value = state.revision_base_step_count + if value is None: + return + if value < 1 or value > len(state.completed_steps): + raise ContinuousRefactorError( + "Planning state revision_base_step_count is outside completed history" + ) + + +def _reopen_cursor( + cursor: PlanningCursor, +) -> tuple[PlanningCursor, str | None, FinalPlanningDecision | None]: + if cursor not in ("terminal-ready", "terminal-ready-awaiting-human"): + raise ContinuousRefactorError( + "Planning state revision_base_step_count must point at a " + f"terminal ready cursor, got {cursor!r}" + ) + return "revise", None, None + + +def _advance_cursor( + completed: CompletedPlanningStep, + *, + review_findings: str | None, + final_decision: 
FinalPlanningDecision | None, +) -> tuple[PlanningCursor, str | None, FinalPlanningDecision | None]: + _require_valid_outcome_for_step(completed) + if completed.name == "approaches": + return "pick-best", review_findings, final_decision + if completed.name == "pick-best": + return "expand", review_findings, final_decision + if completed.name == "expand": + return "review", review_findings, final_decision + if completed.name == "review": + if completed.outcome == "findings": + return "revise", _required_stdout_output(completed), final_decision + return "final-review", review_findings, final_decision + if completed.name == "revise": + return "review-2", review_findings, final_decision + if completed.name == "review-2": + return "final-review", review_findings, final_decision + decision = cast(FinalPlanningDecision, completed.outcome) + return _TERMINAL_BY_DECISION[decision], review_findings, decision + + +def _require_valid_outcome_for_step(completed: CompletedPlanningStep) -> None: + allowed = _allowed_outcomes(completed.name) + if completed.outcome not in allowed: + allowed_text = ", ".join(repr(outcome) for outcome in allowed) + raise ContinuousRefactorError( + f"Planning step {completed.name!r} outcome {completed.outcome!r} " + f"is invalid; expected one of {allowed_text}" + ) + + +def _allowed_outcomes(step: PlanningStep) -> tuple[str, ...]: + if step in ("approaches", "pick-best", "expand", "revise"): + return (_COMPLETED_OUTCOME,) + if step == "review": + return ("clear", "findings") + if step == "review-2": + return ("clear",) + return _FINAL_DECISIONS + + +def _required_stdout_output(completed: CompletedPlanningStep) -> str: + stdout_ref = completed.outputs.get("stdout") + if not stdout_ref: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} must record a stdout output" + ) + return stdout_ref + + +def _next_final_reason( + previous: str | None, + final_decision: FinalPlanningDecision | None, + final_reason: str | None, +) -> str | None: + 
if final_decision is None: + return None + if final_reason is not None: + return final_reason + return previous + + +def _validate_output_paths( + state: PlanningState, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, +) -> None: + for completed in state.completed_steps: + stdout_ref = _required_stdout_output(completed) + _validate_output_refs_syntax(completed) + _require_existing_output( + stdout_ref, + repo_root, + migration_root, + published_migration_root=published_migration_root, + field=f"completed_steps.{completed.name}.outputs.stdout", + ) + + +def _validate_output_refs_syntax(completed: CompletedPlanningStep) -> None: + if completed.outputs.keys() != {"stdout"}: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} has unsupported outputs" + ) + _require_repo_relative_path( + _required_stdout_output(completed), + field=f"completed_steps.{completed.name}.outputs.stdout", + ) + + +def _require_existing_output( + value: str, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, + field: str, +) -> None: + ref = _require_repo_relative_path(value, field=field) + repo_output_path = repo_root / ref + output_path = _output_path_for_ref( + value, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + resolved_output = output_path.resolve() + try: + repo_output_path.resolve().relative_to(repo_root.resolve()) + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} must be repo-relative" + ) from error + if published_migration_root is not None: + try: + repo_output_path.resolve().relative_to(published_migration_root.resolve()) + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} must stay inside the published migration directory" + ) from error + if migration_root is not None: + try: + resolved_output.relative_to(migration_root.resolve()) + 
except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} must stay inside the migration directory" + ) from error + if output_path.is_symlink(): + raise ContinuousRefactorError( + f"Planning output path {value!r} must be a regular file, not a symlink" + ) + if not output_path.is_file(): + raise ContinuousRefactorError(f"missing planning output: {value}") + + +def _output_path_for_ref( + value: str, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, +) -> Path: + ref_path = repo_root / _require_repo_relative_path(value, field="stdout") + if migration_root is None or published_migration_root is None: + return ref_path + try: + relative = ref_path.resolve().relative_to(published_migration_root.resolve()) + except ValueError: + return ref_path + return migration_root / relative + + +def _require_repo_relative_path(value: str, *, field: str) -> Path: + if not isinstance(value, str): + raise ContinuousRefactorError(f"Planning field {field!r} must be a string") + ref = Path(value) + if str(ref) in ("", ".") or ref.is_absolute() or ".." 
in ref.parts: + raise ContinuousRefactorError( + f"Planning output path {value!r} must be repo-relative" + ) + return ref + + +def _repo_relative(path: Path, repo_root: Path) -> str: + try: + return path.relative_to(repo_root).as_posix() + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {path} must be inside repository {repo_root}" + ) from error + + +def _decode_state_payload(raw_payload: object) -> PlanningState: + raw = _require_mapping(raw_payload, field="planning state") + _require_keys( + raw, + { + "schema_version", + "target", + "next_step", + "completed_steps", + "started_at", + "updated_at", + "feedback", + "review_findings", + "final_decision", + "final_reason", + }, + optional={"revision_base_step_count"}, + field="planning state", + ) + return PlanningState( + schema_version=_require_int(raw.get("schema_version"), field="schema_version"), + target=_require_str(raw.get("target"), field="target"), + next_step=_require_cursor(raw.get("next_step"), field="next_step"), + completed_steps=_require_completed_steps(raw.get("completed_steps")), + started_at=_require_str(raw.get("started_at"), field="started_at"), + updated_at=_require_str(raw.get("updated_at"), field="updated_at"), + feedback=_require_feedback_tuple(raw.get("feedback"), field="feedback"), + review_findings=_optional_str(raw.get("review_findings"), field="review_findings"), + final_decision=_optional_final_decision( + raw.get("final_decision"), field="final_decision" + ), + final_reason=_optional_str(raw.get("final_reason"), field="final_reason"), + revision_base_step_count=_optional_int( + raw.get("revision_base_step_count"), field="revision_base_step_count" + ), + ) + + +def _encode_state_payload(state: PlanningState) -> str: + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Cannot save planning state with next_step {state.next_step!r}; " + f"replayed cursor is {replay.next_step!r}" + ) + 
_validate_replay_metadata(state, replay) + payload = { + "schema_version": state.schema_version, + "target": state.target, + "next_step": state.next_step, + "completed_steps": [step.to_payload() for step in state.completed_steps], + "started_at": state.started_at, + "updated_at": state.updated_at, + "feedback": [feedback.to_payload() for feedback in state.feedback], + "review_findings": state.review_findings, + "final_decision": state.final_decision, + "final_reason": state.final_reason, + "revision_base_step_count": state.revision_base_step_count, + } + return json.dumps(payload, indent=2, sort_keys=True) + "\n" + + +def _require_completed_steps(value: object) -> tuple[CompletedPlanningStep, ...]: + if not isinstance(value, list): + raise ContinuousRefactorError( + f"Planning field 'completed_steps' must be a list: {value!r}" + ) + return tuple( + _require_completed_step(raw_step, index=index) + for index, raw_step in enumerate(value) + ) + + +def _require_completed_step(raw_step: object, *, index: int) -> CompletedPlanningStep: + raw = _require_mapping(raw_step, field=f"completed_steps[{index}]") + _require_keys( + raw, + {"name", "completed_at", "outcome", "outputs"}, + optional={"agent", "model", "effort"}, + field=f"completed_steps[{index}]", + ) + return CompletedPlanningStep( + name=_require_step(raw.get("name")), + completed_at=_require_str( + raw.get("completed_at"), field=f"completed_steps[{index}].completed_at" + ), + outcome=_require_outcome(raw.get("outcome")), + outputs=_require_outputs(raw.get("outputs"), index=index), + agent=_optional_str(raw.get("agent"), field=f"completed_steps[{index}].agent"), + model=_optional_str(raw.get("model"), field=f"completed_steps[{index}].model"), + effort=_optional_str(raw.get("effort"), field=f"completed_steps[{index}].effort"), + ) + + +def _require_outputs(value: object, *, index: int) -> dict[str, str]: + raw = _require_mapping(value, field=f"completed_steps[{index}].outputs") + outputs: dict[str, str] = {} + for 
key, output in raw.items(): + if not isinstance(key, str): + raise ContinuousRefactorError( + f"Planning outputs keys must be strings: {key!r}" + ) + outputs[key] = _require_str( + output, + field=f"completed_steps[{index}].outputs.{key}", + ) + return outputs + + +def _require_mapping(value: object, *, field: str) -> dict[str, object]: + if not isinstance(value, dict): + raise ContinuousRefactorError( + f"Planning field {field!r} must be an object: {value!r}" + ) + return value + + +def _require_keys( + raw: dict[str, object], + required: set[str], + *, + field: str, + optional: set[str] | None = None, +) -> None: + allowed = required | (optional or set()) + missing = sorted(required - raw.keys()) + extra = sorted(raw.keys() - allowed) + if missing: + raise ContinuousRefactorError( + f"Planning field {field!r} is missing keys: {', '.join(missing)}" + ) + if extra: + raise ContinuousRefactorError( + f"Planning field {field!r} has unknown keys: {', '.join(extra)}" + ) + + +def _require_str(value: object, *, field: str) -> str: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a string: {value!r}" + ) + return value + + +def _optional_str(value: object, *, field: str) -> str | None: + if value is None: + return None + return _require_str(value, field=field) + + +def _require_int(value: object, *, field: str) -> int: + if isinstance(value, bool) or not isinstance(value, int): + raise ContinuousRefactorError( + f"Planning field {field!r} must be an integer: {value!r}" + ) + return value + + +def _optional_int(value: object, *, field: str) -> int | None: + if value is None: + return None + return _require_int(value, field=field) + + +def _require_feedback_tuple( + value: object, + *, + field: str, +) -> tuple[UserPlanningFeedback, ...]: + if not isinstance(value, list): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a list: {value!r}" + ) + return tuple( + _require_feedback(item, 
field=f"{field}[{index}]") + for index, item in enumerate(value) + ) + + +def _require_feedback(value: object, *, field: str) -> UserPlanningFeedback: + if isinstance(value, str): + return UserPlanningFeedback(received_at="", source="message", text=value) + raw = _require_mapping(value, field=field) + _require_keys(raw, {"received_at", "source", "text"}, field=field) + return UserPlanningFeedback( + received_at=_require_str(raw.get("received_at"), field=f"{field}.received_at"), + source=_require_feedback_source(raw.get("source"), field=f"{field}.source"), + text=_require_str(raw.get("text"), field=f"{field}.text"), + ) + + +def _require_feedback_source(value: object, *, field: str) -> FeedbackSource: + source = _require_str(value, field=field) + if source not in ("message", "file"): + raise ContinuousRefactorError( + f"Planning field {field!r} must be 'message' or 'file': {source!r}" + ) + return cast(FeedbackSource, source) + + +def _require_cursor(value: object, *, field: str) -> PlanningCursor: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a string: {value!r}" + ) + if value not in _PLANNING_CURSORS: + raise ContinuousRefactorError(f"Unknown planning cursor: {value!r}") + return cast(PlanningCursor, value) + + +def _require_step(value: object) -> PlanningStep: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning step name must be a string: {value!r}" + ) + if value not in _PLANNING_STEPS: + raise ContinuousRefactorError(f"Unknown planning step: {value!r}") + return cast(PlanningStep, value) + + +def _require_outcome(value: object) -> PlanningStepOutcome: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning step outcome must be a string: {value!r}" + ) + if value not in _STEP_OUTCOMES: + raise ContinuousRefactorError(f"Unknown planning outcome: {value!r}") + return cast(PlanningStepOutcome, value) + + +def _optional_final_decision( + value: object, + *, + 
field: str, +) -> FinalPlanningDecision | None: + if value is None: + return None + decision = _require_str(value, field=field) + if decision not in _FINAL_DECISIONS: + raise ContinuousRefactorError(f"Unknown final planning decision: {decision!r}") + return cast(FinalPlanningDecision, decision) + + +def _write_text_atomic(path: Path, content: str) -> None: + try: + path.parent.mkdir(parents=True, exist_ok=True) + except OSError as error: + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error + + tmp_path: Path | None = None + try: + with tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", dir=path.parent, suffix=".tmp", delete=False + ) as tmp: + tmp_path = Path(tmp.name) + tmp.write(content) + except OSError as error: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error + + try: + tmp_path.replace(path) + except OSError as error: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error diff --git a/src/continuous_refactoring/prompts.py b/src/continuous_refactoring/prompts.py index a65af34..ec5a9a8 100644 --- a/src/continuous_refactoring/prompts.py +++ b/src/continuous_refactoring/prompts.py @@ -843,6 +843,8 @@ def compose_phase_execution_prompt( You are conducting a human review of a refactoring migration that was flagged for human input during planning. +Project-specific taste is injected by the caller in the `## Taste` section. + The plan and phase files were written at an earlier point in time. The repository may have drifted since then: files referenced in the plan may have moved, been renamed, been deleted, or changed in shape. Line numbers, symbol @@ -850,7 +852,8 @@ def compose_phase_execution_prompt( against the current tree. Your job: -1. Read the migration plan (plan.md), the current phase file, and the manifest. +1. 
Read the migration plan (plan.md), the current phase file, and the manifest + from the staged work dir. 2. Check the plan against the current repo state. For each file, symbol, or line reference the plan relies on, confirm it still exists and still means what the plan assumes. Note any drift you find — stale assumptions change @@ -863,13 +866,21 @@ def compose_phase_execution_prompt( shape the plan was written against. 4. Ask the user whatever questions are needed to unblock the migration. 5. Based on the user's answers, update plan.md and/or phase files as needed. - Fix drifted references while you are there. + Fix drifted references while you are there. Write only inside the staged + work dir. 6. When the review is complete and the user approves, update manifest.json: set "awaiting_human_review" to false and set "human_review_reason" to null. If the user wants to abort or cannot resolve the review, leave awaiting_human_review as true and exit cleanly. +Do not mutate the live migration directory. It is read-only reference material. +The staged work dir is the only writable target. Successful review changes are +published by the harness after validation. +If review fails or exits before publish, failed review output and partial staged +changes are run artifacts only; they are not resume input. Rerun review starts +from the last published live migration snapshot. 
def compose_review_perform_prompt(
    migration_name: str,
    repo_root: Path,
    work_dir: Path,
    live_dir: Path,
    phase: PhaseSpec | None,
    manifest: MigrationManifest,
    taste: str,
) -> str:
    """Assemble the prompt for an interactive, staged migration review.

    Sections are emitted in a fixed order: the static review instructions,
    the migration name, the workspace paths, the recorded review reason, the
    manifest summary, the plan path, the current phase (or a "(none)"
    placeholder), and finally the caller-supplied taste text. Manifest, plan
    and phase paths all point into *work_dir*.
    """
    review_reason = manifest.human_review_reason or "(no reason recorded)"

    workspace_section = (
        "## Workspace\n"
        f"Repo root: {repo_root}\n"
        f"Staged work dir: {work_dir}\n"
        f"Work dir: {work_dir}\n"
        f"Live migration dir: {live_dir}\n"
        "Writable target: staged work dir only.\n"
        "Writable target: work dir only.\n"
        "The live migration directory is read-only reference material.\n"
        "Do not mutate the live migration directory."
    )
    manifest_section = (
        "## Manifest\n"
        f"Path: {work_dir / 'manifest.json'}\n"
        f"{_format_manifest_summary(manifest)}"
    )
    plan_section = f"## Plan\nPath: {work_dir / 'plan.md'}"

    if phase is None:
        phase_section = (
            "## Current Phase\n"
            "Current phase file: (none)\n"
            "Current phase name: (none)"
        )
    else:
        phase_section = (
            "## Current Phase\n"
            f"Name: {phase.name}\n"
            f"File: {work_dir / phase_file_reference(phase)}"
        )

    return _join_sections(
        REVIEW_PERFORM_PROMPT,
        f"## Migration\nName: {migration_name}",
        workspace_section,
        f"## Human Review\n{review_reason}",
        manifest_section,
        plan_section,
        phase_section,
        f"## Taste\n{taste}",
    )
@dataclass(frozen=True)
class _ReviewCliContext:
    """Paths resolved once per review CLI invocation."""

    # Resolved path of the registered project entry.
    repo_root: Path
    # Directory holding the live migrations.
    live_dir: Path
    # Per-project state directory taken from the resolved project.
    project_state_dir: Path


@dataclass(frozen=True)
class StagedReviewRequest:
    """Inputs for one staged human review of a migration."""

    repo_root: Path
    live_dir: Path
    # Migration under review (exposes ``slug`` and ``path``).
    target: MigrationTarget
    project_state_dir: Path
    # Settings forwarded to the interactive agent runner.
    agent: str
    model: str
    effort: str
    # Text placed in the prompt's "## Taste" section.
    taste: str


class _ReviewCliError(ContinuousRefactorError):
    """Review failure that carries the process exit code to report."""

    def __init__(self, message: str, exit_code: int) -> None:
        super().__init__(message)
        self.exit_code = exit_code
def handle_review_perform(args: argparse.Namespace) -> None:
    """CLI entry point for ``review perform``.

    Resolves the review context and the migration named by
    ``args.migration``, loads the project taste, then hands off to
    ``handle_staged_migration_review``. A target that cannot be resolved
    exits with status 2; a taste that cannot be loaded exits with status 1.
    Both print ``Error: ...`` to stderr first.
    """
    context = _resolve_review_context(error_code=2)

    try:
        target = resolve_migration_target(
            live_dir=context.live_dir,
            repo_root=context.repo_root,
            value=args.migration,
        )
    except ContinuousRefactorError as resolve_error:
        print(f"Error: {resolve_error}", file=sys.stderr)
        raise SystemExit(2) from resolve_error

    try:
        taste = load_taste(resolve_project(context.repo_root))
    except ContinuousRefactorError as taste_error:
        print(f"Error: {taste_error}", file=sys.stderr)
        raise SystemExit(1) from taste_error

    request = StagedReviewRequest(
        repo_root=context.repo_root,
        live_dir=context.live_dir,
        target=target,
        project_state_dir=context.project_state_dir,
        agent=args.agent,
        model=args.model,
        effort=args.effort,
        taste=taste,
    )
    handle_staged_migration_review(request)


def handle_staged_migration_review(
    request: StagedReviewRequest,
) -> PlanningPublishResult:
    """Run a staged review and translate failures into process exits.

    Returns the publish result on success. On failure an ``Error: ...`` line
    goes to stderr and ``SystemExit`` is raised: with the error's own
    ``exit_code`` for ``_ReviewCliError``, otherwise with status 1.
    """
    try:
        return perform_staged_migration_review(request)
    except _ReviewCliError as cli_error:
        # Handled first so its specific exit code wins over the generic
        # ContinuousRefactorError clause below.
        print(f"Error: {cli_error}", file=sys.stderr)
        raise SystemExit(cli_error.exit_code) from cli_error
    except PlanningPublishError as publish_error:
        explanation = _review_publish_error_message(
            publish_error, request.target.slug
        )
        print(
            f"Error: {explanation}",
            file=sys.stderr,
        )
        raise SystemExit(1) from publish_error
    except ContinuousRefactorError as other_error:
        print(f"Error: {other_error}", file=sys.stderr)
        raise SystemExit(1) from other_error
def perform_staged_migration_review(
    request: StagedReviewRequest,
) -> PlanningPublishResult:
    """Review a flagged migration in a staged copy and publish the result.

    Steps, in order: check the live manifest exists and is flagged for
    review; capture a snapshot id of the live migration; copy the migration
    into a fresh planning workspace; run the interactive agent with the
    workspace as its working directory; validate the workspace; require both
    review flags to be cleared; publish the workspace.

    Raises:
        _ReviewCliError: missing or unflagged migration (exit code 2),
            non-zero agent exit (that exit code), failed workspace
            validation or uncleared review flags (exit code 1).
        ContinuousRefactorError: when the migration cannot be copied into
            the workspace.
    """
    target = request.target
    live_manifest_path = target.path / "manifest.json"
    if not live_manifest_path.exists():
        raise _ReviewCliError(
            f"migration '{request.target.slug}' does not exist.",
            2,
        )

    manifest = load_migration_manifest(live_manifest_path)
    if not manifest.awaiting_human_review:
        raise _ReviewCliError(
            f"migration '{request.target.slug}' is not flagged for review.",
            2,
        )

    # Captured before staging so the publish step can compare against it.
    base_snapshot_id = capture_live_snapshot(
        request.repo_root,
        request.live_dir,
        target.slug,
    )
    workspace = prepare_planning_workspace(
        request.project_state_dir,
        target.slug,
        f"review-{uuid.uuid4().hex}",
    )
    staged_root = workspace.root
    try:
        shutil.copytree(target.path, staged_root, dirs_exist_ok=True)
    except (OSError, shutil.Error) as error:
        raise ContinuousRefactorError(
            f"Could not copy migration to review workspace {workspace.root}: {error}"
        ) from error

    if manifest.current_phase:
        phase = resolve_current_phase(manifest)
    else:
        phase = None
    prompt = compose_review_perform_prompt(
        target.slug,
        request.repo_root,
        staged_root,
        target.path,
        phase,
        manifest,
        request.taste,
    )

    returncode = run_agent_interactive(
        request.agent,
        request.model,
        request.effort,
        prompt,
        staged_root,
    )
    if returncode != 0:
        raise _ReviewCliError(
            f"review agent exited with code {returncode}.",
            returncode,
        )

    _require_consistent_review_workspace(staged_root)
    staged_manifest = load_migration_manifest(staged_root / "manifest.json")
    if staged_manifest.awaiting_human_review:
        raise _ReviewCliError(
            f"review of '{request.target.slug}' was not completed; "
            "awaiting_human_review is still set.",
            1,
        )
    if staged_manifest.human_review_reason is not None:
        raise _ReviewCliError(
            f"review of '{request.target.slug}' was not completed; "
            "human_review_reason is still set.",
            1,
        )

    publish_request = PlanningPublishRequest(
        repo_root=request.repo_root,
        live_migrations_dir=request.live_dir,
        slug=target.slug,
        workspace_dir=staged_root,
        base_snapshot_id=base_snapshot_id,
        validation_mode="ready-publish",
        operation="migration.review",
    )
    return publish_planning_workspace(publish_request)


def _require_consistent_review_workspace(workspace_root: Path) -> None:
    """Raise unless the staged workspace passes the ready-publish check.

    Raises:
        _ReviewCliError: (exit code 1) listing every finding whose severity
            is ``"error"`` as ``code: path: message``, joined with ``"; "``.
    """
    findings = check_migration_consistency(workspace_root, mode="ready-publish")
    if has_blocking_consistency_findings(findings):
        details = "; ".join(
            f"{finding.code}: {finding.path}: {finding.message}"
            for finding in findings
            if finding.severity == "error"
        )
        raise _ReviewCliError(
            f"review workspace validation failed: {details}",
            1,
        )


def _review_publish_error_message(error: PlanningPublishError, slug: str) -> str:
    """Return the text to show the user for a failed publish.

    The error's own message is returned as-is unless its result reason
    mentions a stale base snapshot; in that case a recovery hint naming
    *slug* is appended on a new line.
    """
    message = str(error)
    if "stale base snapshot" in error.result.reason:
        return (
            f"{message}\n"
            "Live migration changed while review was running. "
            f"Run `continuous-refactoring migration doctor {slug}` if unsure, then "
            f"rerun `continuous-refactoring migration review {slug} ...`."
        )
    return message
def _planning_route_outcome(result: PlanningStepResult) -> RouteOutcome:
    """Map a planning step result onto the route outcome reported upstream.

    A ``"blocked"`` step maps to ``"blocked"``. A ``"published"`` step maps
    to ``"commit"``, except when it carries a terminal outcome whose status
    is ``"skipped"``, which maps to ``"abandon"``. Every other step status
    maps to ``"abandon"``.
    """
    step_status = result.status
    if step_status == "blocked":
        return "blocked"
    if step_status != "published":
        return "abandon"

    terminal = result.terminal_outcome
    was_skipped = terminal is not None and terminal.status == "skipped"
    return "abandon" if was_skipped else "commit"
why=sanitize_text(outcome.reason, repo_root) or outcome.reason, + ), + artifacts=artifacts, + attempt=attempt, + phase="planning", ) + + print(f"Planning: {describe_planning_outcome(outcome)} — {outcome.reason}") return _planning_result( - outcome="commit", + outcome=route_outcome, target=target, planning_context=planning_context, repo_root=repo_root, reason=outcome.reason, + call_role=f"planning.{outcome.step}", + failure_kind="none" if route_outcome == "commit" else "planning-blocked", ) -def describe_planning_outcome(status: str) -> str: +def describe_planning_outcome(status: str | PlanningStepResult) -> str: + if not isinstance(status, str): + if status.terminal_outcome is None: + return f"{status.step} accepted" + status = status.terminal_outcome.status if status == "ready": return "queued for execution" if status == "awaiting_human_review": diff --git a/tests/test_cli_migrations.py b/tests/test_cli_migrations.py new file mode 100644 index 0000000..fe2e9b4 --- /dev/null +++ b/tests/test_cli_migrations.py @@ -0,0 +1,1300 @@ +from __future__ import annotations + +import argparse +import json +import shlex +from dataclasses import replace +from pathlib import Path + +import pytest + +from conftest import init_repo +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.artifacts import CommandCapture +from continuous_refactoring.cli import build_parser +from continuous_refactoring.config import register_project, set_live_migrations_dir +from continuous_refactoring.git import run_command +from continuous_refactoring.migration_cli import ( + handle_migration, + handle_migration_doctor, + handle_migration_list, + handle_migration_refine, + handle_migration_review, + resolve_migration_target, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + PhaseSpec, + load_manifest, + save_manifest, +) +from continuous_refactoring.planning_publish import snapshot_tree_digest +from 
def test_migration_parser_accepts_list_and_doctor() -> None:
    """The parser accepts ``migration`` list, filtered list, doctor and review."""
    parse = build_parser().parse_args

    listed = parse(["migration", "list"])
    assert listed.command == "migration"
    assert listed.migration_command == "list"
    assert listed.handler.__name__ == "handle_migration"

    filtered_argv = ["migration", "list", "--status", "planning", "--awaiting-review"]
    filtered = parse(filtered_argv)
    assert filtered.status == "planning"
    assert filtered.awaiting_review is True

    doctor = parse(["migration", "doctor", "my-mig"])
    assert doctor.migration_command == "doctor"
    assert doctor.target == "my-mig"
    assert doctor.all is False

    review_argv = [
        "migration",
        "review",
        "my-mig",
        "--with",
        "codex",
        "--model",
        "test-model",
        "--effort",
        "low",
    ]
    review = parse(review_argv)
    assert review.migration_command == "review"
    assert review.target == "my-mig"
    assert review.agent == "codex"
    assert review.model == "test-model"
    assert review.effort == "low"


def test_migration_parser_accepts_doctor_all() -> None:
    """``migration doctor --all`` parses with no explicit target."""
    parsed = build_parser().parse_args(["migration", "doctor", "--all"])

    assert parsed.command == "migration"
    assert parsed.migration_command == "doctor"
    assert parsed.target is None
    assert parsed.all is True
def _canonical_migration_commands(readme: str) -> tuple[str, ...]:
    """Extract the documented migration commands from README text.

    Finds the "Canonical migration commands:" line, then the first
    ```bash fence at or after it, and returns the lines inside that fence
    that start with ``continuous-refactoring migration ``, in order.
    ``list.index`` raises ``ValueError`` when the marker or a fence is absent.
    """
    readme_lines = readme.splitlines()
    marker_at = readme_lines.index("Canonical migration commands:")
    fence_open = readme_lines.index("```bash", marker_at)
    fence_close = readme_lines.index("```", fence_open + 1)

    commands: list[str] = []
    for candidate in readme_lines[fence_open + 1:fence_close]:
        if candidate.startswith("continuous-refactoring migration "):
            commands.append(candidate)
    return tuple(commands)


def _argv_from_documented_command(command: str) -> list[str]:
    """Turn a documented shell command into an argv list for the parser.

    The leading program name is dropped; it must be
    ``continuous-refactoring`` or ``AssertionError`` is raised.
    """
    tokens = shlex.split(command)
    if tokens[0] != "continuous-refactoring":
        raise AssertionError(f"unexpected command prefix: {command}")

    argv: list[str] = []
    for token in tokens[1:]:
        # NOTE(review): the placeholder compared against is the empty string
        # here; confirm this is the intended placeholder token.
        argv.append(token if token != "" else "auth-cleanup")
    return argv
"--model", + "test-model", + "--effort", + "low", + ] + ) + assert both_exit.value.code == 2 + + args = parser.parse_args( + [ + "migration", + "refine", + "my-mig", + "--message", + "tighten it", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + ] + ) + + assert args.migration_command == "refine" + assert args.target == "my-mig" + assert args.message == "tighten it" + assert args.file is None + assert args.agent == "codex" + assert args.model == "test-model" + assert args.effort == "low" + + +def test_migration_list_includes_planning_ready_review_and_done_statuses( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "done-mig", status="done", current_phase="") + planning_dir = _write_migration( + live_dir, "planning-mig", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, planning_dir) + _write_migration( + live_dir, + "ready-review", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + + handle_migration_list(_list_args()) + + lines = [line.split("\t") for line in capsys.readouterr().out.splitlines()] + assert lines == [ + [ + "done-mig", + "done", + "(none)", + "no", + _CREATED, + "(none)", + "(none)", + ], + [ + "planning-mig", + "planning", + "planning:approaches", + "no", + _CREATED, + "(none)", + "(none)", + ], + [ + "ready-review", + "ready", + "phase-1-setup.md", + "yes", + _CREATED, + "(none)", + "needs approval", + ], + ] + + +def test_migration_list_filters_by_status_and_awaiting_review( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration( + live_dir, "planning-review", status="planning", current_phase="", phases=(), + ) + _write_migration(live_dir, "ready-review", awaiting_human_review=True) + 
_write_migration(live_dir, "ready-normal") + + handle_migration_list(_list_args(status="ready", awaiting_review=True)) + + assert capsys.readouterr().out.splitlines() == [ + "ready-review\tready\tphase-1-setup.md\tyes\t" + f"{_CREATED}\t(none)\t(none)" + ] + + +def test_migration_list_marks_invalid_planning_state_as_blocked( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + planning_dir = _write_migration( + live_dir, "planning-mig", status="planning", current_phase="", phases=(), + ) + state_path = planning_state_path(planning_dir) + state_path.parent.mkdir(parents=True) + state_path.write_text("{not json\n", encoding="utf-8") + + handle_migration_list(_list_args()) + + fields = capsys.readouterr().out.strip().split("\t") + assert fields[0:3] == ["planning-mig", "planning", "planning:blocked"] + assert fields[-1] == "planning-state-invalid" + + +def test_migration_list_marks_invalid_ready_cursor_as_blocked( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "ready-mig") + + def fail_resolve(_manifest: MigrationManifest) -> PhaseSpec: + raise ContinuousRefactorError("invalid current phase") + + monkeypatch.setattr( + "continuous_refactoring.migration_cli.resolve_current_phase", + fail_resolve, + ) + + handle_migration_list(_list_args()) + + fields = capsys.readouterr().out.strip().split("\t") + assert fields[0:3] == ["ready-mig", "ready", "blocked"] + assert fields[-1] == "invalid-current-phase" + + +def test_migration_resolver_accepts_slug_or_path_inside_live_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + + by_slug = resolve_migration_target( + 
def test_migration_resolver_rejects_outside_path_and_symlink_escape(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Targets outside the live dir, or symlinked out of it, are rejected."""
    repo, live_dir = _init_migration_project(tmp_path, monkeypatch)
    outside_dir = tmp_path / "outside"
    outside_dir.mkdir()

    def resolve(value: str) -> object:
        return resolve_migration_target(
            live_dir=live_dir, repo_root=repo, value=value,
        )

    with pytest.raises(ContinuousRefactorError, match="inside live migrations dir"):
        resolve(str(outside_dir))

    escape_link = live_dir / "linked"
    try:
        escape_link.symlink_to(outside_dir, target_is_directory=True)
    except (NotImplementedError, OSError) as error:
        # Skip the symlink half when the platform cannot create the link.
        pytest.skip(f"directory symlinks unavailable: {error}")

    with pytest.raises(ContinuousRefactorError, match="symlink"):
        resolve(str(escape_link))


def test_migration_resolver_rejects_parent_traversal_before_resolution(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``..`` segment is rejected even when the path points at a real migration."""
    repo, live_dir = _init_migration_project(tmp_path, monkeypatch)
    _write_migration(live_dir, "target")
    traversal = "migrations/../migrations/target"

    with pytest.raises(ContinuousRefactorError, match="parent traversal"):
        resolve_migration_target(
            live_dir=live_dir,
            repo_root=repo,
            value=traversal,
        )
pytest.raises(ContinuousRefactorError, match="ambiguous"): + resolve_migration_target( + live_dir=live_dir, repo_root=repo, value="ambiguous", + ) + + +def test_migration_review_accepts_slug_or_path_inside_live_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + ) + seen: list[Path] = [] + + def fake_review(request: object) -> None: + seen.append(request.target.path) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.handle_staged_migration_review", + fake_review, + ) + + handle_migration_review(_review_args("target")) + handle_migration_review(_review_args("migrations/target")) + + assert seen == [migration_dir, migration_dir] + + +def test_migration_review_rejects_outside_path_and_symlink_escape( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + outside = tmp_path / "outside" + outside.mkdir() + + with pytest.raises(SystemExit) as outside_exit: + handle_migration_review(_review_args(str(outside))) + + assert outside_exit.value.code == 2 + assert "inside live migrations dir" in capsys.readouterr().err + + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + with pytest.raises(SystemExit) as link_exit: + handle_migration_review(_review_args(str(link.relative_to(repo)))) + + assert link_exit.value.code == 2 + assert "symlink" in capsys.readouterr().err + + +def test_migration_review_rejects_missing_or_not_flagged_migration( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, 
"not-flagged") + + with pytest.raises(SystemExit) as missing_exit: + handle_migration_review(_review_args("missing")) + + assert missing_exit.value.code == 2 + assert "does not exist" in capsys.readouterr().err + + with pytest.raises(SystemExit) as not_flagged_exit: + handle_migration_review(_review_args("not-flagged")) + + assert not_flagged_exit.value.code == 2 + assert "not flagged" in capsys.readouterr().err + + +def test_migration_review_runs_agent_against_work_dir( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + seen: dict[str, Path | str] = {} + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: Path, + ) -> int: + seen["agent"] = agent + seen["cwd"] = repo_root + seen["prompt"] = prompt + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + handle_migration_review(_review_args("target")) + + assert seen["agent"] == "codex" + assert seen["cwd"] != migration_dir + assert isinstance(seen["cwd"], Path) + assert seen["cwd"].name == "target" + assert str(seen["cwd"]).endswith("/work/target") + assert str(migration_dir) in str(seen["prompt"]) + assert str(seen["cwd"]) in str(seen["prompt"]) + reloaded = load_manifest(migration_dir / "manifest.json") + assert reloaded.awaiting_human_review is False + assert reloaded.human_review_reason is None + + +def test_migration_review_failure_leaves_live_snapshot_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, 
monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + lambda *_args: 7, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 7 + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_review_rejects_stale_base_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + concurrent: dict[str, str] = {} + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: Path, + ) -> int: + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + (migration_dir / "plan.md").write_text("# Changed live plan\n", encoding="utf-8") + _commit_all(repo, "stale live migration") + concurrent["digest"] = snapshot_tree_digest(migration_dir) + concurrent["plan"] = (migration_dir / "plan.md").read_text(encoding="utf-8") + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "stale base snapshot" in err + assert "continuous-refactoring migration doctor target" in err + assert "continuous-refactoring migration review target" in 
err + assert snapshot_tree_digest(migration_dir) != before + assert snapshot_tree_digest(migration_dir) == concurrent["digest"] + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == concurrent["plan"] + + +def test_migration_review_rejects_inconsistent_workspace_and_preserves_live_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + before_manifest = load_manifest(migration_dir / "manifest.json") + before_phase = (migration_dir / _PHASE.file).read_text(encoding="utf-8") + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: Path, + ) -> int: + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + (repo_root / _PHASE.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n", + encoding="utf-8", + ) + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "review workspace validation failed" in err + assert "missing-phase-definition-of-done" in err + assert snapshot_tree_digest(migration_dir) == before + assert load_manifest(migration_dir / "manifest.json") == before_manifest + assert (migration_dir / _PHASE.file).read_text(encoding="utf-8") == before_phase + + +def test_migration_review_refuses_publish_when_review_flag_remains( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: 
pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + lambda *_args: 0, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + assert "awaiting_human_review is still set" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_rejects_outside_path_and_symlink_escape( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + outside = tmp_path / "outside" + outside.mkdir() + + with pytest.raises(SystemExit) as outside_exit: + handle_migration_refine(_refine_args(str(outside))) + + assert outside_exit.value.code == 2 + assert "inside live migrations dir" in capsys.readouterr().err + + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + with pytest.raises(SystemExit) as link_exit: + handle_migration_refine(_refine_args(str(link.relative_to(repo)))) + + assert link_exit.value.code == 2 + assert "symlink" in capsys.readouterr().err + + +def test_migration_refine_resumes_from_current_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated 
approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Expanded.\n", + { + "plan.md": "# Refined Plan\n", + _PHASE.file: _phase_doc("always", "Done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine(_refine_args("target", message="split phase one")) + + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels == ["expand"] + assert state.next_step == "review" + assert state.feedback[-1].source == "message" + assert state.feedback[-1].text == "split phase one" + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == "# Refined Plan\n" + + +def test_migration_refine_reopens_unexecuted_ready_migration_to_planning( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Revised.\n", + { + "plan.md": "# Plan v2\n", + _PHASE.file: _phase_doc("always", "Still done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine(_refine_args("target", message="make setup smaller")) + + manifest = load_manifest(migration_dir / "manifest.json") + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels == ["revise"] + assert manifest.status == "planning" + assert manifest.awaiting_human_review is False + assert manifest.human_review_reason is None + assert manifest.current_phase == "setup" + assert all(not phase.done for phase in manifest.phases) + assert state.next_step == "review-2" + assert state.revision_base_step_count == 5 + assert planning_stage_stdout_path(migration_dir, 
"final-review").is_file() + + +def test_migration_refine_refuses_migration_with_completed_phase( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + done_phase = replace(_PHASE, done=True) + migration_dir = _write_migration(live_dir, "target", phases=(done_phase,)) + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "completed phase" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_refuses_non_reopenable_ready_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + _write_terminal_skipped_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "Cannot reopen planning state" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_failure_leaves_live_snapshot_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + before_state = planning_state_path(migration_dir).read_text(encoding="utf-8") + fake = _RefineAgent([_agent_response("partial\n", {}, 
returncode=1)]) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target", message="try it")) + + assert exc_info.value.code == 1 + assert snapshot_tree_digest(migration_dir) == before + assert planning_state_path(migration_dir).read_text(encoding="utf-8") == before_state + + +def test_migration_refine_rejects_stale_base_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, migration_dir) + _commit_all(repo) + concurrent: dict[str, str] = {} + + def on_call(_migration_dir: Path) -> None: + (migration_dir / "plan.md").write_text("# Concurrent Plan\n", encoding="utf-8") + _commit_all(repo, "stale live migration") + concurrent["digest"] = snapshot_tree_digest(migration_dir) + concurrent["plan"] = (migration_dir / "plan.md").read_text(encoding="utf-8") + + fake = _RefineAgent([_agent_response("Approaches.\n", {})], on_call=on_call) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target", message="try it")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "stale base snapshot" in err + assert "continuous-refactoring migration doctor target" in err + assert "continuous-refactoring migration refine target" in err + assert snapshot_tree_digest(migration_dir) == concurrent["digest"] + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == concurrent["plan"] + + +def test_migration_doctor_checks_one_migration_by_name( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = 
_init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "valid") + + handle_migration_doctor(_doctor_args(target="valid")) + + assert capsys.readouterr().out == "" + + +def test_migration_doctor_all_checks_every_live_migration( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "valid") + (live_dir / "broken").mkdir() + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "broken\terror\tmissing-manifest" in out + + +def test_migration_doctor_reports_missing_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration( + live_dir, "planning-mig", status="planning", current_phase="", phases=(), + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="planning-mig")) + + assert exc_info.value.code == 1 + assert "planning-mig\terror\tplanning-state-missing" in capsys.readouterr().out + + +def test_migration_doctor_reports_ready_gate_phase_doc_drift( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "ready-mig") + (migration_dir / _PHASE.file).write_text( + "# Phase\n\n## Precondition\n\nReady.\n", + encoding="utf-8", + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="ready-mig")) + + assert exc_info.value.code == 1 + assert "ready-mig\terror\tmissing-phase-definition-of-done" in ( + capsys.readouterr().out + ) + + +def test_migration_doctor_reports_transaction_root_and_lock_presence( + 
tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + tx_root = live_dir / "__transactions__" + (tx_root / "tx-leftover").mkdir(parents=True) + lock = tx_root / ".lock" + lock.mkdir() + (lock / "owner.json").write_text( + json.dumps( + { + "pid": 123, + "operation": "planning-publish", + "created_at": _CREATED, + } + ), + encoding="utf-8", + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "__transactions__\terror\tpublish-lock-present" in out + assert "pid=123" in out + assert "__transactions__\terror\ttransaction-leftover" in out + + +def test_migration_doctor_reports_invalid_transaction_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + (live_dir / "__transactions__").write_text("not a dir\n", encoding="utf-8") + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + assert "__transactions__\terror\ttransaction-root-invalid" in ( + capsys.readouterr().out + ) + + +def test_migration_doctor_exits_nonzero_on_error_findings( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "missing-plan", write_plan=False) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="missing-plan")) + + assert exc_info.value.code == 1 + assert "missing-plan\terror\tmissing-plan" in capsys.readouterr().out + + +def test_migration_dispatches_subcommands( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seen: list[str] = [] + monkeypatch.setattr( + 
"continuous_refactoring.migration_cli.handle_migration_list", + lambda _args: seen.append("list"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_doctor", + lambda _args: seen.append("doctor"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_review", + lambda _args: seen.append("review"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_refine", + lambda _args: seen.append("refine"), + ) + + handle_migration(argparse.Namespace(migration_command="list")) + handle_migration(argparse.Namespace(migration_command="doctor")) + handle_migration(argparse.Namespace(migration_command="review")) + handle_migration(argparse.Namespace(migration_command="refine")) + + assert seen == ["list", "doctor", "review", "refine"] + + +def test_migration_exits_2_without_subcommand( + capsys: pytest.CaptureFixture[str], +) -> None: + with pytest.raises(SystemExit) as exc_info: + handle_migration(argparse.Namespace(migration_command=None)) + + assert exc_info.value.code == 2 + assert "Usage: continuous-refactoring migration" in capsys.readouterr().err + + +def _init_migration_project( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Path, Path]: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo = tmp_path / "project" + init_repo(repo) + monkeypatch.chdir(repo) + project = register_project(repo) + live_dir = repo / "migrations" + live_dir.mkdir() + set_live_migrations_dir(project.entry.uuid, "migrations") + return repo, live_dir + + +def _write_migration( + live_dir: Path, + slug: str, + *, + status: str = "ready", + awaiting_human_review: bool = False, + current_phase: str = "setup", + human_review_reason: str | None = None, + phases: tuple[PhaseSpec, ...] 
= (_PHASE,), + write_plan: bool = True, + write_phase: bool = True, +) -> Path: + migration_dir = live_dir / slug + migration_dir.mkdir(parents=True) + if write_plan: + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + if write_phase: + for phase in phases: + (migration_dir / phase.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n\n" + "## Definition of Done\n\n" + "Done.\n", + encoding="utf-8", + ) + save_manifest( + MigrationManifest( + name=slug, + created_at=_CREATED, + last_touch=_CREATED, + wake_up_on=None, + awaiting_human_review=awaiting_human_review, + status=status, + current_phase=current_phase, + phases=phases, + human_review_reason=human_review_reason, + ), + migration_dir / "manifest.json", + ) + return migration_dir + + +def _write_planning_state(repo: Path, migration_dir: Path) -> None: + save_planning_state( + new_planning_state("src/example.py", now=_CREATED), + planning_state_path(migration_dir), + repo_root=repo, + published_migration_root=migration_dir, + ) + + +def _write_completed_planning_state( + repo: Path, + migration_dir: Path, + completed: list[tuple[str, str, str]], +) -> None: + state = new_planning_state("src/example.py", now=_CREATED) + for step, outcome, stdout in completed: + stdout_path = planning_stage_stdout_path(migration_dir, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo).as_posix()}, + completed_at=_CREATED, + final_reason="ready" if step == "final-review" else None, + ) + save_planning_state( + state, + planning_state_path(migration_dir), + repo_root=repo, + published_migration_root=migration_dir, + ) + + +def _write_terminal_ready_planning_state(repo: Path, migration_dir: Path) -> None: + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + 
("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "approve-auto", "final-decision: approve-auto - ready\n"), + ], + ) + + +def _write_terminal_skipped_planning_state(repo: Path, migration_dir: Path) -> None: + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "reject", "final-decision: reject - flawed\n"), + ], + ) + + +def _commit_all(repo: Path, message: str = "test state") -> None: + run_command(["git", "add", "-A"], cwd=repo) + run_command(["git", "commit", "-m", message], cwd=repo) + + +def _list_args( + *, + status: str | None = None, + awaiting_review: bool = False, +) -> argparse.Namespace: + return argparse.Namespace(status=status, awaiting_review=awaiting_review) + + +def _doctor_args( + *, + target: str | None = None, + all_: bool = False, +) -> argparse.Namespace: + return argparse.Namespace(target=target, all=all_) + + +def _review_args(target: str) -> argparse.Namespace: + return argparse.Namespace( + target=target, + agent="codex", + model="test-model", + effort="low", + ) + + +def _refine_args( + target: str, + *, + message: str = "please refine this migration", + file: Path | None = None, +) -> argparse.Namespace: + return argparse.Namespace( + target=target, + message=message if file is None else None, + file=file, + agent="codex", + model="test-model", + effort="low", + ) + + +def _phase_doc(precondition: str, definition_of_done: str) -> str: + return ( + f"# Phase\n\n" + f"## Precondition\n\n{precondition}\n\n" + f"## Definition of Done\n\n{definition_of_done}\n" + ) + + +def _agent_response( + stdout: str, + writes: dict[str, str] | None = None, + *, + returncode: int = 0, +) -> tuple[str, dict[str, str], int]: + return 
stdout, writes or {}, returncode + + +class _RefineAgent: + def __init__( + self, + responses: list[tuple[str, dict[str, str], int]], + *, + on_call: object | None = None, + ) -> None: + self._responses = responses + self._index = 0 + self._on_call = on_call + self.stage_labels: list[str] = [] + self.prompts: list[str] = [] + + def __call__(self, **kwargs: object) -> CommandCapture: + assert self._index < len(self._responses) + stdout, writes, returncode = self._responses[self._index] + self._index += 1 + prompt = str(kwargs["prompt"]) + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + migration_dir = _prompt_migration_dir(prompt) + + self.prompts.append(prompt) + self.stage_labels.append(stdout_path.parent.name) + for rel_path, content in writes.items(): + path = migration_dir / rel_path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + if self._on_call is not None: + self._on_call(migration_dir) + + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=returncode, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + +def _prompt_migration_dir(prompt: str) -> Path: + for line in prompt.splitlines(): + if line.startswith("Migration directory:"): + return Path(line.split(":", 1)[1].strip()) + raise AssertionError("Migration directory missing from prompt") diff --git a/tests/test_cli_review.py b/tests/test_cli_review.py index 8a5da3b..78af49d 100644 --- a/tests/test_cli_review.py +++ b/tests/test_cli_review.py @@ -117,6 +117,11 @@ def _make_perform_args(migration: str) -> argparse.Namespace: ) +def _commit_all(repo: Path, message: str = "test state") -> None: + subprocess.run(["git", "add", "-A"], cwd=repo, check=True, 
capture_output=True) + subprocess.run(["git", "commit", "-m", message], cwd=repo, check=True, capture_output=True) + + def _init_review_project( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> tuple[Path, Path]: @@ -245,6 +250,45 @@ def test_review_list_filters_flagged_migrations( ] +def test_review_list_ignores_hidden_and_transaction_dirs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _, live_dir = _init_review_project(tmp_path, monkeypatch) + save_migration( + _make_manifest( + "visible-review", + awaiting_human_review=True, + human_review_reason="visible", + ), + live_dir / "visible-review" / "manifest.json", + ) + save_migration( + _make_manifest( + "hidden-review", + awaiting_human_review=True, + human_review_reason="hidden", + ), + live_dir / ".hidden-review" / "manifest.json", + ) + save_migration( + _make_manifest( + "transaction-review", + awaiting_human_review=True, + human_review_reason="transaction", + ), + live_dir / "__transactions__" / "manifest.json", + ) + + handle_review_list() + + out = capsys.readouterr().out + assert "visible-review\tready" in out + assert "hidden-review" not in out + assert "transaction-review" not in out + + @pytest.mark.parametrize( ("handler", "error_code", "setup", "expected_message"), [ @@ -341,6 +385,16 @@ def _setup_review_project( ), live_dir / "my-mig" / "manifest.json", ) + (live_dir / "my-mig" / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in _PHASES: + (live_dir / "my-mig" / phase.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n\n" + "## Definition of Done\n\n" + "Done.\n", + encoding="utf-8", + ) return repo, live_dir @@ -348,11 +402,12 @@ def test_review_perform_happy_path( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - _, live_dir = _setup_review_project( + repo, live_dir = _setup_review_project( tmp_path, monkeypatch, awaiting=True, human_review_reason="needs security audit", ) + 
_commit_all(repo) manifest_path = live_dir / "my-mig" / "manifest.json" captured_prompt: dict[str, str] = {} @@ -361,10 +416,14 @@ def fake_interactive( ) -> int: captured_prompt["prompt"] = prompt captured_prompt["repo_root"] = str(repo_root) - manifest = load_migration_manifest(manifest_path) + manifest = load_migration_manifest(repo_root / "manifest.json") from dataclasses import replace - updated = replace(manifest, awaiting_human_review=False) - save_migration(updated, manifest_path) + updated = replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ) + save_migration(updated, repo_root / "manifest.json") return 0 monkeypatch.setattr( @@ -376,7 +435,8 @@ def fake_interactive( assert "needs security audit" in captured_prompt["prompt"] assert "phase-2-review-target.md" in captured_prompt["prompt"] assert "Name: review-target" in captured_prompt["prompt"] - assert captured_prompt["repo_root"] == str(Path.cwd().resolve()) + assert captured_prompt["repo_root"] != str(Path.cwd().resolve()) + assert captured_prompt["repo_root"].endswith("/work/my-mig") reloaded = load_migration_manifest(manifest_path) assert reloaded.awaiting_human_review is False @@ -387,12 +447,13 @@ def test_review_perform_happy_path_without_current_phase( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - _, live_dir = _setup_review_project( + repo, live_dir = _setup_review_project( tmp_path, monkeypatch, awaiting=True, current_phase="", human_review_reason="phase cursor cleared", ) + _commit_all(repo) manifest_path = live_dir / "my-mig" / "manifest.json" captured_prompt: dict[str, str] = {} @@ -400,10 +461,15 @@ def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, ) -> int: captured_prompt["prompt"] = prompt - manifest = load_migration_manifest(manifest_path) + manifest = load_migration_manifest(repo_root / "manifest.json") from dataclasses import replace - updated = replace(manifest, awaiting_human_review=False) - 
save_migration(updated, manifest_path) + updated = replace( + manifest, + awaiting_human_review=False, + current_phase="review-target", + human_review_reason=None, + ) + save_migration(updated, repo_root / "manifest.json") return 0 monkeypatch.setattr( @@ -423,6 +489,7 @@ def test_review_perform_exits_1_when_flag_not_cleared( capsys: pytest.CaptureFixture[str], ) -> None: repo, live_dir = _setup_review_project(tmp_path, monkeypatch, awaiting=True) + _commit_all(repo) def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, @@ -446,7 +513,8 @@ def test_review_perform_exits_with_agent_returncode( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - _setup_review_project(tmp_path, monkeypatch, awaiting=True) + repo, _live_dir = _setup_review_project(tmp_path, monkeypatch, awaiting=True) + _commit_all(repo) def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, @@ -495,6 +563,41 @@ def test_review_perform_exits_2_when_not_flagged_for_review( assert "not flagged" in err +def test_top_level_review_perform_routes_to_migration_review_compatibility_path( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _repo, live_dir = _setup_review_project( + tmp_path, + monkeypatch, + awaiting=True, + human_review_reason="needs approval", + ) + seen: dict[str, object] = {} + + def fake_staged_review(request: object) -> None: + seen["slug"] = request.target.slug + seen["path"] = request.target.path + seen["agent"] = request.agent + seen["model"] = request.model + seen["effort"] = request.effort + + monkeypatch.setattr( + "continuous_refactoring.review_cli.handle_staged_migration_review", + fake_staged_review, + ) + + handle_review_perform(_make_perform_args("my-mig")) + + assert seen == { + "slug": "my-mig", + "path": live_dir / "my-mig", + "agent": "codex", + "model": "test-model", + "effort": "low", + } + + def test_review_dispatches_list_subcommand( monkeypatch: 
pytest.MonkeyPatch, ) -> None: diff --git a/tests/test_continuous_refactoring.py b/tests/test_continuous_refactoring.py index 906270b..005e7d4 100644 --- a/tests/test_continuous_refactoring.py +++ b/tests/test_continuous_refactoring.py @@ -99,7 +99,10 @@ "check_phase_ready", "execute_phase", "PlanningOutcome", - "run_planning", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", "CONTINUOUS_REFACTORING_STATUS_BEGIN", "CONTINUOUS_REFACTORING_STATUS_END", "CLASSIFIER_PROMPT", @@ -227,6 +230,10 @@ def test_package_exports_contain_known_public_symbols() -> None: "bump_last_touch", "check_phase_ready", "PlanningOutcome", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", "compose_full_prompt", "ClassifierDecision", "run_once", diff --git a/tests/test_failure_report.py b/tests/test_failure_report.py index 54679f8..cc8cd26 100644 --- a/tests/test_failure_report.py +++ b/tests/test_failure_report.py @@ -291,3 +291,45 @@ def test_persist_decision_records_non_commit_snapshot( events = artifacts.events_path.read_text(encoding="utf-8") assert '"event": "failure_doc_written"' in events assert '"event": "target_transition"' in events + + +def test_planning_step_failure_snapshot_names_step_and_resume_behavior( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo_root = tmp_path / "repo" + repo_root.mkdir() + artifacts = _artifacts(tmp_path / "artifacts") + record = _record( + decision="abandon", + retry_recommendation="new-target", + target="auth-cleanup", + call_role="planning.review-2", + phase_reached="planning.review-2", + failure_kind="planning-step-failed", + summary="Revised plan still has findings", + next_retry_focus=None, + ) + + result = persist_decision( + repo_root, + artifacts, + attempt=1, + retry=1, + validation_command="uv run pytest", + record=record, + ) + + assert result is not None + 
content = result.read_text(encoding="utf-8") + assert 'call_role: "planning.review-2"' in content + assert "planning step `review-2`" in content + assert ".planning/state.json" in content + assert "failed current-step output" in content + assert "not resume input" in content + + events = artifacts.events_path.read_text(encoding="utf-8") + assert '"event": "planning_step_failure_doc_written"' in events + assert '"planning_step": "review-2"' in events + assert '"event": "failure_doc_written"' not in events diff --git a/tests/test_focus_on_live_migrations.py b/tests/test_focus_on_live_migrations.py index 45dc831..d97eba3 100644 --- a/tests/test_focus_on_live_migrations.py +++ b/tests/test_focus_on_live_migrations.py @@ -8,7 +8,7 @@ import pytest import continuous_refactoring -from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.artifacts import CommandCapture, ContinuousRefactorError from continuous_refactoring.cli import build_parser from continuous_refactoring.decisions import DecisionRecord, RouteOutcome from continuous_refactoring.effort import EffortBudget @@ -19,6 +19,13 @@ migration_root, save_manifest, ) +from continuous_refactoring.planning_state import ( + complete_planning_step, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) from conftest import make_run_loop_args @@ -63,11 +70,76 @@ def _seed_manifest( ) root = migration_root(live_dir, name) root.mkdir(parents=True, exist_ok=True) + (root / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in phases: + phase_path = root / phase.file + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text(f"# {phase.name}\n", encoding="utf-8") + path = root / "manifest.json" + save_manifest(manifest, path) + return path + + +def _seed_planning_manifest(live_dir: Path, name: str) -> Path: + manifest = MigrationManifest( + name=name, + created_at=(_utc_now() - 
timedelta(days=2)).isoformat(timespec="milliseconds"), + last_touch=(_utc_now() - timedelta(days=1)).isoformat(timespec="milliseconds"), + wake_up_on=None, + awaiting_human_review=False, + status="planning", + current_phase="", + phases=(), + ) + root = migration_root(live_dir, name) + root.mkdir(parents=True, exist_ok=True) path = root / "manifest.json" save_manifest(manifest, path) return path +def _seed_planning_manifest_at_final_review( + repo_root: Path, + live_dir: Path, + name: str, +) -> Path: + path = _seed_planning_manifest(live_dir, name) + root = path.parent + (root / "plan.md").write_text("# Plan\n", encoding="utf-8") + (root / _PHASE.file).write_text( + "## Precondition\n\nalways\n\n" + "## Definition of Done\n\nSetup is complete.\n", + encoding="utf-8", + ) + manifest = load_manifest(path) + save_manifest( + replace(manifest, current_phase=_PHASE.name, phases=(_PHASE,)), + path, + ) + state = new_planning_state( + f"Finish {name}", + now="2026-04-29T12:00:00.000+00:00", + ) + for step, outcome, stdout in ( + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "no findings\n"), + ): + stdout_path = planning_stage_stdout_path(root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at="2026-04-29T12:00:00.000+00:00", + ) + save_planning_state(state, planning_state_path(root), repo_root=repo_root) + return path + + def _mark_done(path: Path) -> None: manifest = load_manifest(path) updated = replace( @@ -79,6 +151,31 @@ def _mark_done(path: Path) -> None: save_manifest(updated, path) +def _prompt_migration_dir(prompt: str, repo_root: Path) -> Path: + for line in prompt.splitlines(): + if line.startswith("Migration directory:"): + path = 
Path(line.split(":", 1)[1].strip()) + return path if path.is_absolute() else repo_root / path + raise AssertionError("Migration directory missing from prompt") + + +def _planning_agent_result(kwargs: dict[str, object], stdout: str) -> CommandCapture: + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=0, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + def _flag_for_review(path: Path) -> None: manifest = load_manifest(path) updated = replace( @@ -101,6 +198,30 @@ def _commit_ok(target: str) -> DecisionRecord: ) +def _planning_commit_ok(target: str) -> DecisionRecord: + return DecisionRecord( + decision="commit", + retry_recommendation="none", + target=target, + call_role="planning.approaches", + phase_reached="planning.approaches", + failure_kind="none", + summary="ok", + ) + + +def _planning_blocked(target: str) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=target, + call_role="planning.state", + phase_reached="planning.state", + failure_kind="planning-state-missing", + summary="missing planning state", + ) + + def _abandon(target: str) -> DecisionRecord: return DecisionRecord( decision="abandon", @@ -451,6 +572,109 @@ def fake_execute( captured = capsys.readouterr() assert "Migration tick deferred all eligible migrations: wait for follow-up" in captured.out +def test_e2e_focused_run_completes_planning_before_phase_execution( + run_loop_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = run_loop_env / "migrations" + live_dir.mkdir() + planning_path = _seed_planning_manifest_at_final_review( + run_loop_env, + live_dir, + 
"mid-planning", + ) + ready_path = _seed_manifest(live_dir, "ready-phase") + continuous_refactoring.run_command(["git", "add", "migrations"], cwd=run_loop_env) + continuous_refactoring.run_command( + ["git", "commit", "-m", "seed focused migrations"], + cwd=run_loop_env, + ) + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + calls: list[str] = [] + + def final_review_agent(**kwargs: object) -> CommandCapture: + stage_label = Path(str(kwargs["stdout_path"])).parent.name + assert stage_label == "final-review" + migration_dir = _prompt_migration_dir( + str(kwargs["prompt"]), + Path(str(kwargs["repo_root"])), + ) + assert migration_dir.name == "mid-planning" + calls.append("planning:final-review") + return _planning_agent_result( + kwargs, + "final-decision: approve-auto - ready\n", + ) + + monkeypatch.setattr( + "continuous_refactoring.planning.maybe_run_agent", + final_review_agent, + ) + + executed: list[str] = [] + + def fake_ready( + phase: PhaseSpec, + manifest: MigrationManifest, + *_args: object, + **_kwargs: object, + ) -> tuple[str, str]: + calls.append(f"ready:{manifest.name}") + if executed: + return ("no", "stop after first phase") + return ("yes", "ready") + + def fake_execute( + phase: PhaseSpec, + manifest: MigrationManifest, + *_args: object, + **_kwargs: object, + ) -> object: + calls.append(f"execute:{manifest.name}") + executed.append(manifest.name) + manifest_path = live_dir / manifest.name / "manifest.json" + _mark_done(manifest_path) + from continuous_refactoring.phases import ExecutePhaseOutcome + + return ExecutePhaseOutcome(status="done", reason="ok") + + monkeypatch.setattr("continuous_refactoring.migration_tick.check_phase_ready", fake_ready) + monkeypatch.setattr("continuous_refactoring.migration_tick.execute_phase", fake_execute) + + args = make_run_loop_args(run_loop_env, focus_on_live_migrations=True) + assert continuous_refactoring.run_migrations_focused_loop(args) == 0 + assert calls[0] == "planning:final-review" + 
assert len(executed) == 1 + assert calls.index(f"execute:{executed[0]}") > calls.index("planning:final-review") + assert load_manifest(planning_path).status in {"ready", "done"} + assert load_manifest(ready_path).status in {"ready", "done"} + + +def test_focused_loop_stops_when_only_blocked_planning_remains( + run_loop_env: Path, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = tmp_path / "live-migrations" + live_dir.mkdir() + _seed_planning_manifest(live_dir, "blocked-planning") + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_planning_tick", + lambda *_args, **_kwargs: ("blocked", _planning_blocked("blocked-planning")), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_migration_tick", + lambda *_args, **_kwargs: pytest.fail("blocked planning must stop phase tick"), + ) + + args = make_run_loop_args( + run_loop_env, + focus_on_live_migrations=True, + max_consecutive_failures=1, + ) + with pytest.raises(ContinuousRefactorError, match="1 consecutive failures"): + continuous_refactoring.run_migrations_focused_loop(args) def test_focused_loop_terminates_when_only_awaiting_human_review_remains( diff --git a/tests/test_loop_migration_tick.py b/tests/test_loop_migration_tick.py index dab8b74..d1861b7 100644 --- a/tests/test_loop_migration_tick.py +++ b/tests/test_loop_migration_tick.py @@ -26,9 +26,17 @@ migration_root, save_manifest, ) +from continuous_refactoring.planning import PlanningStepResult +from continuous_refactoring.planning_state import ( + new_planning_state, + planning_state_path, + save_planning_state, +) from continuous_refactoring.phases import ExecutePhaseOutcome from continuous_refactoring.migration_tick import ( enumerate_eligible_manifests, + enumerate_eligible_planning_manifests, + try_planning_tick, try_migration_tick, ) @@ -90,11 +98,87 @@ def _make_manifest( def _save(manifest: MigrationManifest, live_dir: Path) 
-> Path: root = migration_root(live_dir, manifest.name) root.mkdir(parents=True, exist_ok=True) + if manifest.status in ("ready", "in-progress"): + plan_path = root / "plan.md" + if not plan_path.exists(): + plan_path.write_text("# Plan\n", encoding="utf-8") + for phase in manifest.phases: + phase_path = root / phase.file + if not phase_path.exists(): + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text(f"# {phase.name}\n", encoding="utf-8") path = root / "manifest.json" save_manifest(manifest, path) return path +def _make_planning_manifest( + name: str, + *, + last_touch: datetime, + created_at: datetime | None = None, + awaiting_human_review: bool = False, + human_review_reason: str | None = None, + cooldown_until: datetime | None = None, +) -> MigrationManifest: + ts = (created_at or _utc_now()).isoformat(timespec="milliseconds") + return MigrationManifest( + name=name, + created_at=ts, + last_touch=last_touch.isoformat(timespec="milliseconds"), + wake_up_on=None, + awaiting_human_review=awaiting_human_review, + status="planning", + current_phase="", + phases=(), + human_review_reason=human_review_reason, + cooldown_until=( + cooldown_until.isoformat(timespec="milliseconds") + if cooldown_until is not None + else None + ), + ) + + +def _save_planning( + live_dir: Path, + repo_root: Path, + name: str, + *, + last_touch: datetime, + created_at: datetime | None = None, + awaiting_human_review: bool = False, + cooldown_until: datetime | None = None, + state: str = "valid", +) -> Path: + manifest = _make_planning_manifest( + name, + last_touch=last_touch, + created_at=created_at, + awaiting_human_review=awaiting_human_review, + human_review_reason="needs review" if awaiting_human_review else None, + cooldown_until=cooldown_until, + ) + root = migration_root(live_dir, manifest.name) + root.mkdir(parents=True, exist_ok=True) + path = root / "manifest.json" + save_manifest(manifest, path) + if state == "valid": + save_planning_state( + 
new_planning_state(f"Target {name}", now=manifest.created_at), + planning_state_path(root), + repo_root=repo_root, + published_migration_root=root, + ) + elif state == "invalid": + state_path = planning_state_path(root) + state_path.parent.mkdir(parents=True, exist_ok=True) + state_path.write_text("{not json", encoding="utf-8") + elif state != "missing": + raise AssertionError(f"unknown state fixture: {state}") + return path + + def _seed_manifest( run_once_env: Path, *, @@ -252,6 +336,40 @@ def noop_finalize(*_args: object, **_kwargs: object) -> None: ) +def _planning_tick( + live_dir: Path, + repo_root: Path, + *, + taste: str = "runtime taste", + attempt: int = 7, + finalize_commit: Callable[..., object] | None = None, +) -> tuple[RouteOutcome, DecisionRecord | None]: + artifacts = create_run_artifacts( + repo_root=repo_root, + agent="codex", + model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + def noop_finalize(*_args: object, **_kwargs: object) -> None: + return None + + return try_planning_tick( + live_dir, + taste, + repo_root, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + attempt=attempt, + finalize_commit=finalize_commit or noop_finalize, + ) + + def test_enumerate_eligible_manifests_ignores_noise_and_sorts_by_created_at( tmp_path: Path, ) -> None: @@ -309,6 +427,34 @@ def test_enumerate_eligible_manifests_ignores_noise_and_sorts_by_created_at( assert [path.parent.name for _, path in candidates] == ["older", "newer"] +def test_enumeration_uses_visible_migration_dirs(tmp_path: Path) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + now = _utc_now() + + _save( + _make_manifest(".hidden", last_touch=now - timedelta(days=1)), + live_dir, + ) + _save( + _make_manifest("__transactions__", last_touch=now - timedelta(days=1)), + live_dir, + ) + _save( + _make_manifest( + "visible", + last_touch=now - timedelta(days=1), + created_at=now - 
timedelta(hours=1), + ), + live_dir, + ) + + candidates = enumerate_eligible_manifests(live_dir, now) + + assert [manifest.name for manifest, _ in candidates] == ["visible"] + assert [path.parent.name for _, path in candidates] == ["visible"] + + def test_enumerate_eligible_manifests_includes_cooling_effort_candidate_once( tmp_path: Path, ) -> None: @@ -350,6 +496,261 @@ def test_enumerate_eligible_manifests_includes_cooling_effort_candidate_once( ] +def test_enumerate_eligible_planning_manifests_includes_planning_migrations( + run_once_env: Path, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "newer-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ) + _save_planning( + live_dir, + run_once_env, + "older-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + ) + _save_planning( + live_dir, + run_once_env, + "needs-review", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=3), + awaiting_human_review=True, + ) + _save_planning( + live_dir, + run_once_env, + "cooling", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=4), + cooldown_until=now + timedelta(hours=1), + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=5), + ), + live_dir, + ) + + candidates = enumerate_eligible_planning_manifests(live_dir, now) + + assert [manifest.name for manifest, _ in candidates] == [ + "older-plan", + "newer-plan", + ] + + +def test_try_migration_tick_completes_planning_before_ready_phase( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "mid-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - 
timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + planning_calls: list[tuple[str, str]] = [] + commits: list[tuple[str, str]] = [] + + def fake_planning( + migration_name: str, + target: str, + *_args: object, + **_kwargs: object, + ) -> PlanningStepResult: + planning_calls.append((migration_name, target)) + return PlanningStepResult( + status="published", + migration_name=migration_name, + step="approaches", + next_step="pick-best", + reason="planning accepted", + ) + + def finalize( + _repo_root: Path, + _head_before: str, + message: str, + **kwargs: object, + ) -> None: + commits.append((message, str(kwargs["phase"]))) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + _patch_check_ready( + monkeypatch, + "yes", + "ready check must not run before planning", + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick( + live_dir, + run_once_env, + finalize_commit=finalize, + ) + + assert outcome == "commit" + assert record is not None + assert record.call_role == "planning.approaches" + assert planning_calls == [("mid-plan", "Target mid-plan")] + assert commits == [ + ( + "continuous refactor: planning/mid-plan/approaches\n" + "\n" + "Why:\n" + "planning accepted", + "planning", + ) + ] + + +def test_try_migration_tick_does_not_call_ready_check_for_planning_status( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "only-plan", + last_touch=now - timedelta(days=1), + ) + + def fake_planning(*_args: object, **_kwargs: object) -> PlanningStepResult: + return PlanningStepResult( + status="published", + migration_name="only-plan", + step="approaches", + next_step="pick-best", + reason="ok", + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + 
_patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "commit" + assert record is not None + assert record.target == "only-plan" + + +def test_missing_planning_state_blocks_before_ready_phase_or_source_routing( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "missing-state", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + state="missing", + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + _patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-state-missing" + assert record.target == "missing-state" + assert ".planning/state.json" in record.summary + + +def test_invalid_planning_state_blocks_before_ready_phase_or_source_routing( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "invalid-state", + last_touch=now - timedelta(days=1), + state="invalid", + ) + _patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-state-invalid" + assert record.target == "invalid-state" + + +def test_planning_slug_mismatch_blocks_before_resume_publish( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = 
_migrations_dir(run_once_env) + now = _utc_now() + manifest_path = _save_planning( + live_dir, + run_once_env, + "visible-name", + last_touch=now - timedelta(days=1), + ) + manifest = load_manifest(manifest_path) + save_manifest(replace(manifest, name="manifest-name"), manifest_path) + + def fake_planning(*_args: object, **_kwargs: object) -> object: + raise AssertionError("slug mismatch must block before planning publish") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.target == "visible-name" + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-consistency-error" + assert "manifest-slug-mismatch" in record.summary + + def test_try_migration_tick_skips_migrations_awaiting_human_review( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -419,6 +820,68 @@ def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: assert record.summary == "ready check failed at /phase.md " +def test_execution_gate_blocks_inconsistent_migration_before_ready_check( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + manifest = _make_manifest( + "missing-plan", + last_touch=now - timedelta(days=1), + ) + root = migration_root(live_dir, manifest.name) + root.mkdir(parents=True) + (root / _PHASE_0.file).write_text("# Setup\n", encoding="utf-8") + save_manifest(manifest, root / "manifest.json") + + def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: + raise AssertionError("check_phase_ready must not be called") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.check_phase_ready", + fail_ready, + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _tick(live_dir, run_once_env) + + assert outcome == "abandon" 
+ assert record is not None + assert record.decision == "abandon" + assert record.call_role == "phase.execution-gate" + assert record.phase_reached == "phase.execution-gate" + assert record.failure_kind == "migration-consistency-error" + assert "missing-plan" in record.summary + + +def test_execution_gate_reports_malformed_manifest_before_candidate_loading( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + migration_dir = live_dir / "bad-manifest" + migration_dir.mkdir(parents=True) + (migration_dir / "manifest.json").write_text("{not json", encoding="utf-8") + + def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: + raise AssertionError("check_phase_ready must not be called") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.check_phase_ready", + fail_ready, + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _tick(live_dir, run_once_env) + + assert outcome == "abandon" + assert record is not None + assert record.decision == "abandon" + assert record.target == "bad-manifest" + assert record.call_role == "phase.execution-gate" + assert record.failure_kind == "migration-consistency-error" + assert "invalid-manifest" in record.summary + + def test_ready_check_wrapped_failure_keeps_root_cause_in_summary( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_migration_consistency.py b/tests/test_migration_consistency.py new file mode 100644 index 0000000..c13f075 --- /dev/null +++ b/tests/test_migration_consistency.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from continuous_refactoring.migration_consistency import ( + CONSISTENCY_MODES, + CONSISTENCY_SEVERITIES, + MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + PhaseSpec, + 
save_manifest, +) + +_PHASE = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", +) + + +def _manifest( + name: str, + *, + status: str = "ready", + phase: PhaseSpec = _PHASE, +) -> MigrationManifest: + return MigrationManifest( + name=name, + created_at="2025-01-01T00:00:00.000+00:00", + last_touch="2025-01-01T00:00:00.000+00:00", + wake_up_on=None, + awaiting_human_review=False, + status=status, + current_phase=phase.name, + phases=(phase,), + ) + + +def _write_migration( + root: Path, + slug: str, + *, + manifest_name: str | None = None, + status: str = "ready", + phase: PhaseSpec = _PHASE, + write_plan: bool = True, + write_phase: bool = True, +) -> Path: + migration_dir = root / slug + migration_dir.mkdir(parents=True) + if write_plan: + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + if write_phase: + phase_path = migration_dir / phase.file + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text("# Setup\n", encoding="utf-8") + save_manifest( + _manifest(manifest_name or slug, status=status, phase=phase), + migration_dir / "manifest.json", + ) + return migration_dir + + +def _codes(findings: list[MigrationConsistencyFinding]) -> set[str]: + return {finding.code for finding in findings} + + +def test_visible_migration_dirs_skip_hidden_dotted_and_transaction_dirs( + tmp_path: Path, +) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + (live_dir / "plain-file").write_text("ignore\n", encoding="utf-8") + (live_dir / "visible-b").mkdir() + (live_dir / ".staged").mkdir() + (live_dir / "__internal").mkdir() + (live_dir / "__transactions__").mkdir() + (live_dir / "visible-a").mkdir() + + dirs = iter_visible_migration_dirs(live_dir) + + assert [path.name for path in dirs] == ["visible-a", "visible-b"] + + +def test_visible_migration_dirs_skip_directory_symlinks(tmp_path: Path) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + outside = tmp_path / "outside" + 
outside.mkdir() + (live_dir / "real").mkdir() + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + assert link.is_dir() + assert [path.name for path in iter_visible_migration_dirs(live_dir)] == ["real"] + + +def test_consistency_reports_missing_manifest(tmp_path: Path) -> None: + migration_dir = tmp_path / "missing-manifest" + migration_dir.mkdir() + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert [(finding.code, finding.severity, finding.mode, finding.path) for finding in findings] == [ + ( + "missing-manifest", + "error", + "doctor", + migration_dir / "manifest.json", + ) + ] + + +def test_consistency_rejects_manifest_slug_mismatch(tmp_path: Path) -> None: + migration_dir = _write_migration( + tmp_path, "actual-slug", manifest_name="different-slug", + ) + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "manifest-slug-mismatch" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_rejects_manifest_phase_symlink_escape(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "symlink-escape", write_phase=False) + outside = tmp_path / "outside-phase.md" + outside.write_text("# Outside\n", encoding="utf-8") + try: + (migration_dir / _PHASE.file).symlink_to(outside) + except (NotImplementedError, OSError) as error: + pytest.skip(f"symlinks unavailable: {error}") + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "phase-file-escapes-migration" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_reports_duplicate_phase_doc_indexes(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "duplicate-phase-index") + (migration_dir / "phase-0-other.md").write_text("# Other\n", encoding="utf-8") + + 
findings = check_migration_consistency(migration_dir, mode="doctor") + + assert "duplicate-phase-doc-index" in _codes(findings) + + +def test_consistency_reports_manifest_phase_missing_doc(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "missing-phase-doc", write_phase=False) + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "missing-phase-file" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_requires_plan_for_ready_and_in_progress(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "missing-plan", write_plan=False) + + execution_findings = check_migration_consistency( + migration_dir, mode="execution-gate", + ) + planning_findings = check_migration_consistency( + migration_dir, mode="planning-snapshot", + ) + + assert "missing-plan" in _codes(execution_findings) + assert "missing-plan" not in _codes(planning_findings) + assert has_blocking_consistency_findings(execution_findings) + + +def test_consistency_modes_share_severity_blocking_contract(tmp_path: Path) -> None: + info = MigrationConsistencyFinding( + severity="info", + mode="doctor", + code="context", + path=tmp_path, + message="context", + ) + warning = MigrationConsistencyFinding( + severity="warning", + mode="ready-publish", + code="suspicious", + path=tmp_path, + message="suspicious", + ) + error = MigrationConsistencyFinding( + severity="error", + mode="execution-gate", + code="unsafe", + path=tmp_path, + message="unsafe", + ) + + assert set(CONSISTENCY_MODES) == { + "planning-snapshot", + "ready-publish", + "execution-gate", + "doctor", + } + assert set(CONSISTENCY_SEVERITIES) == {"info", "warning", "error"} + assert not has_blocking_consistency_findings([info, warning]) + assert has_blocking_consistency_findings([info, warning, error]) diff --git a/tests/test_no_driver_branching.py b/tests/test_no_driver_branching.py index b92a71d..51818d4 100644 --- 
a/tests/test_no_driver_branching.py +++ b/tests/test_no_driver_branching.py @@ -168,6 +168,9 @@ def _seed_live_manifest(live_dir: Path, name: str = "auto-migration") -> None: ) migration_dir = live_dir / name migration_dir.mkdir(parents=True, exist_ok=True) + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in manifest.phases: + (migration_dir / phase.file).write_text(f"# {phase.name}\n", encoding="utf-8") save_manifest(manifest, migration_dir / "manifest.json") diff --git a/tests/test_planning.py b/tests/test_planning.py index df1cfdf..6c3fd08 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -1,9 +1,11 @@ from __future__ import annotations +import shutil from pathlib import Path import pytest +from conftest import init_repo from continuous_refactoring.artifacts import ( CommandCapture, ContinuousRefactorError, @@ -12,19 +14,31 @@ ) from continuous_refactoring.migrations import ( MigrationManifest, - intentional_skips_dir, load_manifest, migration_root, save_manifest, ) from continuous_refactoring.planning import ( + _build_durable_planning_context, _parse_final_decision, _refresh_manifest, _review_has_findings, _discover_phase_files, PlanningOutcome, - run_planning, + PlanningRefineRequest, + run_next_planning_step, + run_refine_planning_step, ) +from continuous_refactoring.git import run_command +from continuous_refactoring.planning_state import ( + complete_planning_step, + load_planning_state, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) +from continuous_refactoring.planning_publish import snapshot_tree_digest _TASTE = "- Prefer deletion over wrapping.\n- Fail fast at boundaries." 
@@ -37,7 +51,10 @@ def _planning_context( monkeypatch: pytest.MonkeyPatch, ) -> tuple[Path, Path]: monkeypatch.setenv("TMPDIR", str(tmp_path / "tmpdir")) + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) (tmp_path / "tmpdir").mkdir() + (tmp_path / "xdg").mkdir() + init_repo(tmp_path) live_dir = tmp_path / "live" live_dir.mkdir() @@ -45,6 +62,26 @@ def _planning_context( return live_dir, mig_root +def _planning_repo_context( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Path, Path, Path]: + monkeypatch.setenv("TMPDIR", str(tmp_path / "tmpdir")) + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + (tmp_path / "tmpdir").mkdir() + (tmp_path / "xdg").mkdir() + repo_root = tmp_path / "repo" + init_repo(repo_root) + live_dir = repo_root / "live" + live_dir.mkdir() + return repo_root, live_dir, migration_root(live_dir, _MIGRATION) + + +def _commit_all(repo_root: Path, message: str) -> None: + run_command(["git", "add", "-A"], cwd=repo_root) + run_command(["git", "commit", "-m", message], cwd=repo_root) + + def _planning_decision_response(decision: str, reason: str) -> tuple[str, dict[str, str]]: return f"final-decision: {decision} — {reason}\n", {} @@ -59,11 +96,16 @@ def _run_planning( mock = _MockAgent(mig_root, responses) monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) - outcome = run_planning( - _MIGRATION, _TARGET, _TASTE, tmp_path, live_dir, - _make_artifacts(tmp_path), - agent="codex", model="fake", effort="low", timeout=None, - ) + outcome: PlanningOutcome | None = None + while outcome is None: + result = run_next_planning_step( + _MIGRATION, _TARGET, _TASTE, tmp_path, live_dir, + _make_artifacts(tmp_path), + agent="codex", model="fake", effort="low", timeout=None, + ) + assert result.status == "published", result.reason + _commit_all(tmp_path, f"planning {result.step}") + outcome = result.terminal_outcome return outcome, mock, mig_root @@ -103,8 +145,12 @@ def __call__(self, **kwargs: object) -> 
CommandCapture: stdout_path = Path(str(kwargs["stdout_path"])) self.stage_labels.append(stdout_path.parent.name) + migration_dir = _prompt_migration_dir( + self.prompts[-1], + Path(str(kwargs["repo_root"])), + ) for rel_path, content in writes.items(): - full = self._mig_root / rel_path + full = migration_dir / rel_path full.parent.mkdir(parents=True, exist_ok=True) full.write_text(content, encoding="utf-8") @@ -124,6 +170,198 @@ def __call__(self, **kwargs: object) -> CommandCapture: ) +class _WorkspaceAgent: + def __init__( + self, + responses: list[tuple[str, dict[str, str], int]], + ) -> None: + self._responses = responses + self._index = 0 + self.stage_labels: list[str] = [] + self.prompts: list[str] = [] + self.migration_dirs: list[Path] = [] + + def __call__(self, **kwargs: object) -> CommandCapture: + assert self._index < len(self._responses), ( + f"Unexpected agent call #{self._index + 1}" + ) + stdout, writes, returncode = self._responses[self._index] + self._index += 1 + prompt = str(kwargs["prompt"]) + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + migration_dir = _prompt_migration_dir(prompt, Path(str(kwargs["repo_root"]))) + + self.prompts.append(prompt) + self.stage_labels.append(stdout_path.parent.name) + self.migration_dirs.append(migration_dir) + + for rel_path, content in writes.items(): + full = migration_dir / rel_path + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content, encoding="utf-8") + + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=returncode, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + +def _prompt_migration_dir(prompt: str, repo_root: Path) -> Path: + for line in prompt.splitlines(): + if 
line.startswith("Migration directory:"): + path = Path(line.split(":", 1)[1].strip()) + return path if path.is_absolute() else repo_root / path + raise AssertionError("Migration directory missing from prompt") + + +def _workspace_response( + stdout: str, + writes: dict[str, str] | None = None, + *, + returncode: int = 0, +) -> tuple[str, dict[str, str], int]: + return stdout, writes or {}, returncode + + +def _run_next_step( + repo_root: Path, + live_dir: Path, + responses: list[tuple[str, dict[str, str], int]], + monkeypatch: pytest.MonkeyPatch, +): + mock = _WorkspaceAgent(responses) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) + result = run_next_planning_step( + _MIGRATION, + _TARGET, + _TASTE, + repo_root, + live_dir, + _make_artifacts(repo_root), + agent="codex", + model="fake", + effort="low", + timeout=None, + ) + return result, mock + + +def _run_refine_step( + repo_root: Path, + live_dir: Path, + responses: list[tuple[str, dict[str, str], int]], + monkeypatch: pytest.MonkeyPatch, + *, + feedback: str = "Refine this plan.", +): + mock = _WorkspaceAgent(responses) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) + result = run_refine_planning_step( + PlanningRefineRequest( + migration_name=_MIGRATION, + feedback_text=feedback, + feedback_source="message", + taste=_TASTE, + repo_root=repo_root, + live_dir=live_dir, + artifacts=_make_artifacts(repo_root), + agent="codex", + model="fake", + effort="low", + ) + ) + return result, mock + + +def _seed_planning_snapshot( + repo_root: Path, + live_dir: Path, + completed: list[tuple[str, str, str]], + *, + plan_text: str | None = None, + phase_text: str | None = None, +) -> None: + mig_root = migration_root(live_dir, _MIGRATION) + mig_root.mkdir(parents=True, exist_ok=True) + manifest_path = mig_root / "manifest.json" + now = "2026-04-29T12:00:00.000+00:00" + manifest = MigrationManifest( + name=_MIGRATION, + created_at=now, + last_touch=now, + 
wake_up_on=None, + awaiting_human_review=False, + status="planning", + current_phase="", + phases=(), + ) + save_manifest(manifest, manifest_path) + if plan_text is not None: + (mig_root / "plan.md").write_text(plan_text, encoding="utf-8") + if phase_text is not None: + (mig_root / "phase-0-setup.md").write_text(phase_text, encoding="utf-8") + _refresh_manifest(manifest, manifest_path, mig_root=mig_root) + + state = new_planning_state(_TARGET, now=now) + for step, outcome, stdout in completed: + stdout_path = planning_stage_stdout_path(mig_root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at=now, + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + _commit_all(repo_root, "seed planning snapshot") + + +def _seed_ready_snapshot(repo_root: Path, live_dir: Path) -> None: + mig_root = migration_root(live_dir, _MIGRATION) + now = "2026-04-29T12:00:00.000+00:00" + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ], + plan_text="# Plan\n", + phase_text=_phase_doc("always", "Setup is complete."), + ) + state = load_planning_state(repo_root, planning_state_path(mig_root)) + stdout_path = planning_stage_stdout_path(mig_root, "final-review") + stdout_path.write_text( + "final-decision: approve-auto - ready\n", + encoding="utf-8", + ) + state = complete_planning_step( + state, + "final-review", + "approve-auto", + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at=now, + final_reason="ready", + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + manifest = load_manifest(mig_root / 
"manifest.json") + _refresh_manifest(manifest, mig_root / "manifest.json", status="ready") + _commit_all(repo_root, "seed ready snapshot") + + def _phase_doc(precondition: str, definition_of_done: str) -> str: return ( f"## Precondition\n\n{precondition}\n\n" @@ -155,6 +393,348 @@ def _base_responses() -> list[tuple[str, dict[str, str]]]: ] +# --------------------------------------------------------------------------- +# one-step planning +# --------------------------------------------------------------------------- + + +def test_successful_step_publishes_docs_and_state_together( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + + result, mock = _run_next_step( + repo_root, + live_dir, + [ + _workspace_response( + "Generated 2 approaches\n", + {"approaches/incremental.md": "# Incremental\n"}, + ) + ], + monkeypatch, + ) + + assert result.status == "published" + assert result.step == "approaches" + assert result.next_step == "pick-best" + assert result.terminal_outcome is None + assert mock.stage_labels == ["approaches"] + assert mock.migration_dirs[0] != mig_root + + manifest = load_manifest(mig_root / "manifest.json") + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert manifest.status == "planning" + assert state.next_step == "pick-best" + assert [step.name for step in state.completed_steps] == ["approaches"] + assert (mig_root / "approaches" / "incremental.md").is_file() + assert planning_stage_stdout_path(mig_root, "approaches").read_text( + encoding="utf-8" + ) == "Generated 2 approaches\n" + + +def test_failed_step_does_not_publish_partial_docs_or_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", 
"Chose incremental.\n"), + ], + ) + before = snapshot_tree_digest(mig_root) + + with pytest.raises(ContinuousRefactorError, match="planning.expand failed"): + _run_next_step( + repo_root, + live_dir, + [ + _workspace_response( + "bad expansion\n", + {"plan.md": "# Partial bad plan\n"}, + returncode=1, + ) + ], + monkeypatch, + ) + + assert snapshot_tree_digest(mig_root) == before + assert not (mig_root / "plan.md").exists() + assert not planning_stage_stdout_path(mig_root, "expand").exists() + assert load_planning_state(repo_root, planning_state_path(mig_root)).next_step == "expand" + + +def test_resume_skips_completed_steps( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + + result, mock = _run_next_step( + repo_root, + live_dir, + [ + _workspace_response( + "Expanded.\n", + { + "plan.md": "# Plan\n", + "phase-0-setup.md": _phase_doc("always", "Setup is complete."), + }, + ) + ], + monkeypatch, + ) + + assert result.status == "published" + assert mock.stage_labels == ["expand"] + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert [step.name for step in state.completed_steps] == [ + "approaches", + "pick-best", + "expand", + ] + assert state.next_step == "review" + + +def test_revise_path_records_review_findings_as_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ], + plan_text="# Plan v1\n", + phase_text=_phase_doc("always", "Setup is complete."), + 
) + + result, mock = _run_next_step( + repo_root, + live_dir, + [_workspace_response("1. Missing rollback step.\n", {})], + monkeypatch, + ) + + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert result.status == "published" + assert mock.stage_labels == ["review"] + assert state.next_step == "revise" + assert state.review_findings == "live/rework-auth/.planning/stages/review.stdout.md" + assert planning_stage_stdout_path(mig_root, "review").read_text( + encoding="utf-8" + ) == "1. Missing rollback step.\n" + + +def test_review_two_findings_fail_without_publish( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "findings", "1. Missing rollback step.\n"), + ("revise", "completed", "Revised.\n"), + ], + plan_text="# Plan v2\n", + phase_text=_phase_doc("always", "Setup is complete."), + ) + before = snapshot_tree_digest(mig_root) + + with pytest.raises( + ContinuousRefactorError, + match="planning.review-2 failed: revised plan still has findings", + ): + _run_next_step( + repo_root, + live_dir, + [_workspace_response("1. 
Still broken.\n", {})], + monkeypatch, + ) + + assert snapshot_tree_digest(mig_root) == before + assert not planning_stage_stdout_path(mig_root, "review-2").exists() + assert load_planning_state(repo_root, planning_state_path(mig_root)).next_step == "review-2" + + +def test_final_ready_rejects_inconsistent_manifest_docs_before_publish( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "no findings\n"), + ], + plan_text="# Plan\n", + phase_text="## Precondition\n\nalways\n", + ) + before = snapshot_tree_digest(mig_root) + + result, mock = _run_next_step( + repo_root, + live_dir, + [_workspace_response("final-decision: approve-auto - solid\n", {})], + monkeypatch, + ) + + assert result.status == "blocked" + assert "workspace validation failed" in result.reason + assert mock.stage_labels == ["final-review"] + assert snapshot_tree_digest(mig_root) == before + assert not planning_stage_stdout_path(mig_root, "final-review").exists() + assert load_manifest(mig_root / "manifest.json").status == "planning" + assert load_planning_state(repo_root, planning_state_path(mig_root)).next_step == "final-review" + + +def test_refine_planning_keeps_current_cursor( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + + result, mock = _run_refine_step( + repo_root, + live_dir, + [ + _workspace_response( + "Expanded with feedback.\n", + { + "plan.md": "# Plan\n", + "phase-0-setup.md": 
_phase_doc("always", "Setup is complete."), + }, + ) + ], + monkeypatch, + feedback="Add a smaller first phase.", + ) + + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert result.status == "published" + assert result.step == "expand" + assert mock.stage_labels == ["expand"] + assert state.next_step == "review" + assert state.feedback[-1].text == "Add a smaller first phase." + + +def test_refine_ready_reopen_runs_one_revise_step( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_ready_snapshot(repo_root, live_dir) + + result, mock = _run_refine_step( + repo_root, + live_dir, + [ + _workspace_response( + "Revised with feedback.\n", + { + "plan.md": "# Plan v2\n", + "phase-0-setup.md": _phase_doc("always", "Setup is complete."), + }, + ) + ], + monkeypatch, + feedback="Narrow the rollout.", + ) + + manifest = load_manifest(mig_root / "manifest.json") + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert result.status == "published" + assert result.step == "revise" + assert mock.stage_labels == ["revise"] + assert manifest.status == "planning" + assert state.next_step == "review-2" + assert state.revision_base_step_count == 5 + + +def test_refine_repeated_steps_keep_original_stdout_history( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_ready_snapshot(repo_root, live_dir) + original_final_review = planning_stage_stdout_path(mig_root, "final-review") + original_text = original_final_review.read_text(encoding="utf-8") + + result, _mock = _run_refine_step( + repo_root, + live_dir, + [ + _workspace_response( + "Revised with feedback.\n", + { + "plan.md": "# Plan v2\n", + "phase-0-setup.md": _phase_doc("always", "Setup is complete."), + }, + ) + ], + monkeypatch, + ) + assert result.status == "published" + 
_commit_all(repo_root, "planning refine") + + for responses in ( + [_workspace_response("Reviewed revised plan. no findings.\n")], + [_workspace_response("final-decision: approve-auto - refined ready\n")], + ): + result, _mock = _run_next_step(repo_root, live_dir, responses, monkeypatch) + assert result.status == "published" + _commit_all(repo_root, f"planning {result.step}") + + state = load_planning_state(repo_root, planning_state_path(mig_root)) + final_review_refs = [ + step.outputs["stdout"] + for step in state.completed_steps + if step.name == "final-review" + ] + assert final_review_refs == [ + "live/rework-auth/.planning/stages/final-review.stdout.md", + "live/rework-auth/.planning/stages/final-review-2.stdout.md", + ] + assert original_final_review.read_text(encoding="utf-8") == original_text + assert ( + mig_root / ".planning" / "stages" / "final-review-2.stdout.md" + ).read_text(encoding="utf-8") == "final-decision: approve-auto - refined ready\n" + + # --------------------------------------------------------------------------- # initial decisions # --------------------------------------------------------------------------- @@ -214,7 +794,7 @@ def test_initial_decisions( assert manifest.human_review_reason is None if should_skip: - skip_file = intentional_skips_dir(live_dir) / f"{_MIGRATION}.md" + skip_file = mig_root / "intentional-skip.md" assert skip_file.exists() skip_content = skip_file.read_text(encoding="utf-8") assert _TARGET in skip_content @@ -252,11 +832,84 @@ def test_no_findings_path_keeps_stage_order_and_context_sources( "final-review", ] assert "Approaches:\n### incremental\n# Incremental\nStep by step approach." in mock.prompts[1] - assert "Chosen approach:\nChose incremental approach.\n" in mock.prompts[2] + assert "Chosen approach (from live/rework-auth/.planning/stages/pick-best.stdout.md):" in mock.prompts[2] + assert "Chose incremental approach.\n" in mock.prompts[2] assert "Plan:\n# Migration Plan\nPhased approach." 
in mock.prompts[3] assert "Plan:\n# Migration Plan\nPhased approach." in mock.prompts[4] +def test_run_planning_persists_durable_stage_outputs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir, mig_root = _planning_context(tmp_path, monkeypatch) + _run_planning( + tmp_path, + live_dir, + _base_responses() + [_planning_decision_response("approve-auto", "plan is solid")], + monkeypatch, + ) + + state = load_planning_state(tmp_path, planning_state_path(mig_root)) + + assert state.next_step == "terminal-ready" + assert state.final_decision == "approve-auto" + assert state.final_reason == "plan is solid" + assert [step.name for step in state.completed_steps] == [ + "approaches", + "pick-best", + "expand", + "review", + "final-review", + ] + for step in state.completed_steps: + stdout_ref = step.outputs["stdout"] + assert stdout_ref.startswith("live/rework-auth/.planning/stages/") + assert (tmp_path / stdout_ref).is_file() + + +def test_planning_context_reconstructs_from_durable_stage_outputs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir, mig_root = _planning_context(tmp_path, monkeypatch) + artifacts = _make_artifacts(tmp_path) + transient_stdout = artifacts.root / "planning" / "pick-best" / "agent.stdout.log" + transient_stdout.parent.mkdir(parents=True) + transient_stdout.write_text("wrong transient output\n", encoding="utf-8") + + state = new_planning_state(_TARGET, now="2026-04-29T12:00:00.000+00:00") + for name, text in ( + ("approaches", "Generated approaches.\n"), + ("pick-best", "Chose incremental approach.\n"), + ): + stdout_path = planning_stage_stdout_path(mig_root, name) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(text, encoding="utf-8") + state = complete_planning_step( + state, + name, + "completed", + {"stdout": stdout_path.relative_to(tmp_path).as_posix()}, + completed_at="2026-04-29T12:01:00.000+00:00", + ) + save_planning_state(state, 
planning_state_path(mig_root), repo_root=tmp_path) + shutil.rmtree(artifacts.root) + + context = _build_durable_planning_context( + repo_root=tmp_path, + live_dir=live_dir, + migration_name=_MIGRATION, + state=state, + ) + + assert "Chosen approach" in context + assert "Chose incremental approach." in context + assert ".planning/stages/pick-best.stdout.md" in context + assert "wrong transient output" not in context + assert "agent.stdout.log" not in context + + # --------------------------------------------------------------------------- # review findings trigger revise + review-2 # --------------------------------------------------------------------------- @@ -366,7 +1019,11 @@ def test_revise_path_keeps_existing_prompt_stages_with_distinct_stage_labels( "You are a planning agent expanding the chosen approach into a detailed migration plan." in mock.prompts[4] ) - assert "Review findings to address:\n1. Missing rollback step.\n2. Phase order unclear.\n" in mock.prompts[4] + assert ( + "Review findings to address (from live/rework-auth/.planning/stages/review.stdout.md):" + in mock.prompts[4] + ) + assert "1. Missing rollback step.\n2. Phase order unclear.\n" in mock.prompts[4] assert "You are a planning reviewer examining a refactoring migration plan." in mock.prompts[5] assert "Plan (revised):\n# Plan v2 (revised)" in mock.prompts[5] @@ -374,7 +1031,7 @@ def test_revise_path_keeps_existing_prompt_stages_with_distinct_stage_labels( def test_review_two_findings_fail_before_final_review( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - live_dir, _ = _planning_context(tmp_path, monkeypatch) + live_dir, mig_root = _planning_context(tmp_path, monkeypatch) responses = _revise_responses() responses[5] = ("1. 
Still missing rollback validation.\n", {}) @@ -384,6 +1041,32 @@ def test_review_two_findings_fail_before_final_review( ): _run_planning(tmp_path, live_dir, responses, monkeypatch) + assert not planning_stage_stdout_path(mig_root, "review-2").exists() + state = load_planning_state(tmp_path, planning_state_path(mig_root)) + assert state.next_step == "review-2" + + +def test_failed_final_review_output_is_not_durable( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir, mig_root = _planning_context(tmp_path, monkeypatch) + + with pytest.raises( + ContinuousRefactorError, + match="planning.final-review failed: Final review produced no output", + ): + _run_planning( + tmp_path, + live_dir, + _base_responses() + [("debug line without decision\n", {})], + monkeypatch, + ) + + assert not planning_stage_stdout_path(mig_root, "final-review").exists() + state = load_planning_state(tmp_path, planning_state_path(mig_root)) + assert state.next_step == "final-review" + def test_manifest_phase_discovery_refreshes_only_after_file_writing_stages( tmp_path: Path, diff --git a/tests/test_planning_publish.py b/tests/test_planning_publish.py new file mode 100644 index 0000000..f1a2e55 --- /dev/null +++ b/tests/test_planning_publish.py @@ -0,0 +1,594 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +import continuous_refactoring.planning_publish as planning_publish +from conftest import init_repo +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.git import run_command +from continuous_refactoring.migration_consistency import ( + check_migration_consistency, + has_blocking_consistency_findings, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + PhaseSpec, + save_manifest, +) + + +_NOW = "2026-04-29T12:00:00.000+00:00" +_PHASE = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", +) + + +def _manifest(slug: str) -> 
MigrationManifest: + return MigrationManifest( + name=slug, + created_at=_NOW, + last_touch=_NOW, + wake_up_on=None, + awaiting_human_review=False, + status="ready", + current_phase=_PHASE.name, + phases=(_PHASE,), + ) + + +def _write_snapshot(root: Path, slug: str, version: str, *, extra: bool = False) -> Path: + migration_dir = root / slug + migration_dir.mkdir(parents=True) + (migration_dir / "plan.md").write_text(f"# Plan {version}\n", encoding="utf-8") + (migration_dir / _PHASE.file).write_text( + f"## Precondition\n\nalways\n\n## Definition of Done\n\n{version}\n", + encoding="utf-8", + ) + if extra: + (migration_dir / "notes.md").write_text(f"{version}\n", encoding="utf-8") + save_manifest(_manifest(slug), migration_dir / "manifest.json") + return migration_dir + + +def _request( + repo_root: Path, + live_migrations_dir: Path, + slug: str, + workspace_dir: Path, + *, + base_snapshot_id: str | None = None, +) -> planning_publish.PlanningPublishRequest: + return planning_publish.PlanningPublishRequest( + repo_root=repo_root, + live_migrations_dir=live_migrations_dir, + slug=slug, + workspace_dir=workspace_dir, + base_snapshot_id=( + base_snapshot_id + if base_snapshot_id is not None + else planning_publish.snapshot_tree_digest(live_migrations_dir / slug) + ), + ) + + +def _tree(path: Path) -> dict[str, str]: + return { + child.relative_to(path).as_posix(): child.read_text(encoding="utf-8") + for child in sorted(path.rglob("*")) + if child.is_file() + } + + +def _commit_all(repo_root: Path, message: str = "commit") -> None: + run_command(["git", "add", "-A"], cwd=repo_root) + run_command(["git", "commit", "-m", message], cwd=repo_root) + + +def _tx(live_migrations_dir: Path, token: str) -> Path: + return live_migrations_dir / "__transactions__" / token + + +def _stable_token(monkeypatch: pytest.MonkeyPatch, token: str) -> None: + monkeypatch.setattr(planning_publish, "_new_transaction_token", lambda: token) + + +def 
test_publish_creates_new_live_migration_from_staged_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-create") + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.live_dir == live_dir / "auth-cleanup" + assert _tree(live_dir / "auth-cleanup") == _tree(workspace) + findings = check_migration_consistency( + live_dir / "auth-cleanup", mode="execution-gate" + ) + assert not has_blocking_consistency_findings(findings) + assert result.cleanup_error is None + assert not _tx(live_dir, "tx-create").exists() + + +def test_publish_replaces_existing_non_empty_live_dir_with_backup_transaction( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old", extra=True) + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-replace") + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert _tree(old_live) == _tree(workspace) + assert not (old_live / "notes.md").exists() + assert not (_tx(live_dir, "tx-replace") / "rollback").exists() + assert not (_tx(live_dir, "tx-replace") / "failed").exists() + + +def test_publish_requires_same_device_final_staging( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) 
+ workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + seen: list[tuple[Path, Path]] = [] + _stable_token(monkeypatch, "tx-device") + + def different_device(source: Path, target_root: Path) -> bool: + seen.append((source, target_root)) + return False + + def fail_move(_source: Path, _destination: Path) -> None: + raise AssertionError("publish must not move live state across devices") + + monkeypatch.setattr(planning_publish, "_same_device", different_device) + monkeypatch.setattr(planning_publish, "_move_path", fail_move) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "blocked" + assert "same filesystem" in str(exc.value) + assert seen == [(_tx(live_dir, "tx-device") / "staged", live_dir)] + assert _tree(old_live) == old_tree + assert (_tx(live_dir, "tx-device") / "staged").is_dir() + assert not (_tx(live_dir, "tx-device") / "rollback").exists() + + +def test_staged_validation_failure_leaves_live_snapshot_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + validated: list[Path] = [] + _stable_token(monkeypatch, "tx-stage-invalid") + + def validate(path: Path, mode: str = "ready-publish") -> None: + validated.append(path) + if path.name == "staged": + raise ContinuousRefactorError("staged invalid") + + def fail_live_move(source: Path, _destination: Path) -> None: + if source == old_live: + raise AssertionError("live dir must not move after staged validation fails") + + monkeypatch.setattr(planning_publish, "_validate_snapshot", validate) + monkeypatch.setattr(planning_publish, 
"_move_path", fail_live_move) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "blocked" + assert "staged invalid" in str(exc.value) + assert validated == [workspace, _tx(live_dir, "tx-stage-invalid") / "staged"] + assert _tree(old_live) == old_tree + assert (_tx(live_dir, "tx-stage-invalid") / "staged").is_dir() + assert not (_tx(live_dir, "tx-stage-invalid") / "rollback").exists() + + +def test_publish_rejects_stale_base_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + stale_base = planning_publish.snapshot_tree_digest(old_live) + (old_live / "plan.md").write_text("# human edit\n", encoding="utf-8") + _commit_all(repo, "human migration edit") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-stale") + + def fail_live_move(source: Path, _destination: Path) -> None: + if source == old_live: + raise AssertionError("stale publish must not move live state") + + monkeypatch.setattr(planning_publish, "_move_path", fail_live_move) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request( + repo, + live_dir, + "auth-cleanup", + workspace, + base_snapshot_id=stale_base, + ) + ) + + assert exc.value.result.status == "blocked" + assert "stale base snapshot" in str(exc.value) + assert "base_snapshot_id" in str(exc.value) + assert (old_live / "plan.md").read_text(encoding="utf-8") == "# human edit\n" + assert (_tx(live_dir, "tx-stale") / "staged").is_dir() + assert not (_tx(live_dir, "tx-stale") / "rollback").exists() + + +def 
test_nested_transaction_named_dir_changes_snapshot_digest_and_blocks_stale_publish( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + stale_base = planning_publish.snapshot_tree_digest(old_live) + nested = old_live / "__transactions__" + nested.mkdir() + (nested / "user-note.md").write_text("do not drop\n", encoding="utf-8") + _commit_all(repo, "nested transaction-named user dir") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-nested-stale") + + def fail_live_move(source: Path, _destination: Path) -> None: + if source == old_live: + raise AssertionError("stale publish must not move live state") + + monkeypatch.setattr(planning_publish, "_move_path", fail_live_move) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request( + repo, + live_dir, + "auth-cleanup", + workspace, + base_snapshot_id=stale_base, + ) + ) + + assert exc.value.result.status == "blocked" + assert "stale base snapshot" in str(exc.value) + assert (nested / "user-note.md").read_text(encoding="utf-8") == "do not drop\n" + assert not (_tx(live_dir, "tx-nested-stale") / "rollback").exists() + + +def test_publish_cleans_backup_after_success( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-clean") + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is 
None + assert _tree(old_live) == _tree(workspace) + assert not (_tx(live_dir, "tx-clean") / "rollback").exists() + assert not _tx(live_dir, "tx-clean").exists() + + +def test_publish_restores_rollback_when_live_replace_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + _stable_token(monkeypatch, "tx-restore") + + def fail_install(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-restore") / "staged" and destination == old_live: + raise OSError("cannot install staged") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_move_path", fail_install) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "failed" + assert "cannot install staged" in str(exc.value) + assert _tree(old_live) == old_tree + assert (_tx(live_dir, "tx-restore") / "staged").is_dir() + assert not (_tx(live_dir, "tx-restore") / "rollback").exists() + + +def test_publish_reports_live_rollback_staged_and_failed_paths_when_rollback_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + _stable_token(monkeypatch, "tx-rollback-fails") + + def validate(path: Path, mode: str = "ready-publish") -> None: + if path == old_live: + raise 
ContinuousRefactorError("live validation failed") + + def fail_restore(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-rollback-fails") / "rollback" and destination == old_live: + raise OSError("rollback restore failed") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_validate_snapshot", validate) + monkeypatch.setattr(planning_publish, "_move_path", fail_restore) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "failed" + assert "rollback restore failed" in message + assert "live=" in message + assert "rollback=" in message + assert "staged=" in message + assert "failed=" in message + assert (_tx(live_dir, "tx-rollback-fails") / "rollback").is_dir() + assert (_tx(live_dir, "tx-rollback-fails") / "failed").is_dir() + + +def test_publish_refuses_dirty_live_migration_dir( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + (old_live / "plan.md").write_text("# dirty tracked\n", encoding="utf-8") + (old_live / "local.md").write_text("local\n", encoding="utf-8") + tx_noise = live_dir / "__transactions__" / "old" / "staged" + tx_noise.mkdir(parents=True) + (tx_noise / "ignored.md").write_text("ignored\n", encoding="utf-8") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-dirty") + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "blocked" + assert "dirty live migration" in message + assert 
"migrations/auth-cleanup/plan.md" in message + assert "migrations/auth-cleanup/local.md" in message + assert "__transactions__" not in message + assert not _tx(live_dir, "tx-dirty").exists() + + +def test_publish_refuses_ignored_live_migration_files( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + _write_snapshot(live_dir, "auth-cleanup", "old") + (repo / ".gitignore").write_text( + "migrations/auth-cleanup/*.cache\n", + encoding="utf-8", + ) + _commit_all(repo, "old migration with ignore rule") + ignored = live_dir / "auth-cleanup" / "local.cache" + ignored.write_text("operator scratch\n", encoding="utf-8") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-ignored-dirty") + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "blocked" + assert "dirty live migration" in message + assert "migrations/auth-cleanup/local.cache" in message + assert ignored.read_text(encoding="utf-8") == "operator scratch\n" + assert not _tx(live_dir, "tx-ignored-dirty").exists() + + +def test_lock_rejects_concurrent_mutation_and_reports_lock_path( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + lock_path = planning_publish.publish_lock_path(live_dir) + lock_path.mkdir(parents=True) + (lock_path / "owner.json").write_text( + '{"pid": 123, "operation": "review", ' + '"created_at": "2026-04-29T12:00:00.000+00:00"}\n', + encoding="utf-8", + ) + _stable_token(monkeypatch, "tx-locked") + 
+ with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "blocked" + assert "concurrent mutation" in message + assert str(lock_path) in message + assert "123" in message + assert "review" in message + assert "2026-04-29T12:00:00.000+00:00" in message + assert _tree(old_live) != _tree(workspace) + assert not _tx(live_dir, "tx-locked").exists() + + +def test_publish_reports_lock_cleanup_failure_on_success( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_remove = planning_publish._remove_tree + _stable_token(monkeypatch, "tx-lock-cleanup-fails") + + def fail_lock_cleanup(path: Path) -> None: + if path == planning_publish.publish_lock_path(live_dir): + raise OSError("lock cleanup denied") + original_remove(path) + + monkeypatch.setattr(planning_publish, "_remove_tree", fail_lock_cleanup) + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is not None + assert "lock cleanup denied" in result.cleanup_error + assert _tree(old_live) == _tree(workspace) + assert planning_publish.publish_lock_path(live_dir).is_dir() + + +def test_publish_moves_partial_live_to_failed_before_restoring_rollback( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = 
_write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + _stable_token(monkeypatch, "tx-partial-live") + + def fail_install_with_partial_live(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-partial-live") / "staged" and destination == old_live: + destination.mkdir() + (destination / "partial.md").write_text("bad partial\n", encoding="utf-8") + raise OSError("cannot install staged") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_move_path", fail_install_with_partial_live) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "failed" + assert _tree(old_live) == old_tree + assert ( + _tx(live_dir, "tx-partial-live") / "failed" / "partial.md" + ).read_text(encoding="utf-8") == "bad partial\n" + + +def test_transaction_dirs_are_left_for_doctor_when_cleanup_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_remove = planning_publish._remove_tree + _stable_token(monkeypatch, "tx-cleanup-fails") + + def fail_cleanup(path: Path) -> None: + if path == _tx(live_dir, "tx-cleanup-fails") / "rollback": + raise OSError("cleanup denied") + original_remove(path) + + monkeypatch.setattr(planning_publish, "_remove_tree", fail_cleanup) + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is not None + assert "cleanup denied" in result.cleanup_error + assert _tree(old_live) == 
_tree(workspace) + assert _tree(_tx(live_dir, "tx-cleanup-fails") / "rollback") == old_tree + assert not (_tx(live_dir, "tx-cleanup-fails") / "failed").exists() diff --git a/tests/test_planning_state.py b/tests/test_planning_state.py new file mode 100644 index 0000000..727f9e3 --- /dev/null +++ b/tests/test_planning_state.py @@ -0,0 +1,590 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.planning_state import ( + CompletedPlanningStep, + PlanningState, + append_planning_feedback, + complete_planning_step, + load_planning_state, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + planning_step_stdout, + save_planning_state, + write_planning_stage_stdout, +) + + +_NOW = "2026-04-29T12:00:00.000+00:00" +_LATER = "2026-04-29T12:01:00.000+00:00" + + +def _migration_root(tmp_path: Path) -> tuple[Path, Path]: + repo_root = tmp_path + mig_root = repo_root / "migrations" / "auth-cleanup" + mig_root.mkdir(parents=True) + return repo_root, mig_root + + +def _write_stdout(repo_root: Path, mig_root: Path, step: str, text: str = "ok\n") -> str: + path = planning_stage_stdout_path(mig_root, step) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + return path.relative_to(repo_root).as_posix() + + +def _completed( + repo_root: Path, + mig_root: Path, + name: str, + outcome: str = "completed", +) -> CompletedPlanningStep: + return CompletedPlanningStep( + name=name, + completed_at=_LATER, + outcome=outcome, + outputs={"stdout": _write_stdout(repo_root, mig_root, name)}, + ) + + +def _write_state_payload(path: Path, payload: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + +def _payload( + *, + repo_root: Path, + mig_root: Path, + next_step: str, + completed_steps: list[dict[str, 
object]] | None = None, + review_findings: str | None = None, + final_decision: str | None = None, + final_reason: str | None = None, +) -> dict[str, object]: + return { + "schema_version": 1, + "target": "Clean up auth", + "next_step": next_step, + "completed_steps": completed_steps or [], + "started_at": _NOW, + "updated_at": _LATER, + "feedback": [], + "review_findings": review_findings, + "final_decision": final_decision, + "final_reason": final_reason, + } + + +def test_planning_state_roundtrip_preserves_completed_steps_and_current_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + outputs = {"stdout": _write_stdout(repo_root, mig_root, "approaches")} + updated = complete_planning_step( + state, + "approaches", + "completed", + outputs, + completed_at=_LATER, + agent="codex", + model="gpt-5.5", + effort="low", + ) + + save_planning_state(updated, planning_state_path(mig_root), repo_root=repo_root) + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + + assert loaded.next_step == "pick-best" + assert [step.name for step in loaded.completed_steps] == ["approaches"] + assert loaded.completed_steps[0].outputs == outputs + assert loaded.completed_steps[0].agent == "codex" + assert loaded.completed_steps[0].model == "gpt-5.5" + assert loaded.completed_steps[0].effort == "low" + + +def test_planning_state_defaults_new_plan_to_first_step() -> None: + state = new_planning_state("Clean up auth", now=_NOW) + + assert state.schema_version == 1 + assert state.target == "Clean up auth" + assert state.next_step == "approaches" + assert state.completed_steps == () + assert state.review_findings is None + assert state.final_decision is None + assert state.final_reason is None + + +def test_planning_state_records_user_refinement_feedback(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) 
+ + state = append_planning_feedback( + state, + "Keep rollout separate.", + "message", + now=_NOW, + ) + state = append_planning_feedback( + state, + "Use the staged publisher.", + "file", + now=_LATER, + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + + payload = json.loads(planning_state_path(mig_root).read_text(encoding="utf-8")) + assert payload["feedback"] == [ + { + "received_at": _NOW, + "source": "message", + "text": "Keep rollout separate.", + }, + { + "received_at": _LATER, + "source": "file", + "text": "Use the staged publisher.", + }, + ] + + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + assert [feedback.source for feedback in loaded.feedback] == ["message", "file"] + assert [feedback.text for feedback in loaded.feedback] == [ + "Keep rollout separate.", + "Use the staged publisher.", + ] + + +def test_repeated_planning_step_stdout_keeps_prior_audit_output( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + + first = write_planning_stage_stdout(repo_root, mig_root, "final-review", "first\n") + second = write_planning_stage_stdout(repo_root, mig_root, "final-review", "second\n") + + assert first == { + "stdout": "migrations/auth-cleanup/.planning/stages/final-review.stdout.md" + } + assert second == { + "stdout": "migrations/auth-cleanup/.planning/stages/final-review-2.stdout.md" + } + assert planning_stage_stdout_path(mig_root, "final-review").read_text( + encoding="utf-8" + ) == "first\n" + assert ( + mig_root / ".planning" / "stages" / "final-review-2.stdout.md" + ).read_text(encoding="utf-8") == "second\n" + + +def test_planning_state_rejects_unknown_current_step(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload(repo_root=repo_root, mig_root=mig_root, next_step="wat"), + ) + + with pytest.raises(ContinuousRefactorError, match="Unknown planning 
cursor"): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_completed_step_after_current_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": { + "stdout": _write_stdout(repo_root, mig_root, "approaches") + }, + }, + { + "name": "pick-best", + "completed_at": _LATER, + "outcome": "completed", + "outputs": { + "stdout": _write_stdout(repo_root, mig_root, "pick-best") + }, + }, + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_review_to_final_review_when_findings_required_revise( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + review_path = _write_stdout(repo_root, mig_root, "review", "1. 
Fix it.\n") + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="final-review", + review_findings=review_path, + completed_steps=[ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + { + "name": "review", + "completed_at": _LATER, + "outcome": "findings", + "outputs": {"stdout": review_path}, + }, + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_revise_without_prior_review_findings( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="review-2", + completed_steps=[ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + _completed(repo_root, mig_root, "review", "clear").to_payload(), + _completed(repo_root, mig_root, "revise").to_payload(), + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="expected final-review"): + load_planning_state(repo_root, path) + + +def test_planning_state_replays_branching_transition_history(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + for name in ("approaches", "pick-best", "expand"): + state = complete_planning_step( + state, + name, + "completed", + {"stdout": _write_stdout(repo_root, mig_root, name)}, + completed_at=_LATER, + ) + review_path = _write_stdout(repo_root, mig_root, "review", "1. 
Fix it.\n") + state = complete_planning_step( + state, + "review", + "findings", + {"stdout": review_path}, + completed_at=_LATER, + ) + state = complete_planning_step( + state, + "revise", + "completed", + {"stdout": _write_stdout(repo_root, mig_root, "revise")}, + completed_at=_LATER, + ) + state = complete_planning_step( + state, + "review-2", + "clear", + {"stdout": _write_stdout(repo_root, mig_root, "review-2")}, + completed_at=_LATER, + ) + + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + + assert loaded.next_step == "final-review" + assert loaded.review_findings == review_path + assert [step.name for step in loaded.completed_steps] == [ + "approaches", + "pick-best", + "expand", + "review", + "revise", + "review-2", + ] + + +def test_planning_state_rejects_missing_artifact_for_completed_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={ + "stdout": ( + mig_root / ".planning" / "stages" / "approaches.stdout.md" + ).relative_to(repo_root).as_posix() + }, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + _write_state_payload( + planning_state_path(mig_root), + { + "schema_version": state.schema_version, + "target": state.target, + "next_step": state.next_step, + "completed_steps": [ + step.to_payload() for step in state.completed_steps + ], + "started_at": state.started_at, + "updated_at": state.updated_at, + "feedback": list(state.feedback), + "review_findings": state.review_findings, + "final_decision": state.final_decision, + "final_reason": state.final_reason, + }, + ) + + with 
pytest.raises(ContinuousRefactorError, match="missing planning output"): + load_planning_state(repo_root, planning_state_path(mig_root)) + + +@pytest.mark.parametrize( + ("stdout_ref", "message"), + [ + ("/tmp/agent.stdout.log", "repo-relative"), + ("../escape.stdout.md", "repo-relative"), + ("outside/stdout.md", "inside the migration directory"), + ( + "migrations/auth-cleanup/.planning/stages/missing.stdout.md", + "missing planning output", + ), + ], +) +def test_save_planning_state_rejects_invalid_output_refs_before_replacing( + tmp_path: Path, + stdout_ref: str, + message: str, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + path.parent.mkdir(parents=True) + original_content = '{"schema_version": 0}\n' + path.write_text(original_content, encoding="utf-8") + outside = repo_root / "outside" / "stdout.md" + outside.parent.mkdir() + outside.write_text("outside\n", encoding="utf-8") + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={"stdout": stdout_ref}, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match=message): + save_planning_state(state, path, repo_root=repo_root) + + assert path.read_text(encoding="utf-8") == original_content + + +def test_planning_state_atomic_save_preserves_existing_file_on_replace_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + path.parent.mkdir(parents=True) + original_content = '{"schema_version": 0}\n' + path.write_text(original_content, encoding="utf-8") + + def fail_replace(self: Path, target: Path) -> Path: + raise OSError(f"cannot replace {target} from {self}") + + 
monkeypatch.setattr(Path, "replace", fail_replace) + + with pytest.raises(ContinuousRefactorError, match=f"Could not save planning state {path}"): + save_planning_state( + new_planning_state("Clean up auth", now=_NOW), + path, + repo_root=tmp_path, + ) + + assert path.read_text(encoding="utf-8") == original_content + assert list(path.parent.glob("*.tmp")) == [] + + +def test_complete_planning_step_rejects_impossible_in_memory_cursor( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=(), + started_at=_NOW, + updated_at=_NOW, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + complete_planning_step( + state, + "pick-best", + "completed", + {"stdout": _write_stdout(repo_root, mig_root, "pick-best")}, + completed_at=_LATER, + ) + + +def test_complete_planning_step_rejects_absolute_output_ref(tmp_path: Path) -> None: + state = new_planning_state("Clean up auth", now=_NOW) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + complete_planning_step( + state, + "approaches", + "completed", + {"stdout": str(tmp_path / "agent.stdout.log")}, + completed_at=_LATER, + ) + + +def test_planning_step_stdout_rejects_unvalidated_output_ref(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={"stdout": str(tmp_path / "agent.stdout.log")}, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + 
planning_step_stdout( + state, + repo_root, + "approaches", + state_path=planning_state_path(mig_root), + ) + + +def test_planning_state_snapshot_paths_are_repo_relative(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + absolute_payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": str(tmp_path / "tmp" / "agent.stdout.log")}, + } + ], + ) + _write_state_payload(path, absolute_payload) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + load_planning_state(repo_root, path) + + escape_payload = dict(absolute_payload) + escape_payload["completed_steps"] = [ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": "../escape.stdout.md"}, + } + ] + _write_state_payload(path, escape_payload) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + load_planning_state(repo_root, path) + + valid_ref = _write_stdout(repo_root, mig_root, "approaches") + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": valid_ref}, + } + ], + ), + ) + + assert load_planning_state(repo_root, path).completed_steps[0].outputs["stdout"] == valid_ref diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 8f599d2..8e9dc05 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -1,5 +1,6 @@ from __future__ import annotations +from dataclasses import replace from pathlib import Path import pytest @@ -7,6 +8,15 @@ from continuous_refactoring.config import TASTE_CURRENT_VERSION, default_taste_text from continuous_refactoring.effort import EffortBudget from continuous_refactoring.migrations import 
MigrationManifest, PhaseSpec +from continuous_refactoring.planning import _build_durable_planning_context +from continuous_refactoring.planning_state import ( + PlanningState, + append_planning_feedback, + complete_planning_step, + new_planning_state, + planning_stage_stdout_path, + reopen_planning_for_revise, +) from continuous_refactoring.prompts import ( CLASSIFIER_PROMPT, CONTINUOUS_REFACTORING_STATUS_BEGIN, @@ -19,12 +29,14 @@ PLANNING_FINAL_REVIEW_PROMPT, PLANNING_PICK_BEST_PROMPT, PLANNING_REVIEW_PROMPT, + REVIEW_PERFORM_PROMPT, compose_full_prompt, compose_classifier_prompt, compose_interview_prompt, compose_phase_execution_prompt, compose_phase_ready_prompt, compose_planning_prompt, + compose_review_perform_prompt, compose_taste_refine_prompt, compose_taste_upgrade_prompt, ) @@ -63,6 +75,7 @@ PLANNING_FINAL_REVIEW_PROMPT, PHASE_READY_CHECK_PROMPT, PHASE_EXECUTION_PROMPT, + REVIEW_PERFORM_PROMPT, ) @@ -93,6 +106,29 @@ def _manifest() -> MigrationManifest: ) +def _terminal_ready_state(repo_root: Path, mig_root: Path) -> PlanningState: + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + for step, outcome, stdout in ( + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose approach.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "approve-auto", "final-decision: approve-auto - ready\n"), + ): + stdout_path = planning_stage_stdout_path(mig_root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at="2026-04-29T12:00:00.000+00:00", + final_reason="ready" if step == "final-review" else None, + ) + return state + + # --------------------------------------------------------------------------- # Output contracts on prompt constants # 
--------------------------------------------------------------------------- @@ -185,6 +221,155 @@ def test_phase_ready_prompt_does_not_make_fresh_test_evidence_human_review() -> assert "Use `ready: unverifiable` only" in PHASE_READY_CHECK_PROMPT +def test_planning_prompts_name_staged_work_dir_and_keep_taste( + tmp_path: Path, +) -> None: + repo_root = tmp_path / "repo" + live_mig_root = repo_root / "migrations" / "auth-cleanup" + staged_parent = tmp_path / "xdg" / "planning" / "auth-cleanup" / "run" / "work" + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=staged_parent, + migration_name="auth-cleanup", + state=state, + published_migration_root=live_mig_root, + ) + + for stage in _PLANNING_STAGES: + result = compose_planning_prompt(stage, "auth-cleanup", _TASTE, context) + + assert f"## Taste\n{_TASTE}" in result + assert f"Staged work dir: {staged_parent / 'auth-cleanup'}" in result + assert f"Work dir: {staged_parent / 'auth-cleanup'}" in result + assert f"Live migration dir: {live_mig_root}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." 
in result + assert ".planning/state.json" in result + assert ".planning/stages/" in result + assert "failed current-step output" in result + assert "not resume input" in result + + +def test_review_prompt_names_work_dir_and_forbids_live_dir_mutation() -> None: + manifest = replace( + _manifest(), + awaiting_human_review=True, + human_review_reason="Need Hiren to choose rollout order.", + ) + repo_root = Path("/repo") + work_dir = Path("/xdg/projects/p/planning/auth-cleanup/review-1/work/auth-cleanup") + live_dir = Path("/repo/migrations/auth-cleanup") + + result = compose_review_perform_prompt( + "auth-cleanup", + repo_root, + work_dir, + live_dir, + manifest.phases[1], + manifest, + _TASTE, + ) + + assert f"Repo root: {repo_root}" in result + assert f"Work dir: {work_dir}" in result + assert f"Live migration dir: {live_dir}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." in result + assert "Need Hiren to choose rollout order." 
in result + assert f"## Taste\n{_TASTE}" in result + + +def test_refine_prompt_names_work_dir_and_keeps_taste(tmp_path: Path) -> None: + repo_root = tmp_path / "repo" + live_dir = repo_root / "migrations" + mig_root = live_dir / "auth-cleanup" + mig_root.mkdir(parents=True) + (mig_root / "plan.md").write_text("# Plan\n", encoding="utf-8") + state = _terminal_ready_state(repo_root, mig_root) + state = append_planning_feedback( + state, + "Split the risky phase.", + "message", + now="2026-04-29T12:00:00.000+00:00", + ) + state = reopen_planning_for_revise( + state, + now="2026-04-29T12:01:00.000+00:00", + ) + + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=live_dir, + migration_name="auth-cleanup", + state=state, + extra_context="User refinement feedback:\nSplit the risky phase.", + published_migration_root=mig_root, + ) + result = compose_planning_prompt("expand", "auth-cleanup", _TASTE, context) + + assert f"Work dir: {mig_root}" in result + assert f"Live migration dir: {mig_root}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." in result + assert "User refinement feedback" in result + assert "Split the risky phase." 
in result + assert f"## Taste\n{_TASTE}" in result + + +def test_review_and_refine_prompts_forbid_live_dir_mutation(tmp_path: Path) -> None: + manifest = replace( + _manifest(), + awaiting_human_review=True, + human_review_reason="Need Hiren to choose rollout order.", + ) + repo_root = tmp_path / "repo" + review_work_dir = tmp_path / "xdg" / "planning" / "auth-cleanup" / "review" / "work" + live_mig_root = repo_root / "migrations" / "auth-cleanup" + review_prompt = compose_review_perform_prompt( + "auth-cleanup", + repo_root, + review_work_dir, + live_mig_root, + manifest.phases[1], + manifest, + _TASTE, + ) + + refine_state = new_planning_state( + "Clean up auth", + now="2026-04-29T12:00:00.000+00:00", + ) + refine_state = append_planning_feedback( + refine_state, + "Split the risky phase.", + "message", + now="2026-04-29T12:01:00.000+00:00", + ) + refine_context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=review_work_dir.parent, + migration_name="work", + state=refine_state, + extra_context="User refinement feedback:\nSplit the risky phase.", + published_migration_root=live_mig_root, + ) + refine_prompt = compose_planning_prompt( + "expand", + "auth-cleanup", + _TASTE, + refine_context, + ) + + for prompt in (review_prompt, refine_prompt): + assert f"Live migration dir: {live_mig_root}" in prompt + assert "Writable target: work dir only." in prompt + assert "Do not mutate the live migration directory." 
in prompt + assert "not resume input" in prompt + assert f"## Taste\n{_TASTE}" in prompt + + @pytest.mark.parametrize("prompt", _PLANNING_PROMPTS_THAT_MENTION_PLAN_MD) @pytest.mark.parametrize("fragment", ("plan.md", "phase--.md")) def test_planning_prompts_reference_plan_artifacts(prompt: str, fragment: str) -> None: @@ -379,6 +564,43 @@ def test_planning_prompt_includes_effort_budget_guidance() -> None: assert "wait for a future run" in result +def test_planning_resume_prompt_uses_durable_state_and_keeps_taste( + tmp_path: Path, +) -> None: + live_dir = tmp_path / "migrations" + mig_root = live_dir / "auth-cleanup" + mig_root.mkdir(parents=True) + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + for name, text in ( + ("approaches", "Generated approaches.\n"), + ("pick-best", "Chose incremental approach.\n"), + ): + stdout_path = planning_stage_stdout_path(mig_root, name) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(text, encoding="utf-8") + state = complete_planning_step( + state, + name, + "completed", + {"stdout": stdout_path.relative_to(tmp_path).as_posix()}, + completed_at="2026-04-29T12:01:00.000+00:00", + ) + + context = _build_durable_planning_context( + repo_root=tmp_path, + live_dir=live_dir, + migration_name="auth-cleanup", + state=state, + ) + result = compose_planning_prompt("expand", "auth-cleanup", _TASTE, context) + + assert f"## Taste\n{_TASTE}" in result + assert "migrations/auth-cleanup/.planning/stages/pick-best.stdout.md" in result + assert "Chose incremental approach." in result + assert "agent.stdout.log" not in result + assert str(tmp_path / "tmp") not in result + + def test_planning_prompts_describe_phase_effort_metadata() -> None: assert "required_effort: