diff --git a/.gitignore b/.gitignore index eef6600..7625076 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ .scratchpad/ +tmpdir/ +.pytest_cache/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/AGENTS.md b/AGENTS.md index f1c705f..da02139 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -20,6 +20,33 @@ Treat `AGENTS.md` as part of the codebase's invariants, not documentation. A dri - Test one: `uv run pytest tests/test_x.py::test_name` - Entry: `continuous-refactoring --help` / `continuous-refactoring --version` (or `python -m continuous_refactoring`) +- Init: `continuous-refactoring init [--path PATH] + [--live-migrations-dir DIR] [--in-repo-taste [PATH]] [--force]` +- Taste: `continuous-refactoring taste [--global] + [--interview|--upgrade|--refine] + [--with codex|claude --model --effort ] + [--force]` +- Run once: `continuous-refactoring run-once --with codex|claude + --model [common targeting/validation flags]` +- Run loop: `continuous-refactoring run --with codex|claude --model + [--max-attempts N] [--max-refactors N] [--focus-on-live-migrations] + [--commit-message-prefix TEXT] [--max-consecutive-failures N] [--sleep N]` + Requires targeting flags or `--scope-instruction` unless + `--focus-on-live-migrations`; `--max-refactors` is required unless using + `--targets` or `--focus-on-live-migrations`. +- Upgrade config: `continuous-refactoring upgrade` +- Inspect migrations: `continuous-refactoring migration list + [--status planning|ready|in-progress|skipped|done] + [--awaiting-review]` / + `continuous-refactoring migration doctor ` / + `continuous-refactoring migration doctor --all` +- Review migrations: `continuous-refactoring migration review + --with codex|claude --model --effort ` + (top-level `review list` / `review perform --with ... --model ... 
+ --effort ...` remain compatibility wrappers) +- Refine migration planning: `continuous-refactoring migration refine + (--message |--file ) --with codex|claude --model + --effort [--show-agent-logs]` No lint, no typecheck, no formatter, no pre-commit. GitHub Actions `Test` runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions @@ -29,17 +56,23 @@ runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions - `src/continuous_refactoring/` — flat module layout, no subpackages - `tests/` — flat, `test_.py` per source module plus behavior bundles (`test_e2e.py`, `test_run.py`, `test_run_once.py`) -- `migrations/` — live multi-phase plans (dog-food output) +- `/` — configurable live multi-phase plans (dog-food output); a checkout may not have `migrations/` - `.scratchpad/` — ephemeral agent state, gitignored - Durable user state: `~/.local/share/continuous-refactoring/…` (XDG) ## 5. Project vocabulary -- **Target** — a source path the driver is working on this iteration. +- **Target** — the refactoring unit the driver is working on this iteration: a JSONL target, one matched tracked file, literal path set, random tracked-file bundle, or fallback scoped prompt. - **Taste** — project or global prose that shapes every prompt. Project taste is XDG by default, or a repo-relative path stored as `repo_taste_path` after `init --in-repo-taste [PATH]`. - **Scope expansion** — deciding the set of files edited together with the target (`scope_expansion.py`). -- **Classifier / routing** — picks which agent handles a target (`routing.py`). -- **Migration** — a multi-phase plan living under `migrations//`. +- **Classifier / routing** — chooses a target route: `cohesive-cleanup` vs `needs-plan` (`routing.py`). +- **Migration** — a multi-phase plan living under `//`. +- **Visible migration directory** — direct child migration dir that is not hidden, dotted, symlinked, or internal/transactional; enumerate through `iter_visible_migration_dirs()`. 
+- **Consistency finding** — structured migration integrity result with shared `info | warning | error` severity and `planning-snapshot | ready-publish | execution-gate | doctor` mode. +- **Planning state** — durable resume/audit cursor at `/.planning/state.json`; it records accepted planning steps and their repo-relative stage outputs. +- **Planning stage output** — accepted planning stdout stored under `/.planning/stages/.stdout.md`; repeated accepted steps use suffixed refs such as `-2.stdout.md`. Failed current-step output stays in run artifacts only. +- **Planning feedback** — explicit user refinement feedback recorded in `.planning/state.json`; it reuses the `revise` planning step and is published only through staged planning/refine transactions. +- **Planning workspace** — off-live candidate migration snapshot built under project state, then copied to a live-dir transaction before publish. - **Phase** — one step of a migration; state transitions in `phases.py`. - **Precondition** — what must already be true before a phase may execute; stored on each manifest phase as `precondition`. - **Definition of Done** — what must be true for a phase to count as completed; written in each phase markdown doc under `## Definition of Done`. @@ -47,14 +80,14 @@ runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions - **Wake-up rule** — schedule for when the driver reconsiders an idle target. - **Eligibility cooldown** — `manifest.cooldown_until` gates re-checks after a migration was deferred or blocked; `last_touch` records activity only. - **Settle protocol** — `.done` + sha256 handshake confirming an interactive agent is finished. -- **Status block** — the driver's end-of-attempt summary written to artifacts. -- **Call role** — `classifier | planner | editor | reviewer` slot filled in a prompt. +- **Status block** — the agent-emitted final-message block parsed by `decisions.py`. 
+- **Call role** — prompt slot recorded in artifacts, including `classify`, `refactor`, dotted planning roles such as `planning.`, `planning.state`, `planning.publish`, and phase roles such as `phase.ready-check` or `phase.execute`. - **Effort budget** — shared nominal tiers `low < medium < high < xhigh`; `--default-effort` is the normal call effort, `--max-allowed-effort` caps target overrides and phase escalation. - **Failure snapshot** — per-attempt failure record at `…/projects//failures/-attempt-NNN-retry-NN-.md`. One file per failed attempt; sort to find the latest. ## 6. Code conventions -- `from __future__ import annotations` at the top of every src file. +- `from __future__ import annotations` at the top of every src file, after an optional module docstring. - Frozen dataclasses for value types; `Literal[…]` for state machines. - Explicit `__all__` per module. - Full-path imports (`from continuous_refactoring.X import Y`). **Never relative.** @@ -70,10 +103,10 @@ runs `uv run pytest`. **Pytest is the only code gate.** GitHub Actions ## 8. Testing idioms -- `pytest>=8.0` only. No coverage, no hypothesis, no markers. +- `pytest>=8.0` only. No coverage, no hypothesis, no custom pytest markers; `pytest.mark.parametrize` is normal. - Monkeypatching is idiomatic — not a smell. - `tests/conftest.py` provides: - - `write_fake_codex` — drops a Python stub for `codex` on PATH. Controlled by `FAKE_CODEX_STDOUT`, `FAKE_CODEX_LAST_MESSAGE`, `FAKE_CODEX_TOUCH_FILE`, `FAKE_CODEX_EXIT_CODE`. + - `write_fake_codex` — drops a Python stub for `codex` on PATH. Controlled by `FAKE_CODEX_STDOUT`, `FAKE_CODEX_STDERR`, `FAKE_CODEX_LAST_MESSAGE`, `FAKE_CODEX_TOUCH_FILE`, `FAKE_CODEX_TOUCH_CONTENT`, `FAKE_CODEX_EXIT_CODE`. - `_prepare_run_env` — `git init -b main` in `tmp_path`; redirects `TMPDIR` and `XDG_DATA_HOME` to the sandbox. - `make_run_once_args` / `make_run_loop_args` — build argparse `Namespace`s so tests bypass the CLI layer. 
- Claude stream-json parsing is covered with recorded NDJSON at `tests/fixtures/claude_stream_json/selection.stdout.log`. @@ -93,9 +126,16 @@ active phase explicitly names `loop.py` in scope. - **Driver owns commits** (`refactor_attempts.py:_finalize_commit()`, called from `loop.py`) — if an agent commits mid-attempt, driver does `git reset --soft head_before` and re-commits with its own message. - **Migration scheduling split** (`migrations.py`, `loop.py`, `phases.py`) — `last_touch` is activity bookkeeping, not the 6-hour retry gate. Deferred/blocked migrations set `cooldown_until`; successful phase completion clears deferral markers so the next ready phase can run immediately. - **Migration tick deferral writes** (`migration_tick.py`) — ready-check deferrals are queued while scanning candidates and saved only when the tick finds no executable phase or blocks for human review. Do not save a deferred manifest before checking later candidates; that dirties the worktree and can make ready-checks reject runnable phases. +- **Migration visibility + consistency gate** (`migration_consistency.py`, `migration_tick.py`, `loop.py`, `review_cli.py`) — candidate scans use `iter_visible_migration_dirs()` so hidden/dotted/internal/symlink dirs are invisible to tick/review list. Before ready-check, `execution-gate` consistency errors block phase execution; `info`/`warning` never block. - **Manifest codec boundary** (`migration_manifest_codec.py`, `migrations.py`) — codec owns legacy `ready_when`, legacy integer `current_phase`, duplicate phase-name rejection, and saved JSON formatting. `load_manifest()` / `save_manifest()` own filesystem and JSON boundary errors. -- **Review CLI boundary** (`cli.py`, `review_cli.py`) — `cli.py` owns parser wiring and run dispatch; migration review internals live in `review_cli.py`, which stays internal and out of package-root `_SUBMODULES`. 
-- **Human-review gating** (`planning.py`, `migration_tick.py`, `review_cli.py`) — migrations with `awaiting_human_review=true` must be invisible to automated migration ticks/ready-checks until `review perform` clears the flag. +- **Planning state codec boundary** (`planning_state.py`, `planning.py`) — `.planning/state.json` is valid only when completed steps replay through the branching planning graph to `next_step`; recorded outputs must be repo-relative files inside the migration directory. User refinement feedback is durable state, and append-only `revision_base_step_counts` anchors let unexecuted ready migrations reuse `revise` after terminal ready decisions; legacy `revision_base_step_count` decodes as one anchor. Persist accepted step stdout after the step is validated; do not add durable fields for failed current-step output. +- **Planning publish transaction** (`planning_publish.py`) — publish copies the complete workspace snapshot to `__transactions__//staged`, validates it, checks same-device and `base_snapshot_id`, moves live to `rollback`, moves staged live, validates live, then deletes rollback. On post-rollback failure, move bad live to `failed` before restoring rollback. Transaction directories are invisible to scheduling/list candidates but visible to `migration doctor --all`. Do not bypass the lock or dirty-live check. +- **One-step planning engine** (`planning.py`) — product planning entry points call `run_next_planning_step()` so one action runs exactly `PlanningState.next_step`, records accepted stdout/state in an off-live workspace, and publishes through `planning_publish.py`. Failed current-step output is never durable resume input. `run_planning` is intentionally not package-exported. +- **Planning resume scheduling** (`migration_tick.py`, `loop.py`, `routing_pipeline.py`) — normal automation runs one eligible `status: planning` step before ready/in-progress phase ticks and before source-target routing. 
Missing or invalid `.planning/state.json` blocks automation with planning failure evidence; `status: planning` must never enter phase ready-check or phase execution. +- **Focused planning reselection** (`loop.py`, `migration_tick.py`) — focused mode tracks planning migrations abandoned with `new-target` only in memory for the current run, skips them while another planning or phase candidate is eligible, and retries them only when no alternative remains. Do not persist this as `cooldown_until`; planning step failure is not a durable readiness deferral. +- **Review CLI boundary** (`cli.py`, `review_cli.py`) — `cli.py` owns parser wiring; staged migration review internals live in `review_cli.py`, publish only through `planning_publish.py`, and stay internal/out of package-root `_SUBMODULES`. Top-level `review perform` is only a compatibility wrapper around this path. +- **Migration CLI boundary** (`cli.py`, `migration_cli.py`) — `cli.py` owns parser wiring only; `migration_cli.py` owns namespace dispatch, read-only list/doctor behavior, and the contained slug/path resolver used by mutation commands. Mutating subcommands delegate their internals to focused modules such as `review_cli.py` or the planning refine entry point. Resolver targets must stay direct visible children of the configured live migrations root and reject symlink, outside, parent-traversal, and ambiguous paths. +- **Human-review gating** (`planning.py`, `migration_tick.py`, `review_cli.py`) — migrations with `awaiting_human_review=true` must be invisible to automated migration ticks/ready-checks until canonical `migration review` clears the flag through staged publish; top-level `review perform` routes to the same compatibility path. `migration refine` may reopen an unexecuted ready migration to planning, but it is user feedback, not review approval. 
- **Migration terminology split** (`migrations.py`, `planning.py`, `prompts.py`) — manifest `precondition` gates phase start; phase markdown `## Definition of Done` governs completion. - **Run-level baseline validation** (`loop.py`) — `run-once`, `run`, and `--focus-on-live-migrations` run the configured validation command after the clean-worktree check and before routing/refactoring. A red baseline stops as `baseline_failed`, not migration human review. - **Phase execution validation gate** (`phases.py`, `prompts.py`, `loop.py`) — a migration phase is complete only after host-side full validation passes. `execute_phase()` retries validation-red attempts from `head_before` up to the effective `--max-attempts` budget, and the phase prompt must include the literal configured validation command plus the phase file's Definition of Done as the completion contract. @@ -108,6 +148,7 @@ active phase explicitly names `loop.py` in scope. ## 11. Surprising CLI semantics - Targeting is **first-match-wins** across `--targets > --globs > --extensions > --paths`. Multiple flags silently use the highest. +- `run` requires targeting or `--scope-instruction`, and also requires `--max-refactors` unless `--targets` or `--focus-on-live-migrations` is set. - `--max-attempts 0` means **unlimited**, not zero. A WARN fires at startup. - `run-once` and `run` both create local commits only; the driver never publishes branch updates. @@ -150,7 +191,7 @@ active phase explicitly names `loop.py` in scope. ## 15. Read-first pointers - `README.md` — feature tour and CLI reference. -- `migrations//plan.md` — active structural work. +- `//plan.md` — active structural work, when this checkout has a live migrations dir. - `src/continuous_refactoring/__init__.py` — public surface and uniqueness check. - `tests/conftest.py` — test env patterns and fake agents. - `src/continuous_refactoring/prompts.py` — prompt templates and taste injection. 
diff --git a/README.md b/README.md index a22a8d7..802c7c4 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ Small, test-gated cleanup commits by an AI coding agent. Think of it as a supervised janitor loop: the agent proposes a cleanup, your tests decide if it stays. +Here's [an article](https://artisincode.com/essays/how-i-use-unspent-tokens/) I wrote about it. + ## Install Try it without installing: @@ -69,15 +71,18 @@ continuous-refactoring run \ --max-attempts 2 ``` -That keeps sweeping targets until it runs out, hits your caps, or starts failing. +That runs up to 10 refactor actions, but stops sooner if the finite target file +runs out or the loop starts failing. Use `run --focus-on-live-migrations` when +you want the loop to work only on eligible live migrations; it bypasses target +selection and `--max-refactors`. ## What it does -- Resolves a target from `--targets`, `--globs`, `--extensions`, or `--paths`, with optional natural-language scoping via `--scope-instruction`. +- Resolves each source action from `--targets`, `--globs`, `--extensions`, or `--paths`, with optional natural-language scoping via `--scope-instruction`. - Runs the agent with a refactoring prompt + your "taste" guidelines. - Runs your validation command (default: `uv run pytest`). - If green and there's a diff, it commits locally and leaves the branch for you to inspect. -- Repeats until it runs out of targets, hits the retry budget, or stacks too many failures. +- Repeats until it spends the action budget, exhausts a finite target file, hits the retry budget, or stacks too many failures. ## Requirements @@ -120,10 +125,15 @@ continuous-refactoring run \ | `init` | Registers this directory as a project, creates a default `taste.md`, and can store `--live-migrations-dir` or `--in-repo-taste`. | | `taste` | Prints the active taste file path. 
Add `--interview` to have an agent author it, `--refine` to iteratively improve an existing taste doc, `--upgrade` to refresh stale taste dimensions, `--global` for the shared file, and `--force` to let `--interview` overwrite custom content after writing a `.bak`. | | `run-once` | Single pass on one resolved target. No retry. If there is a diff and validation passes, it commits locally and prints the diffstat. | -| `run` | The loop. Iterates targets, retries on failure, and commits successful targets locally. | +| `run` | The loop. Iterates refactor actions, retries on failure, and commits successful changes locally. Add `--focus-on-live-migrations` to bypass targeting and work only on eligible live migrations. | | `upgrade` | Checks that the global config manifest is current, rewrites it idempotently, and warns if the global taste file is stale. | -| `review list` | Lists migrations flagged for human review (`awaiting_human_review`). | -| `review perform ` | Starts an interactive agent session to resolve a flagged migration's review. Requires `--with`, `--model`, and `--effort`. | +| `migration list` | Lists visible migrations. Add `--status ` or `--awaiting-review` to filter. | +| `migration doctor ` | Validates one visible migration's consistency. | +| `migration doctor --all` | Validates every visible migration plus internal transaction state. | +| `migration review ` | Starts staged review for a migration awaiting human review. Requires `--with`, `--model`, and `--effort`. | +| `migration refine ` | Records feedback for a planning or unexecuted ready migration and runs one staged planning revision. Requires `--message ` or `--file `, plus `--with`, `--model`, and `--effort`; add `--show-agent-logs` to mirror the planning agent. | + +Legacy `review list` and `review perform ` remain compatibility aliases; prefer `migration list --awaiting-review` and `migration review`. 
## Targeting / Useful flags @@ -134,22 +144,46 @@ Target resolution is first-match-wins: These flags are not mutually exclusive, but only the highest-priority populated source is used. -- `--targets path/to/targets.jsonl` — explicit list; one JSON object per line with `description`, `files`, optional `scoping`, `model-override`, `effort-override`. Effort overrides use `low`, `medium`, `high`, or `xhigh`. -- `--globs 'src/**/*.py:tests/**/*.py'` — colon-separated globs; each matched file becomes its own target. -- `--extensions .py,.ts` — shorthand that expands to `**/*.py`, `**/*.ts`; each matched file becomes its own target. -- `--paths a.py:b.py` — literal paths, all treated as one target. -- `--scope-instruction "clean up the auth module"` — extra free-text scoping. If file-based targeting resolves nothing, this becomes the useful fallback context. +- `--targets path/to/targets.jsonl` — explicit finite list; one JSON object per line with `description`, `files`, optional `scoping`, `model-override`, `effort-override`. Effort overrides use `low`, `medium`, `high`, or `xhigh`. If `--max-refactors` is omitted, `run` processes the file once and stops. +- `--globs 'src/**/*.py:tests/**/*.py'` — colon-separated globs matched once against tracked files from `git ls-files`; each refactor action samples one matched file, so files can repeat. +- `--extensions .py,.ts` — shorthand that expands to `**/*.py`, `**/*.ts` against tracked files from `git ls-files`; each refactor action samples one matched file, so files can repeat. +- `--paths a.py:b.py` — literal user-provided paths, all treated as one grouped target; each refactor action reuses that group. +- `--scope-instruction "clean up the auth module"` — extra free-text scoping. If selected file patterns resolve nothing, this becomes the useful fallback context. -If you provide none of `--targets`, `--globs`, `--extensions`, or `--paths`, then `run` and `run-once` require `--scope-instruction`. 
+If `--globs` or `--extensions` match no tracked files and there is no +`--scope-instruction`, `run` completes successfully with zero refactor actions. +`--paths` is literal input and is not filtered through `git ls-files`. + +If you provide none of `--targets`, `--globs`, `--extensions`, or `--paths`, +then `run` and `run-once` require `--scope-instruction`; the driver still +random-samples tracked files from `git ls-files` for each action and uses the +scope text as context for that target. ### Migrations & taste flags - `init --live-migrations-dir PATH` — enables the larger-refactoring workflow for this project. The path is stored repo-relative in the project registry and created if missing. - `init --in-repo-taste [PATH]` — stores this project's taste file in the repo and remembers the repo-relative path. Defaults to `.continuous-refactoring/taste.md`; re-run `init --in-repo-taste ...` to choose a different path. +- `migration list` — shows visible migrations; `--awaiting-review` narrows to human-review handoffs. +- `migration doctor ` / `migration doctor --all` — read-only consistency checks. Doctor reports problems; it does not repair them. +- `migration review --with ... --model ... --effort ...` — resolves an `awaiting_human_review` migration through a staged workspace. +- `migration refine (--message |--file ) --with ... --model ... --effort ... [--show-agent-logs]` — adds user feedback to a planning or unexecuted ready migration and resumes planning through the `revise` step when reopening ready work. - `taste --refine` — opens a collaborative editing session for the taste file. The agent keeps refining until you tell it to write, then the session ends automatically after the settled write. - `taste --upgrade` — re-interviews for taste dimensions added since your last version. No-op when already current; use `taste --refine` if you want to rework the doc anyway. 
- `taste --force` — only applies to `--interview`; it allows a customized taste file to be overwritten after backing it up to `taste.md.bak`. +Canonical migration commands: + +```bash +continuous-refactoring migration list +continuous-refactoring migration list --status planning +continuous-refactoring migration list --awaiting-review +continuous-refactoring migration doctor +continuous-refactoring migration doctor --all +continuous-refactoring migration review --with codex --model gpt-5 --effort high +continuous-refactoring migration refine --message "split the risky phase" --with codex --model gpt-5 --effort high +continuous-refactoring migration refine --file feedback.md --with codex --model gpt-5 --effort high +``` + ### Shared `run` / `run-once` flags - `--with`, `--model` — required agent backend/model selection. @@ -164,10 +198,11 @@ If you provide none of `--targets`, `--globs`, `--extensions`, or `--paths`, the ### `run`-only flags -- `--max-attempts N` — per-target retry budget. `1` = no retry, `0` = unlimited (which means permanently broken targets will never give up). -- `--max-refactors N` — cap the number of targets per run. Required unless you use `--targets`. -- `--max-consecutive-failures N` — bail after N targets fail in a row. Default 3. -- `--sleep SECONDS` — pause between completed targets. Useful when you want a long batch without hammering the repo or your agent budget. +- `--max-attempts N` — per-action retry budget. `1` = no retry, `0` = unlimited (which means the loop never gives up on a permanently broken action). +- `--max-refactors N` — cap the number of refactor actions per run. Required unless you use `--targets` or `--focus-on-live-migrations`. +- `--focus-on-live-migrations` — bypass target selection and `--max-refactors`; iterate eligible live migrations until they are done, deferred, blocked, or the failure budget trips. +- `--max-consecutive-failures N` — bail after N actions fail in a row. Default 3. 
+- `--sleep SECONDS` — pause between completed actions. Useful when you want a long batch without hammering the repo or your agent budget. - `--commit-message-prefix TEXT` — subject prefix for successful refactor or migration-plan commits. Default `continuous refactor`. ## Safety behaviors @@ -184,13 +219,27 @@ If you provide none of `--targets`, `--globs`, `--extensions`, or `--paths`, the Each run writes to `$TMPDIR/continuous-refactoring//`: - `summary.json` — rolling status, counts, per-attempt stats -- `events.jsonl` — structured event log +- `events.jsonl` — structured event log with call roles such as `classify`, + `planning.`, `phase.ready-check`, `phase.execute`, and + `phase.validation` - `run.log` — human-readable log - `attempt-NNN/[retry-NN/]refactor/` — per-attempt agent + test stdout/stderr +- `baseline/initial/` — baseline validation stdout/stderr before work starts +- `classify/` — classifier agent stdout/stderr +- `scope-expansion/` — scope candidates, selection, and bypass reason +- `attempt-NNN/[retry-NN/]planning//` — planning agent stdout/stderr for + migration planning steps +- `phase-ready-check/` — phase precondition agent stdout/stderr +- `attempt-NNN/[retry-NN/]phase-execute/` — phase agent and validation logs +- `migration-probes/action-NNN/` — migration probe logs during normal `run` + actions, including planning, phase ready-checks, and phase execution Mixed-effort runs are auditable: summaries and call events record the default effort, max allowed effort, requested effort, effective effort, source, and whether the request was capped. -The path prints at startup. Grep it when something goes sideways. +The path prints at startup. Grep it when something goes sideways. Failed +non-commit decisions also write durable XDG snapshots under the project failure +directory, usually +`~/.local/share/continuous-refactoring/projects//failures/`. ## Taste files @@ -218,7 +267,7 @@ This tells the CLI where to store migration artifacts. 
The path is repo-relative Each `run` / `run-once` tick now checks for eligible migration work before falling back to single-commit cleanups: 1. **Classify** — a classifier agent reads the target and decides: `cohesive-cleanup` (one-shot path) or `needs-plan` (migration path). -2. **Plan** — for `needs-plan` targets, a six-stage planning workflow runs: generate approaches → pick best → expand into phases → review → revise → final review. Artifacts land under `//`. +2. **Plan** — for `needs-plan` targets, each automation action runs exactly one planning step: approaches, pick-best, expand, review, optional revise/review-2, then final-review. Accepted steps update `.planning/state.json`, store stdout under `.planning/stages/`, and publish through a staged transaction. Failed current-step output stays in run artifacts and is not resume input. 3. **Execute** — each phase is a self-contained unit of work. The tick picks the oldest eligible migration, checks whether its current phase precondition is satisfied, and executes it on the current branch. Phase completion is judged against the phase file's `## Definition of Done`; commit message identifies the migration as `migration//.md`. ### Migration directory layout @@ -227,14 +276,20 @@ Each `run` / `run-once` tick now checks for eligible migration work before falli / / manifest.json # status, phases, wake-up schedule + .planning/ + state.json # durable planning cursor and accepted step refs + stages/ # accepted planning stdout, suffixed on repeats plan.md # the expanded plan approaches/ # candidate approaches considered during planning phase-1-.md # per-phase specification phase-2-.md ... + __transactions__/ # internal staged publish state __intentional_skips__/ # migrations rejected at final review ``` +Do not edit `.planning/` or `__transactions__/` by hand. Use `migration doctor` when the shape looks wrong. + ### Wake-up rules Migrations don't run on every tick. 
The scheduler now separates **activity** from @@ -268,7 +323,7 @@ Before executing a phase, a ready-check agent verifies that the current phase pr - **ready: yes** — phase executes; on green tests, the phase is marked done, any prior deferral markers are cleared, and the migration advances immediately to the next phase. - **ready: no** — manifest activity is bumped, a retry cooldown is started, and a future `wake_up_on` is recorded when needed; the tick moves on. -- **ready: unverifiable** — the migration is flagged `awaiting_human_review` and put on cooldown. Automated migration ticks skip flagged migrations until review clears the flag. Use `review list` to find it and `review perform --with ... --model ... --effort ...` to resolve it interactively. +- **ready: unverifiable** — the migration is flagged `awaiting_human_review` and put on cooldown. Automated migration ticks skip flagged migrations until review clears the flag. Use `migration list --awaiting-review` to find it and `migration review --with ... --model ... --effort ...` to resolve it interactively. Human-facing migration references use the relative phase spec path, for example `phase-2-failure-report.md`. The manifest cursor stores the phase `name`, not a numeric index. diff --git a/approaches/agent-backend-boundary-split.md b/approaches/agent-backend-boundary-split.md deleted file mode 100644 index abf6010..0000000 --- a/approaches/agent-backend-boundary-split.md +++ /dev/null @@ -1,39 +0,0 @@ -# Approach: Backend Boundary Split - -## Strategy -- Split backend-specific command behavior out of `src/continuous_refactoring/agent.py`. -- Create focused modules: - - `src/continuous_refactoring/agent_backends.py` for supported-agent validation and command construction, - - `src/continuous_refactoring/agent_claude_stream.py` for Claude stream-json extraction, - - keep `agent.py` as the orchestration layer for interactive execution, settle handling, and observed command capture. 
-- Keep public imports stable through `agent.py`; no package-root re-export churn unless needed. - -## Tradeoffs -- Strongest readability gain around the real domain seam: Codex and Claude are different products with different protocol handling. -- Makes future backend additions or behavior changes less likely to bloat the process-control code. -- Medium migration churn because tests and imports will move across files. -- Risk of over-splitting if backend logic remains tiny after cleanup. - -## Estimated phases -1. Extract backend validation and command builders into `agent_backends.py` with no behavior changes. - - `required_effort`: `medium` -2. Extract Claude NDJSON parsing into `agent_claude_stream.py` and retarget stream-json tests there. - - `required_effort`: `low` -3. Reduce `agent.py` to orchestration glue plus interactive/process-control concerns. - - `required_effort`: `medium` -4. Delete dead wrappers and duplicate private helpers once all callsites are stable. - - `required_effort`: `low` -5. Run full pytest, paying extra attention to package export uniqueness and callsite imports. - - `required_effort`: `low` - -## Risk profile -- Technical risk: medium -- Blast radius: medium -- Failure modes: - - Import cycles if orchestration and backend helpers are split in the wrong direction. - - Private helper extraction accidentally weakening boundary errors or hiding unsupported-agent checks. - - Test churn masking a subtle command-line regression. - -## Best when -- The main pain is that backend concerns and process-control concerns are mixed together. -- We want a real module boundary without touching the heavier settle/watchdog code yet. 
diff --git a/approaches/agent-execution-domain-split.md b/approaches/agent-execution-domain-split.md deleted file mode 100644 index 58d58b8..0000000 --- a/approaches/agent-execution-domain-split.md +++ /dev/null @@ -1,40 +0,0 @@ -# Approach: Execution-Domain Split - -## Strategy -- Split `agent.py` by execution model rather than by backend. -- Proposed modules: - - `src/continuous_refactoring/agent_commands.py` for agent command construction and support checks, - - `src/continuous_refactoring/agent_interactive.py` for settle protocol, signal escalation, terminal reset, and TTY handling, - - `src/continuous_refactoring/agent_observed.py` for subprocess capture, watchdog behavior, timestamped logs, and test execution, - - `src/continuous_refactoring/agent.py` as a thin public facade. -- Keep public function names stable: `build_command`, `maybe_run_agent`, `run_agent_interactive`, `run_agent_interactive_until_settled`, `run_observed_command`, `run_tests`, `summarize_output`. - -## Tradeoffs -- Cleanest long-term structure. The boundaries match how callers think about the module. -- Makes the load-bearing settle protocol and watchdog code easier to review in isolation. -- Highest churn of the options here. More files, more imports, more chances to nick a subtle invariant. -- Adds a facade module, which is justified only if we believe `agent.py` will keep evolving. - -## Estimated phases -1. Extract `agent_commands.py` and move backend validation plus command builders first. - - `required_effort`: `medium` -2. Extract `agent_observed.py` for command capture, watchdog, and `run_tests`. - - `required_effort`: `medium` -3. Extract `agent_interactive.py` for settle protocol, terminal state handling, and forced Codex reset. - - `required_effort`: `high` -4. Collapse `agent.py` into a thin facade with direct imports and no compatibility shims beyond those imports. - - `required_effort`: `low` -5. Rebalance tests around the new module boundaries and run full pytest. 
- - `required_effort`: `medium` - -## Risk profile -- Technical risk: medium-high -- Blast radius: high -- Failure modes: - - Import layering mistakes around `ContinuousRefactorError`, `CommandCapture`, and shared helpers. - - Behavioral regressions in the settle handshake or watchdog teardown because lifecycle code moved wholesale. - - Package export collisions or stale imports if the facade and implementation modules drift. - -## Best when -- We want the migration to end with a durable structure, not just a neater big file. -- We can afford a higher-churn refactor in exchange for clearer review surfaces later. diff --git a/approaches/agent-inplace-seams.md b/approaches/agent-inplace-seams.md deleted file mode 100644 index a64d159..0000000 --- a/approaches/agent-inplace-seams.md +++ /dev/null @@ -1,37 +0,0 @@ -# Approach: In-Place Seams Inside `agent.py` - -## Strategy -- Keep `src/continuous_refactoring/agent.py` as one module for now. -- Refactor internally around three truthful sections: - - command construction and backend validation, - - interactive settle lifecycle and terminal recovery, - - observed command execution and watchdog logging. -- Normalize helper naming and data flow so the public API reads top-down without changing imports anywhere else. -- Add a small amount of typed structure only where it shortens branches or clarifies return values. - -## Tradeoffs -- Safest path. No import churn, no package-surface changes, minimal merge pain. -- Best fit if the immediate problem is readability and local change friction, not module count. -- Leaves `agent.py` large. It gets cleaner, but not smaller in a meaningful architectural way. -- Does not create future domain boundaries for backend-specific behavior. - -## Estimated phases -1. Reorder and tighten private helpers so command-building, settle logic, and observed-command logic read as coherent blocks. - - `required_effort`: `low` -2. 
Introduce small internal value helpers where they remove repetitive branching without hiding behavior. - - `required_effort`: `low` -3. Update tests to reflect any renamed helpers or changed internal flow, while keeping behavior identical. - - `required_effort`: `low` -4. Run full pytest and remove dead local helper paths uncovered during the cleanup. - - `required_effort`: `low` - -## Risk profile -- Technical risk: low -- Blast radius: low -- Failure modes: - - Accidental behavior drift in settle timing or Claude output extraction during local cleanup. - - Over-tidying that obscures the load-bearing Codex terminal reset and watchdog semantics. - -## Best when -- We want the fastest safe readability win. -- We do not yet know which future split is actually worth carrying. diff --git a/approaches/artifacts-boundary-hardening-inplace.md b/approaches/artifacts-boundary-hardening-inplace.md deleted file mode 100644 index 4e5f147..0000000 --- a/approaches/artifacts-boundary-hardening-inplace.md +++ /dev/null @@ -1,50 +0,0 @@ -# Approach: In-Place Artifact Boundary Hardening - -## Strategy -- Keep module surfaces stable and refactor inside the existing cluster with minimal churn: - - `src/continuous_refactoring/artifacts.py` - - `src/continuous_refactoring/agent.py` - - `src/continuous_refactoring/loop.py` - - `src/continuous_refactoring/phases.py` - - `src/continuous_refactoring/migration_tick.py` - - `src/continuous_refactoring/routing_pipeline.py` - - `src/continuous_refactoring/config.py` - - `src/continuous_refactoring/git.py` - - `src/continuous_refactoring/cli.py` -- Treat `artifacts.py` as the current error and telemetry spine, but harden it so every external effect returns actionable causes and preserves `__cause__`. -- At module boundaries (agent, cli, loop, phases, routing, migration, git, config) translate only where behavior needs a boundary contract change: - - Keep original exceptions as nested causes unless caller-level signal is improved by context. 
- - Avoid blanket wrapping inside helper functions that are already at the callsite. - -## Tradeoffs -- Lowest blast radius and easiest to apply under an active migration. -- No new module-level indirection and little `__init__.py` risk. -- Best fit for taste version 1: strong boundary comments only where contract changes. -- Leaves `artifacts.py` still carrying multiple concerns (capture/state/path/root metadata), but no risky cut needed for this migration. -- Keeps direct import compatibility with `ContinuousRefactoringError` and existing `_SUBMODULES`. - -## Estimated phases -1. Add migration tests for failure-cause retention -2. Introduce explicit boundary helpers and nested exceptions in `artifacts.py` -3. Update cluster modules to catch and wrap only at decision points -4. Add regression tests on loop/migration-path behavior -5. Tighten CLI exit messaging while preserving exact user-visible strings that tests assert - -### Phase intent -- Phase 1: Add focused tests in `tests/test_continuous_refactoring.py`, `tests/test_phases.py`, `tests/test_loop_migration_tick.py`, `tests/test_routing.py` for `__cause__` preservation. -- Phase 2: In `artifacts.py`, add small helpers for atomic JSON/log writes and command capture parsing that include nested underlying errors. -- Phase 3: In cluster modules, avoid new broad wrappers; replace ambiguous messages with boundary-specific context where needed. -- Phase 4: Verify migration and loop flow still emits expected artifacts summaries and commit handoff semantics. -- Phase 5: Run targeted migration tests, then run full suite as final gate. - -## Risk profile -- Technical risk: low to medium -- Blast radius: medium, because changes touch loop routing and failure persistence paths -- Failure modes: - - Message-level test regressions if we over-wrap and lose exact strings. - - Slightly more verbose failure paths in `artifacts.py` impacting readability if too many wrappers are added. 
- - No new APIs expected, so integration regression risk stays low. - -## Why this first -- It satisfies the taste mandate (boundary-aware wrapping with cause chaining) without a disruptive module split. -- It keeps compatibility and can be evaluated quickly with tight, deterministic phase gates. diff --git a/approaches/artifacts-domain-split-lightweight.md b/approaches/artifacts-domain-split-lightweight.md deleted file mode 100644 index bfd51be..0000000 --- a/approaches/artifacts-domain-split-lightweight.md +++ /dev/null @@ -1,43 +0,0 @@ -# Approach: Lightweight Domain Split of Artifact Subsystems - -## Strategy -- Keep API compatibility but split `artifacts.py` into two files: - - `src/continuous_refactoring/artifacts_models.py` for immutable telemetry data structures. - - `src/continuous_refactoring/artifact_runs.py` for run lifecycle creation and atomic writes. - - `src/continuous_refactoring/artifacts.py` as a thin compatibility re-export and doc seam. -- Keep CLI/migration and loop integration untouched where possible: - - `loop.py`, `phases.py`, `migration_tick.py`, `routing_pipeline.py`, `agent.py`, `config.py`, `git.py`, `cli.py`. -- Replace ad hoc imports of `ContinuousRefactorError` from `artifacts.py` with direct imports from `artifacts.py` compatibility alias only if needed. -- This creates clearer file-level domains while preserving FQNs and avoiding module sprawl. - -## Tradeoffs -- Cleaner local module focus and lower future merge pain when `artifacts.py` starts to grow. -- Best future extensibility for migration state persistence versus command-capture concerns. -- Highest mechanical risk of this set due split and import graph migration. -- Increases short-term review burden because many names stay re-exported for compatibility. -- Must guard against hidden behavior shifts due import order and module initialization. - -## Estimated phases -1. Create split modules with zero-behavior shims and compatibility exports -2. 
Migrate production imports and keep package `__all__` uniqueness clean -3. Fold in taste-compliant error wrapping and cause chaining during migration -4. Update tests to use compatibility imports and assert no drift in summaries/events -5. Run full suite after phased import migration and clean dead-paths - -### Phase intent -- Phase 1: Data models and lifecycle utilities move out without changing logic. -- Phase 2: Rewire imports in cluster modules and ensure `continuous_refactoring.__all__` contract remains stable. -- Phase 3: Apply error-boundary pass without introducing interface churn. -- Phase 4: Remove transitional names and dead compatibility comments only if no longer needed. -- Phase 5: Verification as per existing full-run migration gate. - -## Risk profile -- Technical risk: medium -- Blast radius: high -- Failure modes: - - Package import order regressions while `__init__.py` rebuilds re-exports. - - Hidden test failures due import-time side effects. - - More difficult conflict detection with duplicate symbols during package init. - -## Why pick this only if we can absorb the churn -- Strong structure win, but not worth it if we need the cleanest, fastest path to safe artifact boundary improvement. diff --git a/approaches/artifacts-error-typing-crossboundaries.md b/approaches/artifacts-error-typing-crossboundaries.md deleted file mode 100644 index f5edda0..0000000 --- a/approaches/artifacts-error-typing-crossboundaries.md +++ /dev/null @@ -1,44 +0,0 @@ -# Approach: Error Taxonomy with Boundary-Specific Types - -## Strategy -- Introduce a dedicated error module and explicit boundary error classes while preserving public compatibility: - - Add `src/continuous_refactoring/errors.py` with `ContinuousRefactoringError` as canonical base. - - Re-export `ContinuousRefactoringError` from `artifacts.py` to keep existing imports and `__init__` behavior intact. 
- - Add module-level boundary types: `CommandBoundaryError`, `ArtifactBoundaryError`, `GitBoundaryError`, `MigrationBoundaryError`, `LoopBoundaryError`. -- Move wrapping logic so each cluster module becomes explicit about what it owns and what it reports: - - `agent` and `config` wrap infra faults when translating to domain failure outcomes. - - `loop`, `phases`, `migration_tick`, and `routing_pipeline` wrap only policy-level failures. -- Keep semantics of CLI and migration scheduling unchanged; preserve existing command strings, artifact path names, and summary structure. - -## Tradeoffs -- Clearer operational signal and cleaner root-cause triage. -- Stronger alignment with taste instruction on nested exceptions at boundaries. -- Larger import churn across the cluster and tests. -- Must update `continuous_refactoring.__all__` import graph after moving exported error ownership, which is extra mechanical risk. -- Potentially over-specified errors if boundary classes expand faster than actual domain needs. - -## Estimated phases -1. Add `errors.py` and compatibility export path -2. Create boundary exception types and migrate `artifacts.py` to consume canonical base -3. Shift catch/raise behavior in `agent.py`, `git.py`, `loop.py`, `phases.py`, `migration_tick.py`, `routing_pipeline.py` -4. Update `cli.py` and tests that assert exact exception types/messages -5. Run targeted migration and full project verification - -### Phase intent -- Phase 1: New module only, no production behavior changes yet. -- Phase 2: Add wrappers and nesting around I/O/process/git/agent callouts. -- Phase 3: Convert consumer catches to boundary-aware failures and update decision records where needed. -- Phase 4: Add/adjust tests for exception typing, compatibility of imports, and boundary names in messages. -- Phase 5: Validate `tests/test_continuous_refactoring.py`, `tests/test_run.py`, `tests/test_run_once.py`, `tests/test_phases.py`, and `tests/test_loop_migration_tick.py`. 
- -## Risk profile -- Technical risk: medium -- Blast radius: medium -- Failure modes: - - Import graph breakage from moved exported symbols into `__init__.py` and `_SUBMODULES`. - - Tests that assert specific exception text may break on message wrapping style. - - Additional migration complexity from adding new module and maintaining alias compatibility. - -## Why choose this if stability budget allows -- Better long-term maintainability and explicit domain boundaries. -- Clear runway for future non-trivial refactors where cross-module ownership gets noisier than today. diff --git a/approaches/init-init-export-contract.md b/approaches/init-init-export-contract.md deleted file mode 100644 index c3b4fc8..0000000 --- a/approaches/init-init-export-contract.md +++ /dev/null @@ -1,34 +0,0 @@ -# Approach: Contract-Driven Public Surface Descriptor - -## Strategy -- Move exported-public definition out of runtime module-order magic into `src/continuous_refactoring/public_api.py`. -- Define a compact, explicit `PUBLIC_REEXPORTS` descriptor (module_name, symbol, optional alias) and drive `__init__.py` from that list. -- Keep package runtime behavior: same names still appear in `continuous_refactoring.__all__`, same re-exported callsites, same hidden-module boundary. -- Keep `__SUBMODULES` for import validation, but source of truth for API moves to descriptor data. - -## Tradeoffs -- Pros: clearer intent, easier code review for future API changes, simpler to detect stale/manual exports, aligns with domain-focused boundaries and naming truthfulness. -- Cons: adds one new module and one migration step to validate descriptor integrity. -- Why this is taste-aligned: no speculative abstractions, clear readability gain, explicit compatibility over convenience. - -## Estimated phases -1. Add `src/continuous_refactoring/public_api.py` with a typed re-export descriptor + minimal validation helpers. -2. 
Refactor `src/continuous_refactoring/__init__.py` to build `__all__` from descriptor + runtime imports only. -3. Add descriptor-level tests in `tests/test_continuous_refactoring.py` for: - - all exported names present, - - no duplicate symbol names in descriptor, - - internal module not re-exported (`migration_manifest_codec` remains private). -4. Add a migration check that compares generated `__all__` to a non-empty known set to prevent accidental empty exposure. -5. Run targeted contract tests for package init and prompt/loop import flows. - -### Phased scope -- Files touched: `src/continuous_refactoring/__init__.py`, `src/continuous_refactoring/public_api.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Medium -- Blast radius: Medium -- Failure modes: - - New descriptor errors can hide symbols if import paths drift. - - More churn touching two new files means merge conflict potential during rapid migrations. -- Mitigation: keep descriptor small and strictly validated before touching any loop/routing logic. diff --git a/approaches/init-init-export-lazy-namespace.md b/approaches/init-init-export-lazy-namespace.md deleted file mode 100644 index b5e7cb8..0000000 --- a/approaches/init-init-export-lazy-namespace.md +++ /dev/null @@ -1,31 +0,0 @@ -# Approach: Lazy-Load Package Namespace via `__getattr__` - -## Strategy -- Replace eager import-and-reexport side-effects in `src/continuous_refactoring/__init__.py` with explicit `__all__` and lazy symbol resolution via `__getattr__`. -- Keep exported API stable but defer module imports until first symbol access. -- Use cause-preserving wrapping only in namespace boundary failures (e.g., loader exception -> wrapped as `ContinuousRefactorError` with original exception attached) and avoid translation elsewhere. -- Keep `__SUBMODULES` for package contract visibility, but shrink initial work needed for import-time module graph. 
- -## Tradeoffs -- Pros: faster and cleaner import path for package consumers, easier to spot import fan-in issues when one symbol fails to resolve. -- Cons: behavior shifts for side effects that depended on module import side-effects during package import; requires careful docs/tests for `hasattr`/`dir` expectations. -- Why this is taste-aligned: keeps compatibility paths safer (no hard cuts), uses explicit boundary mapping, uses truthful transitional naming (`migrating`/`stabilized` states where needed in plan docs). - -## Estimated phases -1. Design a symbol-to-module map (static, not dynamic inference) and explicit `__all__` in `__init__`. -2. Implement `__getattr__` loader path and `__dir__` to keep introspection stable. -3. Add targeted tests in `tests/test_continuous_refactoring.py` and a small namespace-focused regression test verifying `hasattr` works for public exports. -4. Add a migration-readiness test run against `loop.py`/`prompts.py` entry usage to ensure the refactoring pipeline still imports cleanly. -5. Decide and lock rollback if lazy behavior introduces import timing regressions. - -### Phased scope -- Files touched: `src/continuous_refactoring/__init__.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Medium to High -- Blast radius: Medium -- Failure modes: - - subtle breakage in code that relies on eager module import side effects. - - harder-to-diagnose delayed import failures during runtime. -- Mitigation: phase-gated activation with a hard stop plan after contract test failures; fallback to Approach 1 style if timing regression appears. 
diff --git a/approaches/init-init-export-surface.md b/approaches/init-init-export-surface.md deleted file mode 100644 index 43c8296..0000000 --- a/approaches/init-init-export-surface.md +++ /dev/null @@ -1,34 +0,0 @@ -# Approach: Surface-Clarity Refactor for `__init__.py` - -## Strategy -- Keep current re-export model and behavior intact, but make it explicit and inspectable. -- In `src/continuous_refactoring/__init__.py`, replace the raw tuple of imports with a small set of explicit module entries plus one `collect_package_exports()` helper. -- Enforce duplicate detection with origin-aware error messages (module + symbol), preserving full cause chains on lower-level errors only where raised. -- Keep `__SUBMODULES` and exported symbols backward-compatible for existing tests and callers. -- No module split or new runtime behavior outside package init. - -## Tradeoffs -- Pros: very low blast radius, low behavioral risk, minimal API churn, direct migration to stable `__all__` contract. -- Cons: still keeps `__init__.py` as the central export hub and does not change the eager-import profile. -- Why this is taste-aligned: it avoids speculative boundaries, keeps module boundaries stable, and improves clarity without touching dead/legacy code paths. - -## Estimated phases -1. Capture current export expectations in tests - - Add/extend assertions for stable symbol presence and deterministic export order if useful. -2. Introduce a structured `_PUBLIC_MODULES` list and extraction helper in `src/continuous_refactoring/__init__.py` - - Preserve module import order and public-only behavior. -3. Upgrade duplicate-symbol checks to include duplicate provenance details while keeping same failure contract. -4. Add a tiny regression test for internal-module re-export exclusion still holding (`migration_manifest_codec` remains module-private to package root). -5. Run focused package contract tests. 
- -### Phased scope -- File touched: `src/continuous_refactoring/__init__.py` -- Test touched: `tests/test_continuous_refactoring.py` - -## Risk profile -- Technical risk: Low -- Blast radius: Low -- Failure modes: - - Hidden breakage if symbol collection accidentally drops a symbol due descriptor typo. - - Slightly harder-to-spot import-time failures if one of the modules in the explicit list raises on import. -- Mitigation: phase gates with existing package contract tests before migration write path changes. diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md deleted file mode 100644 index 3397292..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/approaches/inplace-artifact-boundary-hardening.md +++ /dev/null @@ -1,34 +0,0 @@ -# In-place artifact boundary hardening - -## Strategy - -Keep module boundaries intact and make failure contracts explicit where side effects cross module seams. - -1. Baseline current behavior with regression tests that assert boundary failures carry `__cause__` where this migration intends to improve context. -2. Tighten `artifacts.py` with private boundary helpers for event writes, summary serialization, and atomic persistence, then apply them to existing callsites with no public API change. -3. Extend callsite-level wrappers in `agent.py`, `git.py`, `phases.py`, and `migration_tick.py` so boundary failures bubble with preserved causes while preserving current control flow. -4. Update orchestration and CLI surfaces in `loop.py`, `config.py`, and `cli.py` to keep recovery/abort semantics unchanged while preserving richer causal context. -5. Close with a migration-wide contract lock, duplicate-symbol safety checks, and full-suite verification. - -## Tradeoffs - -Pros: -- No module splitting or symbol churn. 
-- Localized change surface anchored to observed co-change boundaries. -- Minimal stack distortion because boundaries stay aligned with existing module seams. - -Cons: -- Additional wrapper indirection in hot paths can lengthen tracebacks. -- Requires coordinated test updates across adjacent modules before the final lock step. - -## Compatibility stance - -No canary/cutover rollout in this repo. The migration is a straight in-place refinement with stronger boundary contracts and stable behavior defaults. - -## Phase intent - -- `phase-1` records a stable baseline and ensures the suite will catch causal-regression mistakes. -- `phase-2` introduces the module-level helpers in `artifacts.py` and validates their persistence contract. -- `phase-3` applies adjacent boundary wrappers at seams, including migration-tick reporting. -- `phase-4` propagates the contract safely through loop/CLI/config orchestration points. -- `phase-5` freezes contracts and runs full validation for shipping confidence. diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json deleted file mode 100644 index 16817fa..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "awaiting_human_review": false, - "cooldown_until": null, - "created_at": "2026-04-27T21:59:42.893-07:00", - "current_phase": "", - "human_review_reason": null, - "last_touch": "2026-04-28T16:06:15.347-07:00", - "name": "src-continuous-refactoring-artifacts-py-20260427T215942", - "phases": [ - { - "done": true, - "effort_reason": null, - "file": "phase-1-artifact-boundary-baseline.md", - "name": "artifact-boundary-baseline", - "precondition": "- No production files in the migration scope have been modified yet. 
- The target migration scope is unchanged in production modules.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-2-artifacts-boundary-contract.md", - "name": "artifacts-boundary-contract", - "precondition": "- Phase 1 is marked complete in the migration manifest. - `src/continuous_refactoring/artifacts.py` has not been edited yet in this migration.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-3-boundary-wrappers-at-callsites.md", - "name": "boundary-wrappers-at-callsites", - "precondition": "- Phase 2 is marked complete in the migration manifest. - Phase-2 boundary contracts are present in `artifacts.py` and their tests. - No edits are made in `config.py`, `loop.py`, or `cli.py` during this phase.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-4-loop-and-cli-boundary-resilience.md", - "name": "loop-and-cli-boundary-resilience", - "precondition": "- Phase 3 is marked complete in the migration manifest. - Boundary wrappers from phase 3 are present at the intended module seams. - No edits in `__init__.py` in this phase yet.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-5-shippable-regression-and-contract-lock.md", - "name": "shippable-regression-and-contract-lock", - "precondition": "- Phase 4 is marked complete in the migration manifest. - All intended phase edits are present in working tree. 
- All phase documents in this migration directory match their intended scope (especially phase names referenced in `manifest.json`).", - "required_effort": null - } - ], - "status": "done", - "wake_up_on": null -} diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md deleted file mode 100644 index 0f2ea41..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-1-artifact-boundary-baseline.md +++ /dev/null @@ -1,32 +0,0 @@ -# Phase 1: Baseline failure-cause behavior on artifact and phase boundaries - -## Objective -Create a failing-ready baseline for cause-preserving failure behavior without changing production code. - -## Scope -- `tests/test_continuous_refactoring.py` -- `tests/test_phases.py` -- `tests/test_loop_migration_tick.py` - -## Instructions -1. Add baseline tests in `tests/test_continuous_refactoring.py` for artifact persistence paths: - - fail-fast behavior on malformed payload flows - - boundary failures now asserting `__cause__` expectations only where behavior already depends on translation -2. Add baseline tests in `tests/test_phases.py` for readiness/phase parsing failure paths that already route through `ContinuousRefactorError`. -3. Add focused checks in `tests/test_loop_migration_tick.py` for artifact summary/failure text preservation and non-masked root causes. -4. Keep all production files untouched in this phase. - -## Precondition -- No production files in the migration scope have been modified yet. -- The target migration scope is unchanged in production modules. - -## Definition of Done -- New tests explicitly exercise baseline boundary-failure expectations for artifact and phase orchestration. -- No production files are edited. -- All phase-1 scope tests pass. -- The tree remains shippable with only baseline test changes. 
- -## Validation steps -- `uv run pytest tests/test_continuous_refactoring.py` -- `uv run pytest tests/test_phases.py` -- `uv run pytest tests/test_loop_migration_tick.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md deleted file mode 100644 index 4295b78..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-2-artifacts-boundary-contract.md +++ /dev/null @@ -1,35 +0,0 @@ -# Phase 2: Artifacts module in-place boundary hardening - -## Objective -Create a strict, low-churn boundary contract in `artifacts.py` around artifact writes and serialization paths, preserving underlying exceptions through nested causes. - -## Scope -- `src/continuous_refactoring/artifacts.py` -- `tests/test_continuous_refactoring.py` - -## Instructions -1. Add private helpers in `artifacts.py` to isolate unsafe effects: - - event append helper that captures and wraps effect-level `OSError` exceptions with `ContinuousRefactorError` using `from`. - - summary serialization helper that wraps serialization/value-shape failures with `ContinuousRefactorError` using `from` when boundary context adds signal. - - atomic write helper that wraps parent-dir/temp-file/write/replace failures with `ContinuousRefactorError` using `from`. -2. Use those helpers in: - - `RunArtifacts.log()` event emission, - - `RunArtifacts.write_summary()`, - - `create_run_artifacts()` initialization where the first summary write establishes the run boundary state. -3. Add boundary-level context only where it changes caller signal (what failed and where), but do not replace clearer native errors from pure bookkeeping branches. -4. Keep existing module surface (`__all__`, class names, public functions) unchanged. -5. 
Update tests to pin that nested causes are preserved for event-write, summary-serialization, and atomic-write boundary failures. - -## Precondition -- Phase 1 is marked complete in the migration manifest. -- `src/continuous_refactoring/artifacts.py` has not been edited yet in this migration. - -## Definition of Done -- Boundary helper functions exist in `artifacts.py` and are wired into event-write and summary-write/serialize flows. -- Wrapped boundary failures from `artifacts.py` include original exceptions as `__cause__`. -- Paths that are semantically better left unwrapped remain unwrapped. -- `tests/test_continuous_refactoring.py` contains explicit cause assertions for artifact boundary failures. -- Public module contracts (`__all__`, types, API names, and `ContinuousRefactorError`) remain unchanged. - -## Validation steps -- `uv run pytest tests/test_continuous_refactoring.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md deleted file mode 100644 index 920a8c3..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-3-boundary-wrappers-at-callsites.md +++ /dev/null @@ -1,44 +0,0 @@ -# Phase 3: Boundary wrappers at module seams - -## Objective -Apply boundary wrappers at adjacent module seams, preserving causes while keeping existing callsite semantics stable across routing and migration-tick flows. - -## Scope -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `tests/test_run.py` -- `tests/test_routing.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` - -## Instructions -1. 
In `agent.py`, wrap subprocess/process-launch failures with `ContinuousRefactorError` when a module boundary message improves troubleshooting, and preserve the original exception via `from`. -2. In `git.py`, keep `GitCommandError` as a boundary type and add nested causes consistently where subprocess launch/runtime failures are converted into module boundary errors. -3. In `phases.py`, preserve verdict flow while making readiness and phase-result errors boundary-safe at decision points. -4. In `migration_tick.py`, preserve defer/blocked/abandon decision flow while keeping ready-check and phase-result failures tied to meaningful summaries and original causes. -5. Keep semantics that callers depend on: - - stable control flow - - stable exception class behavior - - stable user-visible strings unless a wrapped-context test justifies a targeted delta. -6. Add/adjust tests: - - `tests/test_run.py` for module-seam command-boundary cause retention. - - `tests/test_routing.py` for routing/decision stability under wrapped failures. - - `tests/test_loop_migration_tick.py` to ensure migration-tick summaries still include meaningful root-cause context. - -## Precondition -- Phase 2 is marked complete in the migration manifest. -- Phase-2 boundary contracts are present in `artifacts.py` and their tests. -- No edits are made in `config.py`, `loop.py`, or `cli.py` during this phase. - -## Definition of Done -- `agent.py`, `git.py`, `phases.py`, and `migration_tick.py` boundary wrappers preserve `__cause__` and keep current call patterns intact. -- No new external API is introduced. -- Behavior for run/routing/migration tick remains unchanged in flow and decision results while asserting cause retention where wrapped. -- No test in phase scope is left failing. 
- -## Validation steps -- `uv run pytest tests/test_run.py` -- `uv run pytest tests/test_routing.py` -- `uv run pytest tests/test_loop_migration_tick.py tests/test_phases.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md deleted file mode 100644 index fef9cce..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-4-loop-and-cli-boundary-resilience.md +++ /dev/null @@ -1,41 +0,0 @@ -# Phase 4: Loop and CLI resilience under boundary changes - -## Objective -Ensure loop and CLI behavior remains shippable when artifact/config/git boundaries fail, with truthful user messaging and unchanged control-flow defaults. - -## Scope -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/cli.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_config.py` -- `tests/test_phases.py` - -## Instructions -1. Update boundary catch/relay points in `loop.py` so config/artifacts/git failures are wrapped only at decision points and keep current fallback logic when safe (`load_taste` defaults, non-fatal taste path failures, validation path continuity). -2. Tighten `config.py` helpers (`continuous_refactoring.config.load_manifest`, `_load_manifest_payload`, and related config loaders) only where needed to align with consistent cause-chaining semantics with artifacts and keep missing-manifest behavior unchanged. -3. Update CLI taste/upgrade/init paths to preserve exact user-facing behavior on boundary failures while adding richer cause-linked debug context internally. -4. 
Add regression tests for malformed/unreadable manifest and log-write failures in config/CLI/loop surfaces that must not crash into less useful errors. -5. Keep command output and exit status stable where existing tests assert exact semantics. -6. Ensure `tests/test_phases.py` still validates unchanged high-level phase outcomes under loop/cli boundary stress paths. - -## Precondition -- Phase 3 is marked complete in the migration manifest. -- Boundary wrappers from phase 3 are present at the intended module seams. -- No edits in `__init__.py` in this phase yet. - -## Definition of Done -- Loop/CLI/cfg paths remain robust under boundary failures and recover/abort in the same control plane as before. -- Boundary errors are wrapped with preserved causes only where callsite semantics improve context. -- Regressions for taste, run-once, and config load paths are covered by new/updated tests. -- No observable behavior changes outside error-cause channels unless explicitly documented by tests. -- No new direct API behavior changes in this phase outside boundary resilience scope. -- All phase-4 scope tests pass. 
- -## Validation steps -- `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py` -- `uv run pytest tests/test_run_once.py tests/test_run_once_regression.py` -- `uv run pytest tests/test_config.py` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md deleted file mode 100644 index e8f15c7..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/phase-5-shippable-regression-and-contract-lock.md +++ /dev/null @@ -1,55 +0,0 @@ -# Phase 5: Cross-module contract lock and migration finalization - -## Objective -Lock export/runtime contracts after boundary hardening and complete migration-wide regression validation while keeping the repository shippable after each intermediate step. - -## Scope -- `src/continuous_refactoring/__init__.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/artifacts.py` -- All tests touched in earlier phases -- `tests/test_continuous_refactoring.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` -- `tests/test_routing.py` -- `tests/test_run.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_config.py` - -## Instructions -1. Verify `__init__.py` still enforces duplicate-export safety after any added/retained symbols and update no public symbol lists unless required by the migration. -2. 
Re-run phase-level and integration checks to ensure no behavior drift: - - phase readiness/validation retry semantics, - - artifact summary/event content, - - CLI taste/init messages, - - run/run-once loop outcomes. -3. Confirm migration docs and this plan match scope edits and that no phase introduced behavior outside migration intent, including the current `migration_tick.py` seam covered by `tests/test_loop_migration_tick.py`. -4. If any CLI exit path changed only in wording for more context, add/adjust exact-string assertions in dedicated CLI tests and call this out explicitly in DoD. -5. Verify `migrations/src-continuous-refactoring-artifacts-py-20260427T215942/manifest.json` phase graph and metadata remain consistent with this plan (no missing or renamed phase files). - -## Precondition -- Phase 4 is marked complete in the migration manifest. -- All intended phase edits are present in working tree. -- All phase documents in this migration directory match their intended scope (especially phase names referenced in `manifest.json`). - -## Definition of Done -- Package export checks are clean for touched modules. -- All phase-level target validations and full suite are green. -- Boundary-cause semantics are consistent across `artifacts`, `agent`, `git`, `loop`, `migration_tick`, `phases`, `config`, and CLI callsites. -- Migration scope and docs are aligned to the final code shape. -- `manifest.json` and `plan.md` are coherent with delivered phase files and scope. -- No unresolved documentation/process debt introduced by this migration. 
- -## Validation steps -- `uv run pytest tests/test_config.py tests/test_continuous_refactoring.py tests/test_loop_migration_tick.py tests/test_phases.py tests/test_routing.py` -- `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py tests/test_run_once.py tests/test_run_once_regression.py` -- `uv run pytest tests/test_run.py` -- `uv run pytest` diff --git a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md b/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md deleted file mode 100644 index 51813fa..0000000 --- a/migrations/src-continuous-refactoring-artifacts-py-20260427T215942/plan.md +++ /dev/null @@ -1,80 +0,0 @@ -# Migration: src-continuous-refactoring-artifacts-py-20260427T215942 - -## Goal -Harden artifact persistence and adjacent command-boundary behavior around `continuous_refactoring.artifacts` in place so that failures at module boundaries preserve root-cause context, callsites remain stable, and execution behavior stays shippable between phases. - -## Chosen approach -[`inplace-artifact-boundary-hardening`](approaches/inplace-artifact-boundary-hardening.md) - -## Scope -- `src/continuous_refactoring/artifacts.py` -- `src/continuous_refactoring/agent.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/migration_tick.py` -- `src/continuous_refactoring/phases.py` -- `src/continuous_refactoring/cli.py` -- `src/continuous_refactoring/config.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/__init__.py` -- `tests/test_continuous_refactoring.py` -- `tests/test_loop_migration_tick.py` -- `tests/test_phases.py` -- `tests/test_run.py` -- `tests/test_routing.py` -- `tests/test_cli_init_taste.py` -- `tests/test_cli_taste_warning.py` -- `tests/test_run_once.py` -- `tests/test_run_once_regression.py` -- `tests/test_config.py` - -## Non-goals -- No module splitting or package-boundary redesign. -- No rollout flags or canary mechanics in this migration. 
-- No API-level renames. -- No deliberate changes to prompt text where tests assert exact output, except where required to preserve boundary context. - -## Scope policy -Only files listed above and existing migration documents in this directory may be edited for this migration. - -## Phases -1. `phase-1-artifact-boundary-baseline` -2. `phase-2-artifacts-boundary-contract` -3. `phase-3-boundary-wrappers-at-callsites` -4. `phase-4-loop-and-cli-boundary-resilience` -5. `phase-5-shippable-regression-and-contract-lock` - -```mermaid -flowchart TD - P1[phase-1-artifact-boundary-baseline] - P2[phase-2-artifacts-boundary-contract] - P3[phase-3-boundary-wrappers-at-callsites] - P4[phase-4-loop-and-cli-boundary-resilience] - P5[phase-5-shippable-regression-and-contract-lock] - - P1 --> P2 - P2 --> P3 - P3 --> P4 - P4 --> P5 -``` - -## Dependency summary -- Phase 1 creates a test baseline and verifies current behavior before production edits. -- Phase 2 introduces helper contracts in `artifacts.py` for summary/event persistence; all other production modules consume these contracts later. -- Phase 3 applies direct boundary wrappers in adjacent modules and migration-tick reporting seams, and must run only after Phase 2 is green. -- Phase 4 applies boundary resilience in orchestration and CLI surfaces and must run only after callsite behavior in Phase 3 is locked. -- Phase 5 performs final contract lock validation across the scope and must run only after Phase 4 is green. - -## Validation strategy -Taste version: `taste-scoping-version: 1` - -Phase gates must remain independently verifiable and each phase must leave a shippable tree (at least targeted tests green and no behavioral break outside migration intent). 
- -### Phase gates -- `phase-1-artifact-boundary-baseline.md`: `uv run pytest tests/test_continuous_refactoring.py tests/test_phases.py tests/test_loop_migration_tick.py` -- `phase-2-artifacts-boundary-contract.md`: `uv run pytest tests/test_continuous_refactoring.py` -- `phase-3-boundary-wrappers-at-callsites.md`: `uv run pytest tests/test_run.py tests/test_routing.py tests/test_loop_migration_tick.py tests/test_phases.py` -- `phase-4-loop-and-cli-boundary-resilience.md`: `uv run pytest tests/test_cli_init_taste.py tests/test_cli_taste_warning.py tests/test_run_once.py tests/test_run_once_regression.py tests/test_config.py` -- `phase-5-shippable-regression-and-contract-lock.md`: `uv run pytest` - -## Verification rule -Each phase must satisfy its local Definition of Done, including full boundary error-cause visibility for behavior changes it introduces, and pass its gate before the next phase starts. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md deleted file mode 100644 index 07349fe..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/git-backed-tracked-files.md +++ /dev/null @@ -1,56 +0,0 @@ -# Move Tracked-File Enumeration to `git.py` - -## Strategy - -Extract low-level repo access from `targeting.py` into `git.py` so file enumeration is centralized and boundary-faithful. - -Proposed changes: -- Add `list_tracked_files(repo_root: Path) -> list[str]` to `src/continuous_refactoring/git.py` using existing `run_command`. -- Replace direct `subprocess.run([... "git", "ls-files", "-z"])` in `targeting.py` with `continuous_refactoring.git.list_tracked_files`. -- Keep `select_random_files` in `targeting.py` as policy (`count`, tuple return, ordering behavior) and use `git.py` only for repository access. 
-- Preserve warning/error behavior by preserving command output messages and wrapping failures with nested `ContinuousRefactorError` in one place. -- Add regression tests in both modules: - - low-level git command edge cases (`git.py`) and - - target resolution behavior under non-ASCII and empty-repo conditions (`test_targeting.py`). - -This is explicitly non-speculative: there is real duplication pressure across modules that already depend on git command semantics. - -## Tradeoffs - -Pros: -- Stronger domain split around repository transport. -- Easier to test and mock repository behavior in one place. -- Improves consistency if other modules later need reliable tracked-file access. - -Cons: -- Requires modifying `git.py`, which increases blast radius into `loop.py`, `artifacts.py`, and related tests through callsite imports. -- Need to keep error messages stable for existing tests that assert on command failure paths. -- Not as immediate a cleanup as pure in-place refactor. - -## Estimated Phases - -1. Git utility extraction -- Add `list_tracked_files` to `git.py` and test it with fixtures already used by `tests/test_git.py`. -- Keep interface narrow and stdlib-only. - -2. Targeting integration -- Replace in-module git listing with the new utility. -- Ensure `select_random_files` and `expand_patterns_to_files` remain deterministic and deduplicated. - -3. Scope and loop checks -- Update any callsites that need direct visibility of tracked-file listing behavior. -- Keep `targeting.py` API and `Target` contract unchanged. - -4. Full behavioral pass -- Focused tests: `uv run pytest tests/test_git.py tests/test_targeting.py` -- Broader: `uv run pytest tests/test_run_once_regression.py tests/test_cli_init_taste.py`. - -## Risk Profile - -Medium. - -Watch-outs: -- Avoid introducing temporary migration flags in CLI or runtime flow. -- Do not change fallback semantics: no random target shape change, no precedence change. 
-- Keep exception boundaries clear: `run_command` wraps low-level process issues, `targeting.py` wraps domain-level failures only. - diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md deleted file mode 100644 index ce2d348..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/in-place-target-resolution.md +++ /dev/null @@ -1,63 +0,0 @@ -# In-Place Targeting Resolution Tightening - -## Strategy - -Keep `src/continuous_refactoring/targeting.py` as the ownership point for all target semantics, but tighten the module into explicit, small pipeline helpers. - -Core move: -- Move CLI-facing target parsing to a first-class helper in `targeting.py`: - - `parse_paths_arg(raw: str | None) -> tuple[str, ...] | None` - - validate/truncate-empty in one place, not ad hoc in `loop.py`. -- Introduce a tiny selector abstraction in `targeting.py`: - - `select_target_files(patterns: tuple[str, ...], repo_root: Path) -> tuple[str, ...]` - - `resolve_target_sources(...) -> tuple[list[Target], list[str]]` is still not a second data structure, just returns an ordered `list[Target]`. -- Keep `loop.py` orchestration thin: - - `_resolve_targets_from_args()` delegates parsing and resolution; it only passes parser outputs. -- Preserve existing output contracts: - - fallback provenance strings (`targets`, `globs`, `extensions`, `paths`, `random`), - - random fallback behavior and warning text patterns, - - `Target` dataclass shape and public imports. -- Normalize warnings/errors at module boundaries: - - keep current behavior (`ContinuousRefactorError` on fatal git enumeration failures), - - attach `__cause__` where wrapping adds context. - -## Tradeoffs - -Pros: -- Lowest churn across `loop.py`, `scope_expansion.py`, and tests. 
-- No module boundary churn, no migration of symbol ownership. -- Fastest path to measurable cleanup and easy review. - -Cons: -- Retains a broader `targeting.py` surface than a full split. -- Less architectural separation than module extraction options. -- Any later boundary extraction will be easier from this cleaner baseline, not zero-cost. - -## Estimated Phases - -1. Baseline lock -- Add regression tests for `_parse_paths_arg` behavior and edge-case warnings in `tests/test_targeting.py`. -- Add one small `run-loop` integration assertion proving `loop.py` delegates to new parser behavior. - -2. Internal pipeline cleanup -- Extract parsing and selection helpers inside `targeting.py`. -- Update `_resolve_targets_from_args()` in `loop.py` to call the new helper functions. -- Keep prompt composition unchanged; only target resolution shape changes through same contract. - -3. Error-boundary hardening -- Wrap failed git enumeration paths with nested `ContinuousRefactorError`. -- Preserve user-facing strings where tests assert them; update only if intentional and justified. - -4. Validation -- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_prompts.py` -- Then focused `uv run pytest tests/test_cli_*.py` for any touched CLI path. - -## Risk Profile - -Low to medium. - -Watch-outs: -- Keep warning wording stable to avoid brittle regression in stderr-capture tests. -- Keep first-match targeting semantics intact (`targets > globs > extensions > paths > random`). -- No new temporary flags, names, or compatibility indirection. 
- diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md deleted file mode 100644 index b5bb4b8..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/approaches/split-targeting-domain.md +++ /dev/null @@ -1,66 +0,0 @@ -# Split Targeting by Domain Ownership - -## Strategy - -Introduce explicit domain modules and keep `targeting.py` as a facade: -- `src/continuous_refactoring/targeting_io.py` - - JSONL parsing/validation (`load_targets_jsonl`, `validate_target_line`) - - `_optional_str` and field mapping (`effort-override`, `model-override`) -- `src/continuous_refactoring/targeting_match.py` - - pattern compilation (`_compile_glob`) - - `parse_extensions`, `parse_globs`, `expand_patterns_to_files` -- `src/continuous_refactoring/targeting_resolution.py` - - `resolve_targets` policy and source precedence -- `src/continuous_refactoring/targeting.py` - - stable façade, `Target`, `TargetSource`, `select_random_files` - - re-exports and orchestration glue only. - -This aligns with taste-scoping by making domain boundaries meaningful: -parsing, matching, and policy are separate and testable without CLI, agent, or loop context. - -## Tradeoffs - -Pros: -- Clearer code ownership and fewer long-distance responsibilities in one file. -- Easier targeted tests for each boundary (pure parsing/matching/resolution). -- Reduced pressure on `targeting.py` as behavior keeps growing. - -Cons: -- Medium-high import churn (`loop.py`, tests, `__init__.py` surface, maybe `prompts.py` type imports). -- Higher chance of symbol/export conflict with package uniqueness checks. -- More files to keep in sync while maintaining deterministic behavior and warning wording. - -## Estimated Phases - -1. 
Test and contract capture -- Split existing tests into focused ownership buckets: - - keep `tests/test_targeting.py` for top-level orchestration and cross-boundary behavior, - - add `tests/test_targeting_match.py` for glob semantics, - - add `tests/test_targeting_resolution.py` for precedence and fallback. - -2. Extract parsing module -- Move validation and JSONL loading into `targeting_io.py`. -- Update direct imports where `validate_target_line` and `load_targets_jsonl` are used. - -3. Extract matching module -- Move glob and extension parsing to `targeting_match.py`. -- Ensure dedupe/sort/range behavior stays identical; expand tests using existing randomized generator case. - -4. Extract resolution module -- Move precedence/fallback policy into `targeting_resolution.py`. -- Keep return-order semantics stable and deterministic. - -5. Facade and package integration -- Keep stable imports from `targeting.py` where external callers expect it. -- Update `src/continuous_refactoring/__init__.py` if new public symbols are intentionally exported. -- Final smoke tests. - -## Risk Profile - -Medium. - -Watch-outs: -- Avoid speculative new API: no extra adapters, no temporary compatibility aliases. -- Do not rename the "truthy" precedence order; any change must be explicit and covered by tests. -- Ensure package-level uniqueness passes after each phase; no duplicate exports allowed. 
- diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json deleted file mode 100644 index cf8094e..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/manifest.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "awaiting_human_review": false, - "cooldown_until": null, - "created_at": "2026-04-27T22:06:24.348-07:00", - "current_phase": "", - "human_review_reason": null, - "last_touch": "2026-04-28T15:43:53.161-07:00", - "name": "src-continuous-refactoring-targeting-py-20260427T220624", - "phases": [ - { - "done": true, - "effort_reason": null, - "file": "phase-1-targeting-parse-foundation.md", - "name": "targeting-parse-foundation", - "precondition": "- `rg -n \\\"def parse_paths_arg\\\\(\\\" src/continuous_refactoring/targeting.py` returns no matches before edits. - Existing `loop.py` path parsing may still be present; phase 2 owns loop delegation and removal.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-2-loop-delegates-targeting-parse.md", - "name": "loop-delegates-targeting-parse", - "precondition": "- `phase-1-targeting-parse-foundation.md` is marked complete in the migration manifest. - `rg -n \\\"def parse_paths_arg\\\\(\\\" src/continuous_refactoring/targeting.py` finds the parser in `targeting.py`. - `rg -n \\\"def _parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` finds the local parser that this phase removes. 
- `rg -n \\\"_resolve_targets_from_args\\\\(\\\" src/continuous_refactoring/loop.py` finds the shared helper definition plus the existing `run_once()` and `run_loop()` callsites, confirming both entrypoints already route through one resolver before this phase delegates path parsing.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-3-targeting-git-boundary-hardening.md", - "name": "targeting-git-boundary-hardening", - "precondition": "- Phase 2 is marked complete in the migration manifest. - `rg -n \\\"def _parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` returns no matches, confirming Phase 2 removed loop-local path parsing. - `rg -n \\\"parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` reports only delegated usage from the shared target resolver. - `rg -n \\\"def list_tracked_files\\\\(\\\" src/continuous_refactoring/targeting.py` finds the tracked-file enumeration implementation that this phase hardens.", - "required_effort": null - }, - { - "done": true, - "effort_reason": null, - "file": "phase-4-targeting-surface-regression-lock.md", - "name": "targeting-surface-regression-lock", - "precondition": "- Phases 1, 2, and 3 are marked complete in the migration manifest. - `rg -n \\\"parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` shows only callsite usage from `targeting`. - `rg -n \\\"_parse_paths_arg\\\\(\\\" src/continuous_refactoring/loop.py` returns no matches. - `rg -n \\\"run_command\\\\(\\\" src/continuous_refactoring/targeting.py` finds tracked-file enumeration flowing through the repository git boundary. 
- `rg -n \\\"subprocess\\\\.run\\\\(\\\" src/continuous_refactoring/targeting.py` returns no matches.", - "required_effort": null - } - ], - "status": "done", - "wake_up_on": null -} diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md deleted file mode 100644 index 3f79a46..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-1-targeting-parse-foundation.md +++ /dev/null @@ -1,37 +0,0 @@ -# Phase 1: targeting parse foundation - -## Objective -Move path and selection parsing into `targeting.py` without changing public targeting output. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `tests/test_targeting.py` - -## Instructions -1. In `targeting.py`, add a helper that owns CLI path parsing: - - `parse_paths_arg(raw_paths: str | None) -> tuple[str, ...] | None` -2. Keep `resolve_targets()` as the public entrypoint and add the helper without changing current loop callsites: - - normalize and drop empty segments in one place. - - avoid changing output ordering or `Target.provenance` behavior. -3. Add/adjust tests in `tests/test_targeting.py` for: - - trimming and dropping empty path segments (e.g. `"src/foo.py: src/bar.py"`) - - `None`/blank path raw values produce `None` - - precedence expectations preserved in `resolve_targets` when path input is present -4. Keep warning text and exception behavior stable unless a test in this phase requires a deliberate, documented assertion. - -## Precondition -- `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` returns no matches before edits. -- Existing `loop.py` path parsing may still be present; phase 2 owns loop delegation and removal. - -## Definition of Done -- `targeting.py` has a first-class path parser ready for loop delegation. 
-- `tests/test_targeting.py` contains targeted regression coverage for path parsing semantics and precedence at the unit level. -- `uv run pytest tests/test_targeting.py` passes with no skipped assertions specific to this migration. -- No external API/CLI contract changes outside `targeting.py` behavior. -- No edits are made in `loop.py` during this phase. - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py` -- Validate ownership by inspection and signature checks: - - `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` - - `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md deleted file mode 100644 index 83cbbac..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-2-loop-delegates-targeting-parse.md +++ /dev/null @@ -1,47 +0,0 @@ -# Phase 2: loop delegates target parsing and resolution - -## Objective -Make `loop.py` a thin orchestration layer by delegating all target argument parsing to `targeting.py`. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `src/continuous_refactoring/loop.py` -- `tests/test_run_once_regression.py` -- `tests/test_run.py` - -## Instructions -1. Remove local `_parse_paths_arg` path parsing logic from `loop.py`. -2. Update `_resolve_targets_from_args()` to call `targeting.parse_paths_arg(...)` and pass the parsed value directly into `resolve_targets(...)`. -3. Keep existing precedence behavior identical: `targets` > `globs` > `extensions` > `paths` > random. -4. Ensure no behavioral coupling is introduced in loop entrypoints: - - `run_once()` - - `run_loop()` -5. 
Add/adjust regression checks covering: - - trimmed path handling in the run-once path (`args.paths` with whitespace) - - path-driven target prompt shape in one-shot flow - - non-empty target list behavior in normal loop mode. -6. Ensure parse helper ownership is visible at callsite by importing from `continuous_refactoring.targeting` rather than local path parsing implementations. - -## Precondition -- `phase-1-targeting-parse-foundation.md` is marked complete in the migration manifest. -- `rg -n \"def parse_paths_arg\\(\" src/continuous_refactoring/targeting.py` finds the parser in `targeting.py`. -- `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` finds the local parser that this phase removes. -- `rg -n \"_resolve_targets_from_args\\(\" src/continuous_refactoring/loop.py` finds the shared helper definition plus the existing `run_once()` and `run_loop()` callsites, confirming both entrypoints already route through one resolver before this phase delegates path parsing. - -## Definition of Done -- `loop.py` contains no `_parse_paths_arg` implementation and does not parse `args.paths` directly. -- `_resolve_targets_from_args()` passes parsed `paths` and raw non-path selectors to `resolve_targets(...)` in one place. -- `run_once` and `run_loop` behavior stays unchanged for all targeting modes. -- Focused regression scope passes: - - path trimming in `args.paths` on one-shot path - - run-loop target prompt shape with non-empty target set - - precedence still resolves to `targets` > `globs` > `extensions` > `paths` > random. -- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` passes. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` reports only delegated usage to `targeting.parse_paths_arg`. 
- -## Validation steps -- Run: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` -- Verify delegation ownership by inspection: - - `rg -n \"_parse_paths_arg\\(\" src/continuous_refactoring/loop.py` - - `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` -- Keep `tests/test_run_once_regression.py` and `tests/test_run.py` green before phase transition. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md deleted file mode 100644 index b485fbb..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-3-targeting-git-boundary-hardening.md +++ /dev/null @@ -1,38 +0,0 @@ -# Phase 3: targeting git enumeration boundary hardening - -## Objective -Standardize failure handling for tracked-file enumeration so git subprocess failures are wrapped at the targeting boundary with preserved causes. - -## Scope -- `src/continuous_refactoring/targeting.py` -- `src/continuous_refactoring/git.py` -- `tests/test_targeting.py` - -## Instructions -1. In `targeting.py`, replace direct subprocess-based tracked-file reads inside `list_tracked_files()` with the repository git boundary (`continuous_refactoring.git.run_command`, imported or module-qualified). -2. Add module-local context when git enumeration fails and preserve the original exception via `from` (`ContinuousRefactorError` nesting). -3. Keep non-fatal semantics for missing matches: - - no patterns -> empty tuple - - zero tracked files in matching mode -> empty tuple -4. Add/extend tests in `tests/test_targeting.py` for nested-cause behavior and message preservation when git enumeration fails. -5. Keep `list_tracked_files` return value and shape stable when git succeeds. - -## Precondition -- Phase 2 is marked complete in the migration manifest. 
-- `rg -n \"def _parse_paths_arg\\(\" src/continuous_refactoring/loop.py` returns no matches, confirming Phase 2 removed loop-local path parsing. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` reports only delegated usage from the shared target resolver. -- `rg -n \"def list_tracked_files\\(\" src/continuous_refactoring/targeting.py` finds the tracked-file enumeration implementation that this phase hardens. - -## Definition of Done -- `list_tracked_files()` uses the git command boundary and wraps failures with nested context at the targeting boundary. -- No behavioral changes in successful pattern matching paths. -- `uv run pytest tests/test_targeting.py` passes. -- `rg -n \"subprocess\\.run\\(\" src/continuous_refactoring/targeting.py` returns no matches for tracked-file reads. -- `tests/test_targeting.py` has explicit assertions for: - - `ContinuousRefactorError` raised on git command failures - - original failure attached as `__cause__` - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py` -- Confirm that a failing git enumeration path raises `ContinuousRefactorError` with the original cause (`GitCommandError` from the git command boundary) where applicable. -- Confirm by inspection that tracked-file reads now flow through the repository git command boundary and no duplicate subprocess paths exist in `targeting.py`. diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md deleted file mode 100644 index 63f07d3..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/phase-4-targeting-surface-regression-lock.md +++ /dev/null @@ -1,46 +0,0 @@ -# Phase 4: targeting surface regression lock - -## Objective -Lock in regression coverage across CLI, loop, and planning surfaces after in-place targeting refactor. 
- -## Scope -- `tests/test_targeting.py` -- `tests/test_run.py` -- `tests/test_run_once_regression.py` -- `tests/test_scope_loop_integration.py` -- `tests/test_focus_on_live_migrations.py` -- `tests/test_e2e.py` - -## Instructions -1. Run focused integration/reuse tests that depend on targeting contracts (prompt construction, CLI handling, loop flow). -2. Confirm no implicit behavior shift for these invariants: - - precedence remains `targets > globs > extensions > paths > random` - - `--paths` whitespace is ignored after parsing - - random fallback to `general refactoring` remains unchanged when no tracked matches exist - - no regression in live-migration routing where target files are forwarded unchanged. -3. If new regression failures appear, contain them in a minimal additional test under the targeting module or affected loop integration test in the same phase. -4. Keep user-facing output and validation contracts intact: - - prompt text/contents used by `compose_full_prompt` flows - - scope-fallback and max-target behavior semantics. - -## Precondition -- Phases 1, 2, and 3 are marked complete in the migration manifest. -- `rg -n \"parse_paths_arg\\(\" src/continuous_refactoring/loop.py` shows only callsite usage from `targeting`. -- `rg -n \"_parse_paths_arg\\(\" src/continuous_refactoring/loop.py` returns no matches. -- `rg -n \"run_command\\(\" src/continuous_refactoring/targeting.py` finds tracked-file enumeration flowing through the repository git boundary. -- `rg -n \"subprocess\\.run\\(\" src/continuous_refactoring/targeting.py` returns no matches. - -## Definition of Done -- Focused cross-surface targeting regression suite for this phase passes. -- Targeting behavior is stable for both one-shot and loop runs, including live-migration integration points. -- No behavior contract changes are introduced by module boundary refactoring. 
-- `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` passes. -- `uv run pytest` passes (final migration-wide regression gate). -- In `prompt` and `cli` surfaces, precedence and fallback invariants are still observable: - - `targets` > `globs` > `extensions` > `paths` > `random` - - `--paths` whitespace is ignored - - random fallback still uses existing fallback prompt behavior. - -## Validation steps -- Run: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` -- Run: `uv run pytest` diff --git a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md b/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md deleted file mode 100644 index 6e25aee..0000000 --- a/migrations/src-continuous-refactoring-targeting-py-20260427T220624/plan.md +++ /dev/null @@ -1,68 +0,0 @@ -# Migration: src-continuous-refactoring-targeting-py-20260427T220624 - -## Goal -Refactor target resolution so CLI targeting semantics are owned by `src/continuous_refactoring/targeting.py`, while keeping all runtime behavior unchanged and preserving precedence order (`targets` > `globs` > `extensions` > `paths` > random fallback). - -## Chosen approach -`in-place-target-resolution` - -## Scope -- `src/continuous_refactoring/targeting.py` -- `tests/test_targeting.py` -- `src/continuous_refactoring/loop.py` -- `src/continuous_refactoring/prompts.py` -- `src/continuous_refactoring/cli.py` -- `src/continuous_refactoring/artifacts.py` -- `src/continuous_refactoring/git.py` -- `src/continuous_refactoring/scope_expansion.py` - -## Non-goals -- No API or data-shape migration. 
-- No architectural split of `loop.py`/`scope_expansion.py`. -- No rollout/temporary naming or compatibility shims. -- No global project-wide behavior changes outside target resolution and tracking failure boundaries. - -## Phases -1. `phase-1-targeting-parse-foundation.md` -2. `phase-2-loop-delegates-targeting-parse.md` -3. `phase-3-targeting-git-boundary-hardening.md` -4. `phase-4-targeting-surface-regression-lock.md` - -```mermaid -flowchart TD - P1[phase-1-targeting-parse-foundation] - P2[phase-2-loop-delegates-targeting-parse] - P3[phase-3-targeting-git-boundary-hardening] - P4[phase-4-targeting-surface-regression-lock] - - P1 --> P2 - P2 --> P3 - P2 --> P4 - P3 --> P4 -``` - -## Dependencies -1. `phase-1` must establish parsing ownership in `targeting.py` before `loop.py` can delegate. -2. `phase-2` must complete before `phase-3` because error-hardening depends on the same argument flow. -3. `phase-4` must wait for both `phase-2` and `phase-3` so loop behavior and git-failure boundaries are stabilized. - -## Dependency summary -- `phase-1` must establish parser/selector abstractions and coverage in `test_targeting.py` before `loop.py` and callers can shift responsibility. -- `phase-2` depends on phase-1 because all loop delegation points route through the new targeting helper signatures. -- `phase-3` depends on phase-2 so any git-tracking failure edge case is validated through the same call shape used by loop + tests. -- `phase-4` is integration + regression lock and must only run after all prior phase DoD are satisfied. - -## Validation strategy -Each phase is independently verifiable and includes a narrow command that should be green before proceeding. 
- -- `phase-1` gate: `uv run pytest tests/test_targeting.py` -- `phase-2` gate: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py` -- `phase-3` gate: `uv run pytest tests/test_targeting.py` -- `phase-4` gate: `uv run pytest tests/test_targeting.py tests/test_run_once_regression.py tests/test_run.py tests/test_scope_loop_integration.py tests/test_focus_on_live_migrations.py tests/test_prompts.py tests/test_prompts_scope_selection.py tests/test_e2e.py` - -Final migration gate (after all phases): -- `uv run pytest` - -## Validation notes -The migration stays shippable after every phase by enforcing behavior-specific gates that include the changed surface. -Phase order minimizes coupling: parsing is isolated first, delegation second, failure boundary hardening third, and only then full-surface verification. diff --git a/src/continuous_refactoring/cli.py b/src/continuous_refactoring/cli.py index 89ba5c8..290c380 100644 --- a/src/continuous_refactoring/cli.py +++ b/src/continuous_refactoring/cli.py @@ -7,6 +7,7 @@ from collections.abc import Callable from importlib.metadata import version as metadata_version from pathlib import Path +from typing import Literal __all__ = [ "build_parser", @@ -29,8 +30,11 @@ run_migrations_focused_loop, run_once, ) +from continuous_refactoring.migration_cli import handle_migration +from continuous_refactoring.migrations import MIGRATION_STATUSES from continuous_refactoring.review_cli import handle_review +_PACKAGE_DISTRIBUTION = "continuous-refactoring" _TASTE_WARNING = "warning: taste out of date — run `continuous-refactoring taste --upgrade`" _GLOBAL_TASTE_WARNING = ( "warning: global taste is out of date — " @@ -38,6 +42,10 @@ ) +def _version_banner() -> str: + return f"{_PACKAGE_DISTRIBUTION} {metadata_version(_PACKAGE_DISTRIBUTION)}" + + def parse_max_attempts(value: str) -> int: try: attempts = int(value) @@ -221,7 +229,7 @@ def _add_run_parser(subparsers: argparse._SubParsersAction) -> None: 
"--max-refactors", type=int, default=None, - help="Distinct targets to process.", + help="Refactor actions to run.", ) run_parser.add_argument( "--focus-on-live-migrations", @@ -246,7 +254,7 @@ def _add_run_parser(subparsers: argparse._SubParsersAction) -> None: "--sleep", type=parse_sleep_seconds, default=0.0, - help="Seconds to sleep between completed targets.", + help="Seconds to sleep between completed actions.", ) @@ -271,6 +279,86 @@ def _add_review_parser(subparsers: argparse._SubParsersAction) -> None: perform_parser.add_argument("--effort", required=True, help="Effort level.") +def _add_migration_parser(subparsers: argparse._SubParsersAction) -> None: + migration_parser = subparsers.add_parser( + "migration", + help="Inspect live migrations.", + ) + migration_parser.set_defaults(handler=handle_migration) + migration_sub = migration_parser.add_subparsers(dest="migration_command") + + list_parser = migration_sub.add_parser( + "list", + help="List visible migrations.", + ) + list_parser.add_argument( + "--status", + choices=MIGRATION_STATUSES, + default=None, + help="Only show migrations with this status.", + ) + list_parser.add_argument( + "--awaiting-review", + action="store_true", + help="Only show migrations awaiting human review.", + ) + + doctor_parser = migration_sub.add_parser( + "doctor", + help="Validate migration consistency.", + ) + doctor_parser.add_argument( + "target", + nargs="?", + help="Migration slug or contained path.", + ) + doctor_parser.add_argument( + "--all", + action="store_true", + help="Validate every visible migration and transaction state.", + ) + + review_parser = migration_sub.add_parser( + "review", + help="Perform staged review on a flagged migration.", + ) + review_parser.add_argument("target", help="Migration slug or contained path.") + review_parser.add_argument( + "--with", dest="agent", choices=("codex", "claude"), required=True, + help="Agent backend.", + ) + review_parser.add_argument("--model", required=True, 
help="Model name.") + review_parser.add_argument( + "--effort", choices=EFFORT_TIERS, required=True, help="Effort level." + ) + + refine_parser = migration_sub.add_parser( + "refine", + help="Refine a planning migration with user feedback.", + ) + refine_parser.add_argument("target", help="Migration slug or contained path.") + feedback_group = refine_parser.add_mutually_exclusive_group(required=True) + feedback_group.add_argument("--message", help="Refinement feedback text.") + feedback_group.add_argument( + "--file", + type=Path, + help="Path to a UTF-8 file containing refinement feedback.", + ) + refine_parser.add_argument( + "--with", dest="agent", choices=("codex", "claude"), required=True, + help="Agent backend.", + ) + refine_parser.add_argument("--model", required=True, help="Model name.") + refine_parser.add_argument( + "--effort", choices=EFFORT_TIERS, required=True, help="Effort level." + ) + refine_parser.add_argument( + "--show-agent-logs", + action="store_true", + help="Mirror planning agent output to terminal.", + ) + + def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Continuous refactoring CLI for AI coding agents.", @@ -278,7 +366,7 @@ def build_parser() -> argparse.ArgumentParser: parser.add_argument( "--version", action="version", - version=f"continuous-refactoring {metadata_version('continuous-refactoring')}", + version=_version_banner(), ) subparsers = parser.add_subparsers(dest="command") @@ -291,6 +379,7 @@ def build_parser() -> argparse.ArgumentParser: help="Verify and upgrade global configuration.", ) upgrade_parser.set_defaults(handler=_handle_upgrade) + _add_migration_parser(subparsers) _add_review_parser(subparsers) return parser @@ -318,36 +407,20 @@ def _handle_init(args: argparse.Namespace) -> None: try: if in_repo_taste_arg is not None: - repo_taste_resolved = (path / in_repo_taste_arg).resolve() - if not repo_taste_resolved.is_relative_to(path): - print( - f"Error: --in-repo-taste must be 
inside the repo: {in_repo_taste_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - if repo_taste_resolved.exists() and not repo_taste_resolved.is_file(): - print( - f"Error: --in-repo-taste must point to a file: {in_repo_taste_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - repo_taste_relative = str(repo_taste_resolved.relative_to(path)) + repo_taste_resolved, repo_taste_relative = _resolve_repo_relative_arg( + repo_root=path, + value=in_repo_taste_arg, + flag="--in-repo-taste", + expected_kind="file", + ) if live_dir_arg is not None: - resolved_live = (path / live_dir_arg).resolve() - if not resolved_live.is_relative_to(path): - print( - f"Error: --live-migrations-dir must be inside the repo: {live_dir_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - if resolved_live.exists() and not resolved_live.is_dir(): - print( - f"Error: --live-migrations-dir must point to a directory: {live_dir_arg}", - file=sys.stderr, - ) - raise SystemExit(2) - live_dir_relative = str(resolved_live.relative_to(path)) + resolved_live, live_dir_relative = _resolve_repo_relative_arg( + repo_root=path, + value=live_dir_arg, + flag="--live-migrations-dir", + expected_kind="directory", + ) project = register_project(path) if repo_taste_relative is not None: @@ -386,6 +459,36 @@ def _handle_init(args: argparse.Namespace) -> None: print(f"Live migrations dir: {resolved_live}") +def _resolve_repo_relative_arg( + *, + repo_root: Path, + value: Path, + flag: str, + expected_kind: Literal["file", "directory"], +) -> tuple[Path, str]: + resolved = (repo_root / value).resolve() + if not resolved.is_relative_to(repo_root): + print( + f"Error: {flag} must be inside the repo: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + if resolved.exists(): + if expected_kind == "file" and not resolved.is_file(): + print( + f"Error: {flag} must point to a file: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + if expected_kind == "directory" and not resolved.is_dir(): + print( + f"Error: 
{flag} must point to a directory: {value}", + file=sys.stderr, + ) + raise SystemExit(2) + return resolved, str(resolved.relative_to(repo_root)) + + def _configure_repo_taste( *, current: Path, diff --git a/src/continuous_refactoring/commit_messages.py b/src/continuous_refactoring/commit_messages.py index 07ac258..d3a2ce5 100644 --- a/src/continuous_refactoring/commit_messages.py +++ b/src/continuous_refactoring/commit_messages.py @@ -2,7 +2,11 @@ from pathlib import Path -from continuous_refactoring.decisions import AgentStatus, sanitize_text +from continuous_refactoring.decisions import ( + AgentStatus, + sanitize_text, + sanitized_text_or, +) __all__ = [ "build_commit_message", @@ -49,7 +53,7 @@ def commit_rationale( ): return summary - fallback_text = _present_text(sanitize_text(fallback, repo_root)) + fallback_text = _present_text(sanitized_text_or(fallback, repo_root, fallback)) if fallback_text is not None: return fallback_text return "Validated cleanup completed." diff --git a/src/continuous_refactoring/decisions.py b/src/continuous_refactoring/decisions.py index 0377225..6409751 100644 --- a/src/continuous_refactoring/decisions.py +++ b/src/continuous_refactoring/decisions.py @@ -36,6 +36,12 @@ _VALID_RETRY_RECOMMENDATIONS = frozenset( (*get_args(RetryRecommendation), None), ) +_DEFAULT_RETRY_RECOMMENDATIONS: dict[RunnerDecision, RetryRecommendation] = { + "commit": "none", + "retry": "same-target", + "abandon": "new-target", + "blocked": "human-review", +} @dataclass(frozen=True) @@ -68,13 +74,6 @@ class DecisionRecord: tests_stdout_path: Path | None = None tests_stderr_path: Path | None = None - -def _status_path_text(path: Path | None) -> str | None: - if path is None or not path.exists(): - return None - return path.read_text(encoding="utf-8") - - def parse_status_block(text: str | None) -> AgentStatus | None: if not text: return None @@ -134,8 +133,8 @@ def read_status( last_message_path: Path | None, fallback_text: str | None, ) -> AgentStatus | 
None: - if agent == "codex": - status = parse_status_block(_status_path_text(last_message_path)) + if agent == "codex" and last_message_path is not None and last_message_path.exists(): + status = parse_status_block(last_message_path.read_text(encoding="utf-8")) if status is not None: return status return parse_status_block(fallback_text) @@ -159,21 +158,23 @@ def sanitize_text(text: str | None, repo_root: Path) -> str | None: return " ".join(lines)[:240] +def sanitized_text_or(text: str | None, repo_root: Path, fallback: str) -> str: + return sanitize_text(text, repo_root) or fallback + + def status_summary( status: AgentStatus | None, *, fallback: str, repo_root: Path, ) -> tuple[str, str | None]: - summary = sanitize_text(status.summary if status else None, repo_root) or fallback + summary = sanitized_text_or(status.summary if status else None, repo_root, fallback) focus = sanitize_text(status.next_retry_focus if status else None, repo_root) return summary, focus def resolved_phase_reached(status: AgentStatus | None, fallback: str) -> str: - if status is None: - return fallback - return status.phase_reached or fallback + return fallback if status is None else (status.phase_reached or fallback) def error_failure_kind(message: str) -> str: @@ -188,10 +189,4 @@ def error_failure_kind(message: str) -> str: def default_retry_recommendation( decision: RunnerDecision, ) -> RetryRecommendation: - if decision == "retry": - return "same-target" - if decision == "abandon": - return "new-target" - if decision == "blocked": - return "human-review" - return "none" + return _DEFAULT_RETRY_RECOMMENDATIONS[decision] diff --git a/src/continuous_refactoring/effort.py b/src/continuous_refactoring/effort.py index 823bccf..5007824 100644 --- a/src/continuous_refactoring/effort.py +++ b/src/continuous_refactoring/effort.py @@ -21,6 +21,7 @@ "resolve_effort_budget", "resolve_phase_effort", "resolve_requested_effort", + "resolve_target_effort_budget", ] EffortTier = Literal["low", 
"medium", "high", "xhigh"] @@ -90,6 +91,24 @@ def cap_effort(requested: EffortTier, max_allowed: EffortTier) -> EffortTier: return requested +def _build_resolution( + *, + source: str, + requested_effort: EffortTier, + max_allowed_effort: EffortTier, + reason: str, +) -> EffortResolution: + effective_effort = cap_effort(requested_effort, max_allowed_effort) + return EffortResolution( + source=source, + requested_effort=requested_effort, + effective_effort=effective_effort, + max_allowed_effort=max_allowed_effort, + capped=effective_effort != requested_effort, + reason=reason, + ) + + def resolve_effort_budget( default_effort: object | None, max_allowed_effort: object | None, @@ -123,17 +142,38 @@ def resolve_requested_effort( if requested_effort is None else require_effort_tier(requested_effort, field=f"{source} effort") ) - effective = cap_effort(requested, budget.max_allowed_effort) - return EffortResolution( + return _build_resolution( source=source, requested_effort=requested, - effective_effort=effective, max_allowed_effort=budget.max_allowed_effort, - capped=effective != requested, reason=reason, ) +def resolve_target_effort_budget( + budget: EffortBudget, + requested_effort: object | None, +) -> tuple[EffortBudget, EffortResolution]: + has_override = requested_effort is not None + resolution = resolve_requested_effort( + budget, + requested_effort, + source="target-override" if has_override else "default", + reason=( + "target effort override capped by run budget" + if has_override + else "run default effort" + ), + ) + return ( + EffortBudget( + default_effort=resolution.effective_effort, + max_allowed_effort=budget.max_allowed_effort, + ), + resolution, + ) + + def resolve_phase_effort( budget: EffortBudget, required_effort: EffortTier | None, @@ -146,13 +186,10 @@ def resolve_phase_effort( else max_effort(budget.default_effort, required_effort) ) source = "phase-required" if required_effort is not None else "default" - effective = cap_effort(requested, 
budget.max_allowed_effort) - return EffortResolution( + return _build_resolution( source=source, requested_effort=requested, - effective_effort=effective, max_allowed_effort=budget.max_allowed_effort, - capped=effective != requested, reason=reason or ( "phase required effort" if required_effort is not None else "default effort" ), diff --git a/src/continuous_refactoring/failure_report.py b/src/continuous_refactoring/failure_report.py index 1c2734e..5c9cd43 100644 --- a/src/continuous_refactoring/failure_report.py +++ b/src/continuous_refactoring/failure_report.py @@ -4,6 +4,7 @@ import json import os +import re import tempfile from dataclasses import dataclass, replace from pathlib import Path @@ -11,6 +12,7 @@ from continuous_refactoring.config import failure_snapshots_dir from continuous_refactoring.decisions import DecisionRecord +from continuous_refactoring.planning_state import is_executable_planning_step if TYPE_CHECKING: from continuous_refactoring.artifacts import RunArtifacts @@ -21,6 +23,10 @@ "write", ] +_PLANNING_CALL_ROLE_PREFIX = "planning." 
+_INTERNAL_PLANNING_CALL_ROLES = frozenset({"state", "publish", "resume"}) +_MAX_INLINE_ARTIFACT_CHARS = 4000 + @dataclass(frozen=True) class SnapshotArtifactPaths: @@ -157,6 +163,7 @@ def _snapshot_body_lines( record: DecisionRecord, artifacts: RunArtifacts, artifact_paths: SnapshotArtifactPaths, + repo_root: Path, ) -> list[str]: return [ "# Reason for Failure", @@ -177,6 +184,7 @@ def _snapshot_body_lines( "## Evidence", f"- Run artifacts: {artifacts.root}", *artifact_paths.evidence_lines(), + *_inline_artifact_sections(record, repo_root), "", ] @@ -206,7 +214,7 @@ def _snapshot_content( ), "---", "", - *_snapshot_body_lines(record, artifacts, artifact_paths), + *_snapshot_body_lines(record, artifacts, artifact_paths, repo_root), ]) @@ -243,6 +251,13 @@ def write( def _next_step_text(record: DecisionRecord) -> str: + planning_step = _planning_step(record) + if planning_step is not None: + return ( + f"Rerun planning step `{planning_step}` from the last published " + ".planning/state.json; failed current-step output and partial " + "work are artifact evidence only, not resume input." + ) if record.decision == "retry": focus = f" Focus: {record.next_retry_focus}" if record.next_retry_focus else "" return f"Retry the same target on the next attempt.{focus}" @@ -253,6 +268,48 @@ def _next_step_text(record: DecisionRecord) -> str: return "Commit the validated result and continue normally." 
+def _inline_artifact_sections(record: DecisionRecord, repo_root: Path) -> list[str]: + sections: list[str] = [] + for title, path in ( + ("Latest Agent Message", record.agent_last_message_path), + ("Agent Stdout", record.agent_stdout_path), + ("Agent Stderr", record.agent_stderr_path), + ): + excerpt = _artifact_excerpt(path, repo_root) + if excerpt is None: + continue + sections.extend(["", f"### {title}", "```text", excerpt, "```"]) + return sections + + +def _artifact_excerpt(path: Path | None, repo_root: Path) -> str | None: + if path is None or not path.exists() or not path.is_file(): + return None + content = path.read_text(encoding="utf-8", errors="replace") + if not content.strip(): + return None + sanitized = _sanitize_artifact_text(content, repo_root) + if len(sanitized) <= _MAX_INLINE_ARTIFACT_CHARS: + return sanitized + return sanitized[:_MAX_INLINE_ARTIFACT_CHARS].rstrip() + "\n...[truncated]" + + +def _sanitize_artifact_text(content: str, repo_root: Path) -> str: + sanitized = content.replace(str(repo_root), "") + return re.sub(r"/tmp/[^ \n]+", "", sanitized) + + +def _planning_step(record: DecisionRecord) -> str | None: + if not record.call_role.startswith(_PLANNING_CALL_ROLE_PREFIX): + return None + step = record.call_role.removeprefix(_PLANNING_CALL_ROLE_PREFIX) + if step in _INTERNAL_PLANNING_CALL_ROLES: + return None + if not is_executable_planning_step(step): + return None + return step + + def effective_record( record: DecisionRecord, *, @@ -303,16 +360,25 @@ def persist_decision( validation_command=validation_command, record=record, ) + planning_step = _planning_step(record) + log_fields: dict[str, object] = {} + if planning_step is not None: + log_fields["planning_step"] = planning_step artifacts.log( "WARN", f"failure snapshot written: {reason_doc}", - event="failure_doc_written", + event=( + "planning_step_failure_doc_written" + if planning_step is not None + else "failure_doc_written" + ), attempt=attempt, retry=retry, 
target=record.target, call_role=record.call_role, phase_reached=record.phase_reached, reason_doc_path=str(reason_doc), + **log_fields, ) artifacts.log_transition( attempt=attempt, diff --git a/src/continuous_refactoring/git.py b/src/continuous_refactoring/git.py index a8ff78b..22f056a 100644 --- a/src/continuous_refactoring/git.py +++ b/src/continuous_refactoring/git.py @@ -89,7 +89,7 @@ def repo_change_count(repo_root: Path) -> int: def repo_has_changes(repo_root: Path) -> bool: - return repo_change_count(repo_root) > 0 + return bool(workspace_status_lines(repo_root)) def current_branch(repo_root: Path) -> str: diff --git a/src/continuous_refactoring/log_mirroring.py b/src/continuous_refactoring/log_mirroring.py new file mode 100644 index 0000000..5f61ec5 --- /dev/null +++ b/src/continuous_refactoring/log_mirroring.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from dataclasses import dataclass + +__all__ = ["LogMirroring"] + + +@dataclass(frozen=True) +class LogMirroring: + agent: bool = False + command: bool = False diff --git a/src/continuous_refactoring/loop.py b/src/continuous_refactoring/loop.py index 6f46366..b29151b 100644 --- a/src/continuous_refactoring/loop.py +++ b/src/continuous_refactoring/loop.py @@ -4,9 +4,10 @@ import random import sys import time +from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: import argparse @@ -50,7 +51,7 @@ EffortBudget, EffortResolution, resolve_effort_budget, - resolve_requested_effort, + resolve_target_effort_budget, ) from continuous_refactoring.failure_report import effective_record, persist_decision from continuous_refactoring.git import ( @@ -60,6 +61,7 @@ revert_to, run_command, ) +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( phase_file_reference, resolve_current_phase, @@ -82,18 +84,29 @@ from 
continuous_refactoring.targeting import Target, parse_paths_arg, resolve_targets +_RunSourceKind = Literal["finite", "pool", "group", "ambient", "empty"] + + +@dataclass(frozen=True) +class _RunSource: + kind: _RunSourceKind + targets: tuple[Target, ...] = () + + def run_baseline_checks( test_command: str, repo_root: Path, *, stdout_path: Path, stderr_path: Path, + log_mirroring: LogMirroring = LogMirroring(), ) -> tuple[bool, str]: result = run_tests( test_command, repo_root, stdout_path=stdout_path, stderr_path=stderr_path, + mirror_to_terminal=log_mirroring.command, ) if result.returncode == 0: return True, "" @@ -125,27 +138,10 @@ def _effort_budget_from_args(args: argparse.Namespace) -> EffortBudget: return resolve_effort_budget(default_effort, max_allowed_effort) -def _target_effort_budget( - budget: EffortBudget, - target: Target, -) -> tuple[EffortBudget, EffortResolution]: - has_override = target.effort_override is not None - resolution = resolve_requested_effort( - budget, - target.effort_override, - source="target-override" if has_override else "default", - reason=( - "target effort override capped by run budget" - if has_override - else "run default effort" - ), - ) - return ( - EffortBudget( - default_effort=resolution.effective_effort, - max_allowed_effort=budget.max_allowed_effort, - ), - resolution, +def _log_mirroring_from_args(args: argparse.Namespace) -> LogMirroring: + return LogMirroring( + agent=bool(getattr(args, "show_agent_logs", False)), + command=bool(getattr(args, "show_command_logs", False)), ) @@ -226,6 +222,35 @@ def _resolve_targets_from_args( ) +def _has_selector(args: argparse.Namespace) -> bool: + return bool(args.targets or args.globs or args.extensions or args.paths) + + +def _build_run_source( + args: argparse.Namespace, + repo_root: Path, +) -> tuple[_RunSource, bool]: + if not _has_selector(args): + return _RunSource("ambient"), False + + targets = _resolve_targets_from_args(args, repo_root) + if args.targets: + 
random.shuffle(targets) + return _RunSource("finite", tuple(targets)), False + + if targets: + source_kind: _RunSourceKind = "group" if args.paths else "pool" + return _RunSource(source_kind, tuple(targets)), False + + if args.scope_instruction and (args.globs or args.extensions or args.paths): + return ( + _RunSource("group", (_build_target_fallback(args.scope_instruction),)), + True, + ) + + return _RunSource("empty"), False + + def _action_limit( args: argparse.Namespace, targets: list[Target], @@ -237,10 +262,56 @@ def _action_limit( return args.max_refactors +def _require_run_loop_action_limit(args: argparse.Namespace) -> None: + if args.max_refactors is None and not args.targets: + raise ContinuousRefactorError("--max-refactors required when no --targets") + + def _has_action_budget(actions_completed: int, action_limit: int | None) -> bool: return action_limit is None or actions_completed < action_limit +def _has_available_source_target(source: _RunSource, source_index: int) -> bool: + if source.kind == "finite": + return source_index < len(source.targets) + return source.kind in {"pool", "group", "ambient"} + + +def _has_more_actions( + source: _RunSource, + source_index: int, + actions_completed: int, + action_limit: int | None, +) -> bool: + return _has_action_budget( + actions_completed, + action_limit, + ) and _has_available_source_target(source, source_index) + + +def _select_source_target( + source: _RunSource, + source_index: int, + args: argparse.Namespace, + repo_root: Path, +) -> tuple[Target, int]: + if source.kind == "finite": + return source.targets[source_index], source_index + 1 + if source.kind == "pool": + return random.choice(source.targets), source_index + if source.kind == "group": + return source.targets[0], source_index + + if source.kind != "ambient": + raise ContinuousRefactorError("no source target is available") + + targets = _resolve_targets_from_args(args, repo_root) + target = random.choice(targets) if targets else 
_build_target_fallback( + args.scope_instruction + ) + return target, source_index + + def _action_banner(action_index: int, action_limit: int | None) -> str: if action_limit is None: return f"\n── Action {action_index} ──" @@ -248,9 +319,31 @@ def _action_banner(action_index: int, action_limit: int | None) -> str: def _print_migration_probe(live_dir: Path, effort_budget: EffortBudget) -> None: + preflight = migration_tick._first_unloadable_visible_manifest(live_dir) + if preflight is not None: + migration_dir, _findings = preflight + print(f"Examining migration: migration/{migration_dir.name}") + return + + now = datetime.now(timezone.utc) + planning_candidates = migration_tick.enumerate_eligible_planning_manifests( + live_dir, + now, + ) + if planning_candidates: + if len(planning_candidates) > 1: + print( + f"Examining planning migrations: " + f"{len(planning_candidates)} eligible" + ) + return + manifest, _manifest_path = planning_candidates[0] + print(f"Examining planning migration: migration/{manifest.name}") + return + candidates = migration_tick.enumerate_eligible_manifests( live_dir, - datetime.now(timezone.utc), + now, effort_budget, ) if not candidates: @@ -268,6 +361,7 @@ def _print_migration_probe(live_dir: Path, effort_budget: EffortBudget) -> None: class _MigrationProbeArtifacts: def __init__(self, artifacts: RunArtifacts, action_index: int) -> None: self._artifacts = artifacts + self.run_id = artifacts.run_id self.root = artifacts.root / "migration-probes" / f"action-{action_index:03d}" def attempt_dir(self, attempt: int, retry: int = 1) -> Path: @@ -367,6 +461,7 @@ def _sleep_between_actions( def run_once(args: argparse.Namespace) -> int: repo_root = args.repo_root.resolve() timeout = args.timeout or 900 + log_mirroring = _log_mirroring_from_args(args) base_effort_budget = _effort_budget_from_args(args) max_attempts_effective = _effective_max_attempts( getattr(args, "max_attempts", None) @@ -382,9 +477,9 @@ def run_once(args: argparse.Namespace) -> 
int: base_prompt = _resolve_base_prompt(args) model = target.model_override or args.model - target_effort_budget, effort_resolution = _target_effort_budget( + target_effort_budget, effort_resolution = resolve_target_effort_budget( base_effort_budget, - target, + target.effort_override, ) effort = target_effort_budget.default_effort @@ -417,6 +512,7 @@ def run_once(args: argparse.Namespace) -> int: repo_root, stdout_path=artifacts.baseline_dir("initial") / "tests.stdout.log", stderr_path=artifacts.baseline_dir("initial") / "tests.stderr.log", + log_mirroring=log_mirroring, ) if not baseline_ok: final_status = "baseline_failed" @@ -444,12 +540,22 @@ def run_once(args: argparse.Namespace) -> int: max_attempts=max_attempts_effective, attempt=1, finalize_commit=_finalize_commit, + log_mirroring=log_mirroring, ) target = route_result.target if route_result.outcome == "commit": final_status = "completed" return 0 if route_result.outcome in {"abandon", "blocked"}: + if route_result.decision_record is not None: + persist_decision( + repo_root, + artifacts, + attempt=1, + retry=route_result.decision_record.retry_used, + validation_command=args.validation_command, + record=route_result.decision_record, + ) final_status = "migration_failed" raise ContinuousRefactorError( route_result.decision_record.summary @@ -490,7 +596,7 @@ def run_once(args: argparse.Namespace) -> int: stdout_path=attempt_dir / "agent.stdout.log", stderr_path=attempt_dir / "agent.stderr.log", last_message_path=last_message_path, - mirror_to_terminal=args.show_agent_logs, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: @@ -538,7 +644,7 @@ def run_once(args: argparse.Namespace) -> int: repo_root, stdout_path=attempt_dir / "tests.stdout.log", stderr_path=attempt_dir / "tests.stderr.log", - mirror_to_terminal=args.show_command_logs, + mirror_to_terminal=log_mirroring.command, ) if validation_result.returncode != 0: @@ -597,7 +703,9 @@ def run_once(args: 
argparse.Namespace) -> int: def run_loop(args: argparse.Namespace) -> int: repo_root = args.repo_root.resolve() + _require_run_loop_action_limit(args) timeout = args.timeout or 1800 + log_mirroring = _log_mirroring_from_args(args) sleep_seconds = getattr(args, "sleep", 0.0) max_consecutive = args.max_consecutive_failures base_effort_budget = _effort_budget_from_args(args) @@ -606,14 +714,8 @@ def run_loop(args: argparse.Namespace) -> int: ) taste = _load_taste_safe(repo_root) - targets = _resolve_targets_from_args(args, repo_root) - random.shuffle(targets) - - fell_back_to_scope = False - if not targets: - targets = [_build_target_fallback(args.scope_instruction)] - fell_back_to_scope = bool(args.extensions or args.globs or args.paths) - action_limit = _action_limit(args, targets) + source, fell_back_to_scope = _build_run_source(args, repo_root) + action_limit = _action_limit(args, list(source.targets)) live_dir = _resolve_live_migrations_dir(repo_root) base_prompt = _resolve_base_prompt(args) @@ -657,6 +759,7 @@ def run_loop(args: argparse.Namespace) -> int: repo_root, stdout_path=artifacts.baseline_dir("initial") / "tests.stdout.log", stderr_path=artifacts.baseline_dir("initial") / "tests.stderr.log", + log_mirroring=log_mirroring, ) if not baseline_ok: final_status = "baseline_failed" @@ -664,16 +767,68 @@ def run_loop(args: argparse.Namespace) -> int: f"Baseline validation failed\n{baseline_context}" ) - while ( - source_index < len(targets) - and _has_action_budget(actions_completed, action_limit) - ): + while _has_more_actions(source, source_index, actions_completed, action_limit): action_index = actions_completed + 1 print(_action_banner(action_index, action_limit)) if live_dir is not None: _print_migration_probe(live_dir, base_effort_budget) migration_artifacts = _MigrationProbeArtifacts(artifacts, action_index) + planning_outcome, planning_record = migration_tick.try_planning_tick( + live_dir, + taste, + repo_root, + migration_artifacts, + 
agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + attempt=action_index, + finalize_commit=_finalize_commit, + log_mirroring=log_mirroring, + ) + + if planning_outcome in {"commit", "abandon", "blocked"}: + artifacts.mark_attempt_started(action_index) + if planning_record is not None: + persist_decision( + repo_root, + artifacts, + attempt=action_index, + retry=planning_record.retry_used, + validation_command=args.validation_command, + record=planning_record, + ) + actions_completed += 1 + if planning_outcome == "commit": + consecutive_failures = 0 + else: + if planning_record is not None: + print( + "Planning blocked: " + f"{planning_record.target} — {planning_record.summary}" + ) + consecutive_failures += 1 + if consecutive_failures >= max_consecutive: + final_status = "max_consecutive_failures" + raise ContinuousRefactorError( + f"Stopping: {max_consecutive} consecutive failures" + ) + _sleep_between_actions( + sleep_seconds, + artifacts=artifacts, + action_index=action_index, + has_more_actions=_has_more_actions( + source, + source_index, + actions_completed, + action_limit, + ), + ) + continue + migration_outcome, migration_record = migration_tick.try_migration_tick( live_dir, taste, @@ -689,6 +844,7 @@ def run_loop(args: argparse.Namespace) -> int: max_attempts=max_attempts_effective, attempt=action_index, finalize_commit=_finalize_commit, + log_mirroring=log_mirroring, ) if migration_outcome in {"commit", "abandon"}: @@ -721,9 +877,11 @@ def run_loop(args: argparse.Namespace) -> int: sleep_seconds, artifacts=artifacts, action_index=action_index, - has_more_actions=( - source_index < len(targets) - and _has_action_budget(actions_completed, action_limit) + has_more_actions=_has_more_actions( + source, + source_index, + actions_completed, + action_limit, ), ) continue @@ -741,13 +899,17 @@ def run_loop(args: 
argparse.Namespace) -> int: f"{migration_record.summary}" ) - target = targets[source_index] - source_index += 1 + target, source_index = _select_source_target( + source, + source_index, + args, + repo_root, + ) artifacts.mark_attempt_started(action_index) model = target.model_override or args.model - target_effort_budget, effort_resolution = _target_effort_budget( + target_effort_budget, effort_resolution = resolve_target_effort_budget( base_effort_budget, - target, + target.effort_override, ) effort = target_effort_budget.default_effort effort_metadata = effort_resolution.event_fields() @@ -777,6 +939,7 @@ def run_loop(args: argparse.Namespace) -> int: attempt=action_index, finalize_commit=_finalize_commit, check_migrations=False, + log_mirroring=log_mirroring, ) target = route_result.target if route_result.outcome == "commit": @@ -795,9 +958,11 @@ def run_loop(args: argparse.Namespace) -> int: sleep_seconds, artifacts=artifacts, action_index=action_index, - has_more_actions=( - source_index < len(targets) - and _has_action_budget(actions_completed, action_limit) + has_more_actions=_has_more_actions( + source, + source_index, + actions_completed, + action_limit, ), ) continue @@ -822,9 +987,11 @@ def run_loop(args: argparse.Namespace) -> int: sleep_seconds, artifacts=artifacts, action_index=action_index, - has_more_actions=( - source_index < len(targets) - and _has_action_budget(actions_completed, action_limit) + has_more_actions=_has_more_actions( + source, + source_index, + actions_completed, + action_limit, ), ) continue @@ -861,8 +1028,8 @@ def run_loop(args: argparse.Namespace) -> int: prompt=prompt, timeout=timeout, validation_command=args.validation_command, - show_agent_logs=args.show_agent_logs, - show_command_logs=args.show_command_logs, + show_agent_logs=log_mirroring.agent, + show_command_logs=log_mirroring.command, commit_message_prefix=args.commit_message_prefix, preserved_workspace=preserved_workspace, ) @@ -914,9 +1081,11 @@ def run_loop(args: 
argparse.Namespace) -> int: sleep_seconds, artifacts=artifacts, action_index=action_index, - has_more_actions=( - source_index < len(targets) - and _has_action_budget(actions_completed, action_limit) + has_more_actions=_has_more_actions( + source, + source_index, + actions_completed, + action_limit, ), ) @@ -951,6 +1120,40 @@ def _focus_eligible_manifests( ] +def _focus_eligible_planning_manifests( + live_dir: Path, now: datetime, +) -> list[tuple[MigrationManifest, Path]]: + return [ + pair for pair in migration_tick.enumerate_eligible_planning_manifests( + live_dir, + now, + ) + if not pair[0].awaiting_human_review + ] + + +def _unskipped_planning_candidates( + candidates: list[tuple[MigrationManifest, Path]], + skipped_names: set[str], +) -> list[tuple[MigrationManifest, Path]]: + if not skipped_names: + return candidates + return [ + pair for pair in candidates + if pair[0].name not in skipped_names + ] + + +def _eligible_planning_path_labels( + repo_root: Path, + candidates: list[tuple[MigrationManifest, Path]], +) -> tuple[str, ...]: + return tuple( + _repo_relative_path(repo_root, manifest_path.parent) + for _manifest, manifest_path in candidates + ) + + def _eligible_phase_path_labels( repo_root: Path, candidates: list[tuple[MigrationManifest, Path]], @@ -977,6 +1180,7 @@ def _repo_relative_path(repo_root: Path, path: Path) -> str: def run_migrations_focused_loop(args: argparse.Namespace) -> int: repo_root = args.repo_root.resolve() timeout = args.timeout or 1800 + log_mirroring = _log_mirroring_from_args(args) sleep_seconds = getattr(args, "sleep", 0.0) max_consecutive = args.max_consecutive_failures base_effort_budget = _effort_budget_from_args(args) @@ -1019,6 +1223,7 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: error_message: str | None = None consecutive_failures = 0 iteration = 0 + skipped_planning_names: set[str] = set() try: require_clean_worktree(repo_root) @@ -1028,6 +1233,7 @@ def run_migrations_focused_loop(args: 
argparse.Namespace) -> int: repo_root, stdout_path=artifacts.baseline_dir("initial") / "tests.stdout.log", stderr_path=artifacts.baseline_dir("initial") / "tests.stderr.log", + log_mirroring=log_mirroring, ) if not baseline_ok: final_status = "baseline_failed" @@ -1037,8 +1243,26 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: while True: now = datetime.now(timezone.utc) - eligible = _focus_eligible_manifests(live_dir, now, base_effort_budget) - if not eligible: + preflight = migration_tick._first_unloadable_visible_manifest(live_dir) + all_planning_eligible = ( + [] + if preflight is not None + else _focus_eligible_planning_manifests(live_dir, now) + ) + planning_eligible = _unskipped_planning_candidates( + all_planning_eligible, + skipped_planning_names, + ) + phase_eligible = ( + [] + if preflight is not None or planning_eligible + else _focus_eligible_manifests(live_dir, now, base_effort_budget) + ) + if all_planning_eligible and not planning_eligible and not phase_eligible: + skipped_planning_names.clear() + planning_eligible = all_planning_eligible + + if not planning_eligible and not phase_eligible and preflight is None: print( "Focused migrations loop: nothing eligible — " "every migration is done or blocked." 
@@ -1053,25 +1277,56 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: iteration += 1 artifacts.mark_attempt_started(iteration) - names = ", ".join(_eligible_phase_path_labels(repo_root, eligible)) + names = ( + f"{preflight[0].name}/manifest.json" + if preflight is not None + else ( + ", ".join( + _eligible_planning_path_labels(repo_root, planning_eligible) + ) + if planning_eligible + else ", ".join( + _eligible_phase_path_labels(repo_root, phase_eligible) + ) + ) + ) print(f"\n── Migration tick {iteration} (eligible: {names}) ──") - outcome, record = migration_tick.try_migration_tick( - live_dir, - taste, - repo_root, - artifacts, - agent=args.agent, - model=args.model, - effort=base_effort_budget.default_effort, - effort_budget=base_effort_budget, - timeout=timeout, - commit_message_prefix=args.commit_message_prefix, - validation_command=args.validation_command, - max_attempts=max_attempts_effective, - attempt=iteration, - finalize_commit=_finalize_commit, - ) + if planning_eligible: + outcome, record = migration_tick.try_planning_tick( + live_dir, + taste, + repo_root, + artifacts, + agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + attempt=iteration, + finalize_commit=_finalize_commit, + skip_migration_names=skipped_planning_names, + log_mirroring=log_mirroring, + ) + else: + outcome, record = migration_tick.try_migration_tick( + live_dir, + taste, + repo_root, + artifacts, + agent=args.agent, + model=args.model, + effort=base_effort_budget.default_effort, + effort_budget=base_effort_budget, + timeout=timeout, + commit_message_prefix=args.commit_message_prefix, + validation_command=args.validation_command, + max_attempts=max_attempts_effective, + attempt=iteration, + finalize_commit=_finalize_commit, + log_mirroring=log_mirroring, + ) if record is not None and outcome != "not-routed": persist_decision( 
@@ -1085,7 +1340,16 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: if outcome == "commit": consecutive_failures = 0 + if record is not None and record.call_role.startswith("planning."): + skipped_planning_names.discard(record.target) elif outcome in {"abandon", "blocked"}: + if ( + outcome == "abandon" + and record is not None + and record.call_role.startswith("planning.") + and record.retry_recommendation == "new-target" + ): + skipped_planning_names.add(record.target) consecutive_failures += 1 if consecutive_failures >= max_consecutive: final_status = "max_consecutive_failures" @@ -1093,6 +1357,16 @@ def run_migrations_focused_loop(args: argparse.Namespace) -> int: f"Stopping: {max_consecutive} consecutive failures" ) else: + if skipped_planning_names: + artifacts.log( + "INFO", + "Focused migration alternatives deferred; retrying skipped " + "planning migrations.", + event="focus_retry_skipped_planning", + skipped_planning_targets=sorted(skipped_planning_names), + ) + skipped_planning_names.clear() + continue message = ( "Migration tick deferred all eligible migrations; " "terminating until a wake-up window or manifest change." 
diff --git a/src/continuous_refactoring/migration_cli.py b/src/continuous_refactoring/migration_cli.py new file mode 100644 index 0000000..5bf0f88 --- /dev/null +++ b/src/continuous_refactoring/migration_cli.py @@ -0,0 +1,701 @@ +from __future__ import annotations + +import argparse +import json +import sys +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + +from continuous_refactoring.artifacts import ( + ContinuousRefactorError, + create_run_artifacts, +) +from continuous_refactoring.config import resolve_live_migrations_dir, resolve_project +from continuous_refactoring.migration_consistency import ( + MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + load_manifest as load_migration_manifest, + phase_file_reference, + resolve_current_phase, +) +from continuous_refactoring.planning_publish import publish_lock_path +from continuous_refactoring.planning_state import ( + FeedbackSource, + load_planning_state, + planning_state_path, +) + +__all__ = [ + "MigrationCliContext", + "MigrationTarget", + "handle_migration", + "handle_migration_doctor", + "handle_migration_list", + "handle_migration_refine", + "handle_migration_review", + "resolve_migration_target", +] + +_MIGRATION_USAGE = "Usage: continuous-refactoring migration {list,doctor,review,refine}" +_MISSING_TEXT = "(none)" + + +@dataclass(frozen=True) +class MigrationCliContext: + repo_root: Path + live_dir: Path + project_state_dir: Path + + +@dataclass(frozen=True) +class MigrationTarget: + slug: str + path: Path + + +def handle_migration(args: argparse.Namespace) -> None: + if args.migration_command == "list": + return handle_migration_list(args) + if args.migration_command == "doctor": + return handle_migration_doctor(args) + if args.migration_command == "review": + return handle_migration_review(args) + if 
args.migration_command == "refine": + return handle_migration_refine(args) + print(_MIGRATION_USAGE, file=sys.stderr) + raise SystemExit(2) + + +def handle_migration_list(args: argparse.Namespace) -> None: + context = _resolve_context(error_code=1) + if not context.live_dir.is_dir(): + return + + for migration_dir in iter_visible_migration_dirs(context.live_dir): + row = _list_row(context, migration_dir) + if row is None: + continue + if args.status is not None and row.status != args.status: + continue + if args.awaiting_review and row.awaiting_review != "yes": + continue + print(row.format()) + + +def handle_migration_doctor(args: argparse.Namespace) -> None: + context = _resolve_context(error_code=2) + target: str | None = getattr(args, "target", None) + all_targets = bool(getattr(args, "all", False)) + if all_targets == bool(target): + print( + "Error: migration doctor requires exactly one of --all or .", + file=sys.stderr, + ) + raise SystemExit(2) + + if all_targets: + findings = _doctor_all(context) + else: + assert target is not None + try: + migration_target = resolve_migration_target( + live_dir=context.live_dir, + repo_root=context.repo_root, + value=target, + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(2) from error + findings = _doctor_migration(context, migration_target) + + for slug, finding in findings: + print(_format_doctor_finding(slug, finding)) + if has_blocking_consistency_findings(finding for _, finding in findings): + raise SystemExit(1) + + +def handle_migration_review(args: argparse.Namespace) -> None: + context = _resolve_context(error_code=2) + try: + target = resolve_migration_target( + live_dir=context.live_dir, + repo_root=context.repo_root, + value=args.target, + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(2) from error + + from continuous_refactoring.config import load_taste + from 
continuous_refactoring.review_cli import ( + StagedReviewRequest, + handle_staged_migration_review, + ) + + try: + taste = load_taste(resolve_project(context.repo_root)) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + handle_staged_migration_review( + StagedReviewRequest( + repo_root=context.repo_root, + live_dir=context.live_dir, + target=target, + project_state_dir=context.project_state_dir, + agent=args.agent, + model=args.model, + effort=args.effort, + taste=taste, + ) + ) + + +def handle_migration_refine(args: argparse.Namespace) -> None: + context = _resolve_context(error_code=2) + feedback_text, feedback_source = _read_refine_feedback(args) + try: + target = resolve_migration_target( + live_dir=context.live_dir, + repo_root=context.repo_root, + value=args.target, + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(2) from error + + from continuous_refactoring.config import load_taste + from continuous_refactoring.log_mirroring import LogMirroring + from continuous_refactoring.planning import ( + PlanningRefineRequest, + run_refine_planning_step, + ) + + try: + taste = load_taste(resolve_project(context.repo_root)) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + + artifacts = create_run_artifacts( + context.repo_root, + agent=args.agent, + model=args.model, + effort=args.effort, + test_command="migration refine", + ) + try: + result = run_refine_planning_step( + PlanningRefineRequest( + migration_name=target.slug, + feedback_text=feedback_text, + feedback_source=feedback_source, + taste=taste, + repo_root=context.repo_root, + live_dir=context.live_dir, + artifacts=artifacts, + agent=args.agent, + model=args.model, + effort=args.effort, + log_mirroring=LogMirroring( + agent=bool(getattr(args, "show_agent_logs", False)), + ), + ) + ) + except 
ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(_refine_error_code(str(error))) from error + + if result.status != "published": + print( + f"Error: {_refine_publish_error_message(result.reason, target.slug)}", + file=sys.stderr, + ) + raise SystemExit(1) + print(f"Refined {target.slug}: {result.reason}") + + +def resolve_migration_target( + *, + live_dir: Path, + repo_root: Path, + value: str, +) -> MigrationTarget: + live_root = live_dir.resolve() + slug_target = _slug_target(live_root, value) + path_target = _path_target( + live_root, + repo_root.resolve(), + value, + reject_symlink=slug_target is None, + ) + + if ( + slug_target is not None + and path_target is not None + and slug_target.path.resolve() != path_target.path.resolve() + ): + raise ContinuousRefactorError( + f"Migration target {value!r} is ambiguous between " + f"{slug_target.path} and {path_target.path}." + ) + if slug_target is not None: + return slug_target + if path_target is not None: + return path_target + if _looks_like_path(value): + _raise_invalid_path_target(live_root, repo_root.resolve(), value) + raise ContinuousRefactorError(f"Migration {value!r} does not exist.") + + +def _read_refine_feedback(args: argparse.Namespace) -> tuple[str, FeedbackSource]: + if args.message is not None: + text = str(args.message) + source: FeedbackSource = "message" + else: + try: + path = args.file + text = path.read_text(encoding="utf-8") + except OSError as error: + print( + f"Error: could not read refinement feedback file: {error}", + file=sys.stderr, + ) + raise SystemExit(2) from error + source = "file" + if not text.strip(): + print("Error: refinement feedback must not be empty.", file=sys.stderr) + raise SystemExit(2) + return text, source + + +def _refine_publish_error_message(reason: str, slug: str) -> str: + if "stale base snapshot" not in reason: + return reason + return ( + f"{reason}\n" + "Live migration changed while refine was running. 
" + f"Run `continuous-refactoring migration doctor {slug}` if unsure, then " + f"rerun `continuous-refactoring migration refine {slug} ...`." + ) + + +def _refine_error_code(message: str) -> int: + usage_fragments = ( + "cannot be refined", + "only planning or unexecuted ready migrations", + "already advanced", + "missing .planning/state.json", + "Cannot reopen planning state", + "Planning state is terminal", + ) + return 2 if any(fragment in message for fragment in usage_fragments) else 1 + + +@dataclass(frozen=True) +class _ListRow: + slug: str + status: str + cursor: str + awaiting_review: str + last_touch: str + cooldown: str + reason: str + + def format(self) -> str: + return "\t".join( + ( + self.slug, + self.status, + self.cursor, + self.awaiting_review, + self.last_touch, + self.cooldown, + self.reason, + ) + ) + + +def _resolve_context(*, error_code: int) -> MigrationCliContext: + try: + project = resolve_project(Path.cwd().resolve()) + except ContinuousRefactorError: + print( + "Error: project not initialized; no live-migrations-dir available.", + file=sys.stderr, + ) + raise SystemExit(error_code) + try: + live_dir = resolve_live_migrations_dir(project) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(error_code) + if live_dir is None: + print( + "Error: no live-migrations-dir configured for this project.", + file=sys.stderr, + ) + raise SystemExit(error_code) + return MigrationCliContext( + repo_root=Path(project.entry.path).resolve(), + live_dir=live_dir, + project_state_dir=project.project_dir, + ) + + +def _list_row( + context: MigrationCliContext, + migration_dir: Path, +) -> _ListRow | None: + manifest_path = migration_dir / "manifest.json" + if not manifest_path.exists(): + return _ListRow( + slug=migration_dir.name, + status="invalid-manifest", + cursor="blocked", + awaiting_review="no", + last_touch=_MISSING_TEXT, + cooldown=_MISSING_TEXT, + reason="missing-manifest", + ) + try: + manifest 
= load_migration_manifest(manifest_path) + except ContinuousRefactorError as error: + return _ListRow( + slug=migration_dir.name, + status="invalid-manifest", + cursor="blocked", + awaiting_review="no", + last_touch=_MISSING_TEXT, + cooldown=_MISSING_TEXT, + reason=f"invalid-manifest: {_single_line(str(error))}", + ) + + cursor, cursor_reason = _cursor_text(context, migration_dir, manifest) + return _ListRow( + slug=migration_dir.name, + status=manifest.status, + cursor=cursor, + awaiting_review="yes" if manifest.awaiting_human_review else "no", + last_touch=manifest.last_touch, + cooldown=manifest.cooldown_until or _MISSING_TEXT, + reason=_reason_text(manifest, cursor_reason), + ) + + +def _cursor_text( + context: MigrationCliContext, + migration_dir: Path, + manifest: MigrationManifest, +) -> tuple[str, str | None]: + if manifest.status == "planning": + return _planning_cursor(context, migration_dir) + if manifest.status in ("ready", "in-progress"): + if not manifest.current_phase: + return _MISSING_TEXT, None + try: + phase = resolve_current_phase(manifest) + except ContinuousRefactorError: + return "blocked", "invalid-current-phase" + return phase_file_reference(phase), None + return _MISSING_TEXT, None + + +def _planning_cursor( + context: MigrationCliContext, + migration_dir: Path, +) -> tuple[str, str | None]: + state_path = planning_state_path(migration_dir) + if not state_path.exists(): + return "planning:blocked", "planning-state-missing" + try: + state = load_planning_state( + context.repo_root, + state_path, + published_migration_root=migration_dir, + ) + except ContinuousRefactorError: + return "planning:blocked", "planning-state-invalid" + return f"planning:{state.next_step}", None + + +def _reason_text(manifest: MigrationManifest, cursor_reason: str | None) -> str: + if cursor_reason is not None: + return cursor_reason + if manifest.human_review_reason: + return manifest.human_review_reason + return _MISSING_TEXT + + +def _doctor_all( + context: 
MigrationCliContext, +) -> list[tuple[str, MigrationConsistencyFinding]]: + findings: list[tuple[str, MigrationConsistencyFinding]] = [] + for migration_dir in iter_visible_migration_dirs(context.live_dir): + findings.extend( + _doctor_migration( + context, + MigrationTarget(slug=migration_dir.name, path=migration_dir), + ) + ) + findings.extend(_transaction_findings(context.live_dir)) + return findings + + +def _doctor_migration( + context: MigrationCliContext, + target: MigrationTarget, +) -> list[tuple[str, MigrationConsistencyFinding]]: + findings = check_migration_consistency(target.path, mode="doctor") + findings.extend(_planning_state_findings(context, target.path)) + return [(target.slug, finding) for finding in findings] + + +def _planning_state_findings( + context: MigrationCliContext, + migration_dir: Path, +) -> list[MigrationConsistencyFinding]: + manifest_path = migration_dir / "manifest.json" + try: + manifest = load_migration_manifest(manifest_path) + except ContinuousRefactorError: + return [] + if manifest.status != "planning": + return [] + + state_path = planning_state_path(migration_dir) + if not state_path.exists(): + return [ + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="planning-state-missing", + path=state_path, + message="Planning migration is missing .planning/state.json.", + ) + ] + try: + load_planning_state( + context.repo_root, + state_path, + published_migration_root=migration_dir, + ) + except ContinuousRefactorError as error: + return [ + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="planning-state-invalid", + path=state_path, + message=_single_line(str(error)), + ) + ] + return [] + + +def _transaction_findings( + live_dir: Path, +) -> list[tuple[str, MigrationConsistencyFinding]]: + transaction_root = publish_lock_path(live_dir).parent + if not transaction_root.exists(): + return [] + if not transaction_root.is_dir(): + return [ + ( + "__transactions__", + 
MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="transaction-root-invalid", + path=transaction_root, + message="Planning transaction root is not a directory.", + ), + ) + ] + + findings: list[tuple[str, MigrationConsistencyFinding]] = [] + lock_path = publish_lock_path(live_dir) + if lock_path.exists(): + findings.append( + ( + "__transactions__", + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="publish-lock-present", + path=lock_path, + message=_lock_message(lock_path), + ), + ) + ) + + for child in sorted(transaction_root.iterdir()): + if child == lock_path: + continue + if child.is_dir(): + findings.append( + ( + "__transactions__", + MigrationConsistencyFinding( + severity="error", + mode="doctor", + code="transaction-leftover", + path=child, + message="Planning transaction directory is still present.", + ), + ) + ) + return findings + + +def _format_doctor_finding( + slug: str, + finding: MigrationConsistencyFinding, +) -> str: + return "\t".join( + ( + slug, + finding.severity, + finding.code, + str(finding.path), + finding.message, + ) + ) + + +def _slug_target(live_root: Path, value: str) -> MigrationTarget | None: + if not _safe_slug(value): + return None + path = live_root / value + if not path.is_dir() or path.is_symlink(): + return None + return MigrationTarget(slug=value, path=path) + + +def _path_target( + live_root: Path, + repo_root: Path, + value: str, + *, + reject_symlink: bool, +) -> MigrationTarget | None: + if not _should_consider_path(repo_root, value): + return None + _require_no_parent_traversal(value) + path = _raw_path(repo_root, value) + if reject_symlink and path.is_symlink(): + raise ContinuousRefactorError( + f"Migration path must not be a symlink: {path}" + ) + resolved = path.resolve() + if not resolved.exists(): + return None + _require_contained_visible_child(live_root, resolved, original=path) + return MigrationTarget(slug=resolved.name, path=resolved) + + +def 
_raise_invalid_path_target(live_root: Path, repo_root: Path, value: str) -> None: + _require_no_parent_traversal(value) + path = _raw_path(repo_root, value) + if path.is_symlink(): + raise ContinuousRefactorError( + f"Migration path must not be a symlink: {path}" + ) + resolved = path.resolve() + _require_contained_visible_child(live_root, resolved, original=path) + if not resolved.is_dir(): + raise ContinuousRefactorError(f"Migration path is not a directory: {path}") + + +def _require_contained_visible_child( + live_root: Path, + resolved: Path, + *, + original: Path, +) -> None: + try: + relative = resolved.relative_to(live_root) + except ValueError as error: + raise ContinuousRefactorError( + f"Migration path must stay inside live migrations dir: {original}" + ) from error + if len(relative.parts) != 1: + raise ContinuousRefactorError( + f"Migration path must identify a direct migration directory: {original}" + ) + if not _safe_slug(relative.parts[0]): + raise ContinuousRefactorError( + f"Migration path targets a hidden or internal directory: {original}" + ) + if not resolved.is_dir(): + raise ContinuousRefactorError(f"Migration path is not a directory: {original}") + + +def _safe_slug(value: str) -> bool: + return ( + value != "" + and Path(value).name == value + and not value.startswith(".") + and not value.startswith("__") + ) + + +def _should_consider_path(repo_root: Path, value: str) -> bool: + return _looks_like_path(value) or _raw_path(repo_root, value).exists() + + +def _looks_like_path(value: str) -> bool: + path = Path(value) + return path.is_absolute() or len(path.parts) > 1 or value.startswith(".") + + +def _require_no_parent_traversal(value: str) -> None: + if ".." 
in Path(value).parts: + raise ContinuousRefactorError( + f"Migration path must not contain parent traversal: {value}" + ) + + +def _raw_path(repo_root: Path, value: str) -> Path: + path = Path(value) + if path.is_absolute(): + return path + return repo_root / path + + +def _lock_message(lock_path: Path) -> str: + details = _lock_metadata(lock_path) + try: + mtime = datetime.fromtimestamp(lock_path.stat().st_mtime).astimezone() + except OSError: + mtime_text = "unknown" + else: + mtime_text = mtime.isoformat(timespec="seconds") + suffix = f"; {details}" if details else "" + return f"Planning publish lock is present; mtime={mtime_text}{suffix}." + + +def _lock_metadata(lock_path: Path) -> str: + owner_path = lock_path / "owner.json" + try: + raw = json.loads(owner_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return "" + if not isinstance(raw, dict): + return "" + parts = [ + f"{key}={raw[key]}" + for key in ("pid", "operation", "created_at") + if key in raw + ] + return ", ".join(parts) + + +def _single_line(value: str) -> str: + return " ".join(value.split()) diff --git a/src/continuous_refactoring/migration_consistency.py b/src/continuous_refactoring/migration_consistency.py new file mode 100644 index 0000000..692eaab --- /dev/null +++ b/src/continuous_refactoring/migration_consistency.py @@ -0,0 +1,528 @@ +from __future__ import annotations + +import re +from collections.abc import Iterable +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.migrations import MigrationManifest, load_manifest + +__all__ = [ + "CONSISTENCY_MODES", + "CONSISTENCY_SEVERITIES", + "ConsistencyMode", + "ConsistencySeverity", + "MigrationConsistencyFinding", + "check_migration_consistency", + "has_blocking_consistency_findings", + "iter_visible_migration_dirs", +] + +ConsistencyMode = Literal[ + "planning-snapshot", + 
"ready-publish", + "execution-gate", + "doctor", +] +ConsistencySeverity = Literal["info", "warning", "error"] + +CONSISTENCY_MODES: tuple[ConsistencyMode, ...] = ( + "planning-snapshot", + "ready-publish", + "execution-gate", + "doctor", +) +CONSISTENCY_SEVERITIES: tuple[ConsistencySeverity, ...] = ( + "info", + "warning", + "error", +) + +_PHASE_DOC_RE = re.compile(r"^phase-(?P\d+)-(?P.+)\.md$") +_INTERNAL_MIGRATION_DIR_NAMES = frozenset( + { + "__intentional_skips__", + "__transactions__", + } +) + + +@dataclass(frozen=True) +class MigrationConsistencyFinding: + severity: ConsistencySeverity + mode: ConsistencyMode + code: str + path: Path + message: str + + +def iter_visible_migration_dirs(live_dir: Path) -> list[Path]: + if not live_dir.is_dir(): + return [] + return [ + child + for child in sorted(live_dir.iterdir()) + if _is_visible_migration_dir(child) + ] + + +def check_migration_consistency( + migration_dir: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + manifest_path = migration_dir / "manifest.json" + findings: list[MigrationConsistencyFinding] = [] + if not manifest_path.exists(): + return [ + _finding( + mode, + "error", + "missing-manifest", + manifest_path, + "Migration manifest is missing.", + ) + ] + + try: + manifest = load_manifest(manifest_path) + except ContinuousRefactorError as error: + return [ + _finding( + mode, + "error", + "invalid-manifest", + manifest_path, + str(error), + ) + ] + + findings.extend(_manifest_identity_findings(migration_dir, manifest, mode)) + findings.extend(_phase_doc_name_collision_findings(migration_dir, mode)) + findings.extend(_manifest_plan_findings(migration_dir, manifest, mode)) + findings.extend(_manifest_phase_file_findings(migration_dir, manifest, mode)) + findings.extend(_manifest_phase_metadata_findings(migration_dir, manifest, mode)) + return findings + + +def has_blocking_consistency_findings( + findings: Iterable[MigrationConsistencyFinding], +) -> bool: + return 
any(finding.severity == "error" for finding in findings) + + +def _is_visible_migration_dir(path: Path) -> bool: + return ( + path.is_dir() + and not path.is_symlink() + and _is_visible_migration_dir_name(path.name) + ) + + +def _is_visible_migration_dir_name(name: str) -> bool: + return ( + not name.startswith(".") + and not name.startswith("__") + and name not in _INTERNAL_MIGRATION_DIR_NAMES + ) + + +def _finding( + mode: ConsistencyMode, + severity: ConsistencySeverity, + code: str, + path: Path, + message: str, +) -> MigrationConsistencyFinding: + return MigrationConsistencyFinding( + severity=severity, + mode=mode, + code=code, + path=path, + message=message, + ) + + +def _manifest_identity_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if manifest.name == migration_dir.name: + return [] + return [ + _finding( + mode, + "error", + "manifest-slug-mismatch", + migration_dir / "manifest.json", + ( + f"Manifest name {manifest.name!r} does not match " + f"directory slug {migration_dir.name!r}." + ), + ) + ] + + +def _phase_doc_name_collision_findings( + migration_dir: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings: list[MigrationConsistencyFinding] = [] + by_index: dict[int, Path] = {} + by_name: dict[str, Path] = {} + for path in _phase_doc_paths(migration_dir): + match = _PHASE_DOC_RE.match(path.name) + if match is None: + continue + + phase_index = int(match.group("index")) + phase_name = match.group("name") + + existing_index_path = by_index.get(phase_index) + if existing_index_path is not None: + findings.append( + _finding( + mode, + "error", + "duplicate-phase-doc-index", + path, + ( + f"Phase doc index {phase_index} is duplicated by " + f"{existing_index_path.name!r} and {path.name!r}." 
+ ), + ) + ) + else: + by_index[phase_index] = path + + existing_name_path = by_name.get(phase_name) + if existing_name_path is not None: + findings.append( + _finding( + mode, + "error", + "duplicate-phase-doc-name", + path, + ( + f"Phase doc name {phase_name!r} is duplicated by " + f"{existing_name_path.name!r} and {path.name!r}." + ), + ) + ) + else: + by_name[phase_name] = path + return findings + + +def _phase_doc_paths(migration_dir: Path) -> list[Path]: + try: + return [ + child + for child in sorted(migration_dir.iterdir()) + if _PHASE_DOC_RE.match(child.name) is not None + ] + except OSError as error: + raise ContinuousRefactorError( + f"Could not scan migration directory {migration_dir}: {error}" + ) from error + + +def _manifest_plan_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _requires_plan(manifest, mode): + return [] + + plan_path = migration_dir / "plan.md" + if plan_path.exists(): + return [] + + return [ + _finding( + mode, + "error", + "missing-plan", + plan_path, + "Ready and in-progress migrations require plan.md.", + ) + ] + + +def _requires_plan(manifest: MigrationManifest, mode: ConsistencyMode) -> bool: + return _is_ready_publish_mode(mode) or ( + _is_doctor_or_execution_gate_mode(mode) + and _is_ready_or_in_progress_status(manifest.status) + ) + + +def _manifest_phase_file_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings: list[MigrationConsistencyFinding] = [] + migration_root = migration_dir.resolve() + for phase in manifest.phases: + phase_path = migration_dir / phase.file + findings.extend( + _single_phase_file_findings( + migration_root=migration_root, + phase_path=phase_path, + phase_file=phase.file, + mode=mode, + ) + ) + return findings + + +def _single_phase_file_findings( + *, + migration_root: Path, + phase_path: Path, + phase_file: str, + mode: 
ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + ref = Path(phase_file) + if _invalid_phase_file_reference(ref): + return [ + _finding( + mode, + "error", + "invalid-phase-file-reference", + phase_path, + f"Phase file reference {phase_file!r} must stay inside the migration directory.", + ) + ] + + if phase_path.is_symlink(): + return _symlink_phase_file_findings(migration_root, phase_path, mode) + + if not phase_path.exists(): + return [ + _finding( + mode, + "error", + "missing-phase-file", + phase_path, + f"Manifest phase file {phase_file!r} is missing.", + ) + ] + + if not _is_inside(phase_path.resolve(), migration_root): + return [ + _finding( + mode, + "error", + "phase-file-escapes-migration", + phase_path, + f"Manifest phase file {phase_file!r} resolves outside the migration directory.", + ) + ] + + if not phase_path.is_file(): + return [ + _finding( + mode, + "error", + "phase-file-not-regular", + phase_path, + f"Manifest phase file {phase_file!r} is not a regular file.", + ) + ] + + return [] + + +def _manifest_phase_metadata_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _requires_ready_publish_metadata(manifest, mode): + return [] + + findings: list[MigrationConsistencyFinding] = [] + findings.extend(_manifest_phase_membership_findings(migration_dir, manifest, mode)) + findings.extend(_phase_doc_manifest_coverage_findings(migration_dir, manifest, mode)) + findings.extend(_phase_doc_contract_findings_for_manifest(migration_dir, manifest, mode)) + return findings + + +def _requires_ready_publish_metadata( + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> bool: + return _is_ready_publish_mode(mode) or ( + _is_doctor_mode(mode) and _is_ready_or_in_progress_status(manifest.status) + ) + + +def _is_ready_publish_mode(mode: ConsistencyMode) -> bool: + return mode == "ready-publish" + + +def _is_doctor_mode(mode: ConsistencyMode) -> bool: + return 
mode == "doctor" + + +def _is_execution_gate_mode(mode: ConsistencyMode) -> bool: + return mode == "execution-gate" + + +def _is_doctor_or_execution_gate_mode(mode: ConsistencyMode) -> bool: + return _is_doctor_mode(mode) or _is_execution_gate_mode(mode) + + +def _is_ready_or_in_progress_status(status: str) -> bool: + return status in ("ready", "in-progress") + + +def _manifest_phase_membership_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + phase_names = {phase.name for phase in manifest.phases} + if not phase_names: + return [ + _finding( + mode, + "error", + "missing-manifest-phases", + migration_dir / "manifest.json", + "Ready migrations require at least one manifest phase.", + ) + ] + + if manifest.current_phase in phase_names: + return [] + + return [ + _finding( + mode, + "error", + "invalid-current-phase", + migration_dir / "manifest.json", + ( + f"Current phase {manifest.current_phase!r} does not match " + "any manifest phase." 
+ ), + ) + ] + + +def _phase_doc_manifest_coverage_findings( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + phase_names = {phase.name for phase in manifest.phases} + doc_phase_names = { + match.group("name") + for path in _phase_doc_paths(migration_dir) + if (match := _PHASE_DOC_RE.match(path.name)) is not None + } + return [ + _finding( + mode, + "error", + "phase-doc-not-in-manifest", + migration_dir / f"phase-*-{doc_phase_name}.md", + f"Phase doc {doc_phase_name!r} is not represented in manifest phases.", + ) + for doc_phase_name in sorted(doc_phase_names - phase_names) + ] + + +def _phase_doc_contract_findings_for_manifest( + migration_dir: Path, + manifest: MigrationManifest, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings: list[MigrationConsistencyFinding] = [] + for phase in manifest.phases: + phase_path = migration_dir / phase.file + if phase_path.is_file() and not phase_path.is_symlink(): + findings.extend(_phase_doc_contract_findings(phase_path, mode)) + return findings + + +def _phase_doc_contract_findings( + phase_path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + try: + content = phase_path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not read phase doc {phase_path}: {error}" + ) from error + + findings: list[MigrationConsistencyFinding] = [] + if not re.search(r"^##\s+Precondition\s*$", content, re.IGNORECASE | re.MULTILINE): + findings.append( + _finding( + mode, + "error", + "missing-phase-precondition", + phase_path, + "Phase docs require a ## Precondition section before ready publish.", + ) + ) + if not re.search( + r"^##\s+Definition of Done\s*$", + content, + re.IGNORECASE | re.MULTILINE, + ): + findings.append( + _finding( + mode, + "error", + "missing-phase-definition-of-done", + phase_path, + "Phase docs require a ## Definition of Done section before ready 
publish.", + ) + ) + return findings + + +def _invalid_phase_file_reference(ref: Path) -> bool: + return str(ref) in ("", ".") or ref.is_absolute() or ".." in ref.parts + + +def _symlink_phase_file_findings( + migration_root: Path, + phase_path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + if not _is_inside(phase_path.resolve(), migration_root): + return [ + _finding( + mode, + "error", + "phase-file-escapes-migration", + phase_path, + f"Phase file symlink {phase_path.name!r} resolves outside the migration directory.", + ) + ] + return [ + _finding( + mode, + "error", + "phase-file-not-regular", + phase_path, + f"Phase file {phase_path.name!r} must be a regular file, not a symlink.", + ) + ] + + +def _is_inside(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + except ValueError: + return False + return True diff --git a/src/continuous_refactoring/migration_tick.py b/src/continuous_refactoring/migration_tick.py index a5f6c3e..f92f869 100644 --- a/src/continuous_refactoring/migration_tick.py +++ b/src/continuous_refactoring/migration_tick.py @@ -2,10 +2,11 @@ from __future__ import annotations +from collections.abc import Collection from dataclasses import replace from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import TYPE_CHECKING, Protocol +from typing import TYPE_CHECKING, Callable, Protocol if TYPE_CHECKING: from continuous_refactoring.artifacts import RunArtifacts @@ -24,6 +25,7 @@ RouteOutcome, error_failure_kind, sanitize_text, + sanitized_text_or, ) from continuous_refactoring.effort import ( EffortBudget, @@ -32,6 +34,7 @@ resolve_phase_effort, ) from continuous_refactoring.git import get_head_sha +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( bump_last_touch, eligible_now, @@ -41,11 +44,28 @@ resolve_current_phase, save_manifest, ) +from continuous_refactoring.migration_consistency import ( + 
MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) from continuous_refactoring.phases import ( ReadyVerdict, check_phase_ready, execute_phase, ) +from continuous_refactoring.planning import ( + PlanningStepResult, + planning_artifact_paths, + run_next_planning_step, +) +from continuous_refactoring.planning_state import ( + PlanningState, + is_executable_planning_step, + load_planning_state, + planning_state_path, +) _BASELINE_VALIDATION_UNCERTAINTY_PHRASES = ( @@ -57,6 +77,9 @@ "full test suite passes", "tests pass now", ) +_REVIEW_TWO_FINDINGS_FAILURE = ( + "planning.review-2 failed: revised plan still has findings" +) class _FinalizeCommit(Protocol): @@ -78,25 +101,11 @@ def enumerate_eligible_manifests( now: datetime, effort_budget: EffortBudget | None = None, ) -> list[tuple[MigrationManifest, Path]]: - if not live_dir.is_dir(): - return [] - candidates: list[tuple[MigrationManifest, Path]] = [] - for entry in sorted(live_dir.iterdir()): - if not entry.is_dir() or entry.name.startswith("__"): - continue - manifest_path = entry / "manifest.json" - if not manifest_path.exists(): - continue - manifest = load_manifest(manifest_path) - if manifest.status not in ("ready", "in-progress"): - continue - if manifest.awaiting_human_review: - continue - if not has_executable_phase(manifest): - continue - if not eligible_now(manifest, now): - continue - candidates.append((manifest, manifest_path)) + candidates = _eligible_manifest_candidates( + live_dir, + now, + predicate=_is_normally_eligible, + ) if effort_budget is not None: seen_paths = {path for _, path in candidates} for manifest, manifest_path in _cooling_effort_candidates( @@ -104,8 +113,40 @@ def enumerate_eligible_manifests( ): if manifest_path not in seen_paths: candidates.append((manifest, manifest_path)) - candidates.sort(key=lambda pair: datetime.fromisoformat(pair[0].created_at)) - return candidates + return 
_sort_manifests_by_created_at(candidates) + + +def enumerate_eligible_planning_manifests( + live_dir: Path, + now: datetime, +) -> list[tuple[MigrationManifest, Path]]: + return _sort_manifests_by_created_at( + _eligible_manifest_candidates( + live_dir, + now, + predicate=_is_planning_candidate, + ) + ) + + +def _eligible_manifest_candidates( + live_dir: Path, + now: datetime, + *, + predicate: Callable[[MigrationManifest, datetime], bool], +) -> list[tuple[MigrationManifest, Path]]: + return [ + (manifest, manifest_path) + for manifest, manifest_path in _iter_candidate_manifests(live_dir) + if predicate(manifest, now) + ] + + +def _sort_manifests_by_created_at( + manifests: list[tuple[MigrationManifest, Path]], +) -> list[tuple[MigrationManifest, Path]]: + manifests.sort(key=lambda pair: datetime.fromisoformat(pair[0].created_at)) + return manifests def _cooling_effort_candidates( @@ -113,30 +154,49 @@ def _cooling_effort_candidates( now: datetime, budget: EffortBudget, ) -> list[tuple[MigrationManifest, Path]]: - if not live_dir.is_dir(): - return [] candidates: list[tuple[MigrationManifest, Path]] = [] - for entry in sorted(live_dir.iterdir()): - if not entry.is_dir() or entry.name.startswith("__"): + for manifest, manifest_path in _iter_candidate_manifests(live_dir): + if not _can_ignore_effort_cooldown(manifest, now, budget): continue + candidates.append((manifest, manifest_path)) + return candidates + + +def _iter_candidate_manifests( + live_dir: Path, +) -> list[tuple[MigrationManifest, Path]]: + candidates: list[tuple[MigrationManifest, Path]] = [] + for entry in iter_visible_migration_dirs(live_dir): manifest_path = entry / "manifest.json" if not manifest_path.exists(): continue - manifest = load_manifest(manifest_path) - if not _can_ignore_effort_cooldown(manifest, now, budget): - continue - candidates.append((manifest, manifest_path)) + candidates.append((load_manifest(manifest_path), manifest_path)) return candidates +def _is_normally_eligible(manifest: 
MigrationManifest, now: datetime) -> bool: + return ( + manifest.status in ("ready", "in-progress") + and not manifest.awaiting_human_review + and has_executable_phase(manifest) + and eligible_now(manifest, now) + ) + + +def _is_planning_candidate(manifest: MigrationManifest, now: datetime) -> bool: + return ( + manifest.status == "planning" + and not manifest.awaiting_human_review + and eligible_now(manifest, now) + ) + + def _can_ignore_effort_cooldown( manifest: MigrationManifest, now: datetime, budget: EffortBudget, ) -> bool: - if manifest.status not in ("ready", "in-progress"): - return False - if manifest.awaiting_human_review or not has_executable_phase(manifest): + if not _is_phase_candidate(manifest): return False if manifest.cooldown_until is None: return False @@ -149,6 +209,45 @@ def _can_ignore_effort_cooldown( ) +def _is_phase_candidate(manifest: MigrationManifest) -> bool: + return ( + manifest.status in ("ready", "in-progress") + and not manifest.awaiting_human_review + and has_executable_phase(manifest) + ) + + +def _first_unloadable_visible_manifest( + live_dir: Path, +) -> tuple[Path, list[MigrationConsistencyFinding]] | None: + for migration_dir in iter_visible_migration_dirs(live_dir): + if not (migration_dir / "manifest.json").exists(): + continue + findings = check_migration_consistency(migration_dir, mode="execution-gate") + invalid_findings = [ + finding for finding in findings + if finding.severity == "error" and finding.code == "invalid-manifest" + ] + if invalid_findings: + return migration_dir, invalid_findings + return None + + +def _preflight_manifest_consistency( + live_dir: Path, + repo_root: Path, +) -> DecisionRecord | None: + preflight = _first_unloadable_visible_manifest(live_dir) + if preflight is None: + return None + migration_dir, consistency_findings = preflight + return _consistency_failure_record( + consistency_findings, + repo_root, + migration_dir.name, + ) + + def try_migration_tick( live_dir: Path, taste: str, @@ 
-165,9 +264,13 @@ def try_migration_tick( attempt: int, finalize_commit: _FinalizeCommit, effort_budget: EffortBudget | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> tuple[RouteOutcome, DecisionRecord | None]: resolved_budget = effort_budget or resolve_effort_budget(effort, None) now = datetime.now(timezone.utc) + preflight_record = _preflight_manifest_consistency(live_dir, repo_root) + if preflight_record is not None: + return "abandon", preflight_record candidates = enumerate_eligible_manifests(live_dir, now, resolved_budget) deferred_record: DecisionRecord | None = None pending_defers: list[tuple[MigrationManifest, Path]] = [] @@ -175,6 +278,21 @@ def try_migration_tick( for manifest, manifest_path in candidates: phase = resolve_current_phase(manifest) target_label = _target_label(manifest, phase) + try: + consistency_findings = check_migration_consistency( + manifest_path.parent, mode="execution-gate", + ) + except ContinuousRefactorError as error: + return "abandon", _consistency_error_record( + str(error), + repo_root, + target_label, + failure_kind=error_failure_kind(str(error)), + ) + if has_blocking_consistency_findings(consistency_findings): + return "abandon", _consistency_failure_record( + consistency_findings, repo_root, target_label, + ) if ( phase.required_effort is not None and effort_exceeds( @@ -193,16 +311,13 @@ def try_migration_tick( reason, max_allowed_effort=resolved_budget.max_allowed_effort, ) - pending_defers.append( - ( - _defer_manifest( - manifest, - now, - verdict="effort-over-budget", - reason=reason, - ), - manifest_path, - ) + _queue_deferred_manifest( + pending_defers, + manifest, + manifest_path, + now, + verdict="effort-over-budget", + reason=reason, ) deferred_record = _effort_deferred_record(reason, repo_root, target_label) continue @@ -228,6 +343,7 @@ def try_migration_tick( effort=phase_effort, effort_metadata=effort_metadata, timeout=timeout, + log_mirroring=log_mirroring, ) except ContinuousRefactorError 
as error: return "abandon", _ready_check_failure_record(error, repo_root, target_label) @@ -252,6 +368,7 @@ def try_migration_tick( timeout=timeout, validation_command=validation_command, max_attempts=max_attempts, + log_mirroring=log_mirroring, ) if outcome.status != "failed": @@ -261,7 +378,7 @@ def try_migration_tick( build_commit_message( f"{commit_message_prefix}: migration/{manifest.name}" f"/{phase_file_reference(phase)}", - why=sanitize_text(outcome.reason, repo_root) or outcome.reason, + why=sanitized_text_or(outcome.reason, repo_root, outcome.reason), validation=validation_command, ), artifacts=artifacts, @@ -277,11 +394,13 @@ def try_migration_tick( return "abandon", _phase_failure_record(outcome, repo_root, target_label) return "commit", _phase_commit_record(outcome, repo_root, target_label) - pending_defers.append( - ( - _defer_manifest(manifest, now, verdict=verdict, reason=reason), - manifest_path, - ) + _queue_deferred_manifest( + pending_defers, + manifest, + manifest_path, + now, + verdict=verdict, + reason=reason, ) if verdict == "unverifiable": _save_pending_defers(pending_defers) @@ -292,6 +411,309 @@ def try_migration_tick( return "not-routed", deferred_record +def try_planning_tick( + live_dir: Path, + taste: str, + repo_root: Path, + artifacts: RunArtifacts, + *, + agent: str, + model: str, + effort: str, + timeout: int | None, + commit_message_prefix: str, + attempt: int, + finalize_commit: _FinalizeCommit, + effort_budget: EffortBudget | None = None, + effort_metadata: dict[str, object] | None = None, + skip_migration_names: Collection[str] = (), + log_mirroring: LogMirroring = LogMirroring(), +) -> tuple[RouteOutcome, DecisionRecord | None]: + now = datetime.now(timezone.utc) + preflight_record = _preflight_manifest_consistency(live_dir, repo_root) + if preflight_record is not None: + return "abandon", preflight_record + + candidates = enumerate_eligible_planning_manifests(live_dir, now) + if skip_migration_names: + skipped_names = 
frozenset(skip_migration_names) + candidates = [ + (manifest, manifest_path) + for manifest, manifest_path in candidates + if manifest.name not in skipped_names + ] + for manifest, manifest_path in candidates: + migration_dir = manifest_path.parent + try: + consistency_findings = check_migration_consistency( + migration_dir, + mode="planning-snapshot", + ) + except ContinuousRefactorError as error: + return "blocked", _planning_state_record( + str(error), + repo_root, + migration_dir.name, + failure_kind=error_failure_kind(str(error)), + ) + if has_blocking_consistency_findings(consistency_findings): + return "blocked", _planning_consistency_record( + consistency_findings, + repo_root, + migration_dir.name, + ) + state_result = _load_planning_resume_state( + migration_dir, + repo_root, + ) + if isinstance(state_result, DecisionRecord): + return "blocked", state_result + state = state_result + step = state.next_step + if not is_executable_planning_step(step): + return "blocked", _planning_state_record( + ( + f"Planning migration has terminal next_step {step!r} " + "while manifest status is still planning" + ), + repo_root, + manifest.name, + failure_kind="planning-state-invalid", + ) + head_before = get_head_sha(repo_root) + try: + result = run_next_planning_step( + manifest.name, + state.target, + taste, + repo_root, + live_dir, + artifacts, + attempt=attempt, + retry=1, + agent=agent, + model=model, + effort=effort, + effort_budget=effort_budget, + effort_metadata=effort_metadata, + timeout=timeout, + log_mirroring=log_mirroring, + ) + except ContinuousRefactorError as error: + paths = planning_artifact_paths( + artifacts, + attempt=attempt, + retry=1, + label=step, + agent=agent, + ) + return "abandon", _planning_error_record( + str(error), + repo_root, + manifest.name, + call_role=_planning_call_role(step), + failure_kind=_planning_failure_kind(str(error)), + agent_last_message_path=paths.agent_last_message_path, + agent_stdout_path=paths.agent_stdout_path, + 
agent_stderr_path=paths.agent_stderr_path, + ) + + outcome = _planning_route_outcome(result) + if outcome == "commit": + finalize_commit( + repo_root, + head_before, + build_commit_message( + ( + f"{commit_message_prefix}: planning/" + f"{manifest.name}/{result.step}" + ), + why=sanitize_text(result.reason, repo_root) or result.reason, + ), + artifacts=artifacts, + attempt=attempt, + phase="planning", + ) + print( + "Planning: " + f"{_describe_planning_outcome(result)} — " + f"{manifest.name}/{result.step}: {result.reason}" + ) + return "commit", _planning_commit_record(result, repo_root) + if outcome == "blocked": + return "blocked", _planning_blocked_record(result, repo_root) + return "abandon", _planning_failed_record(result, repo_root) + + return "not-routed", None + + +def _planning_consistency_record( + findings: list[MigrationConsistencyFinding], + repo_root: Path, + migration_name: str, +) -> DecisionRecord: + error_findings = [finding for finding in findings if finding.severity == "error"] + codes = ", ".join(sorted({finding.code for finding in error_findings})) + message = ( + error_findings[0].message + if error_findings + else "planning snapshot consistency failed" + ) + return _planning_state_record( + f"Planning snapshot consistency failed ({codes}): {message}", + repo_root, + migration_name, + failure_kind="planning-consistency-error", + ) + + +def _load_planning_resume_state( + migration_dir: Path, + repo_root: Path, +) -> PlanningState | DecisionRecord: + state_path = planning_state_path(migration_dir) + if not state_path.exists(): + return _planning_state_record( + f"Planning migration is missing {state_path.relative_to(migration_dir)}", + repo_root, + migration_dir.name, + failure_kind="planning-state-missing", + ) + try: + return load_planning_state( + repo_root, + state_path, + published_migration_root=migration_dir, + ) + except ContinuousRefactorError as error: + return _planning_state_record( + str(error), + repo_root, + migration_dir.name, 
+ failure_kind="planning-state-invalid", + ) + + +def _planning_route_outcome(result: PlanningStepResult) -> RouteOutcome: + if result.status == "published": + return "commit" + if result.status == "blocked": + return "blocked" + return "abandon" + + +def _planning_call_role(step: object) -> str: + if is_executable_planning_step(step): + return f"planning.{step}" + return "planning.resume" + + +def _planning_failure_kind(message: str) -> str: + if message == _REVIEW_TWO_FINDINGS_FAILURE: + return "planning-step-failed" + return error_failure_kind(message) + + +def _describe_planning_outcome(result: PlanningStepResult) -> str: + if result.terminal_outcome is None: + return f"{result.step} accepted" + if result.terminal_outcome.status == "ready": + return "queued for execution" + if result.terminal_outcome.status == "awaiting_human_review": + return "awaiting human review" + return result.terminal_outcome.status.replace("_", " ") + + +def _planning_state_record( + message: str, + repo_root: Path, + migration_name: str, + *, + failure_kind: str, +) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=migration_name, + call_role="planning.state", + phase_reached="planning.state", + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + ) + + +def _planning_error_record( + message: str, + repo_root: Path, + migration_name: str, + *, + call_role: str, + failure_kind: str, + agent_last_message_path: Path | None = None, + agent_stdout_path: Path | None = None, + agent_stderr_path: Path | None = None, +) -> DecisionRecord: + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + 
agent_stderr_path=agent_stderr_path, + ) + + +def _planning_commit_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + call_role = f"planning.{result.step}" + return DecisionRecord( + decision="commit", + retry_recommendation="none", + target=result.migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind="none", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + +def _planning_blocked_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=result.migration_name, + call_role="planning.publish", + phase_reached="planning.publish", + failure_kind="planning-publish-blocked", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + +def _planning_failed_record( + result: PlanningStepResult, + repo_root: Path, +) -> DecisionRecord: + call_role = f"planning.{result.step}" + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=result.migration_name, + call_role=call_role, + phase_reached=call_role, + failure_kind="planning-step-failed", + summary=sanitized_text_or(result.reason, repo_root, result.reason), + ) + + def _target_label(manifest: MigrationManifest, phase: PhaseSpec) -> str: return f"{manifest.name} {phase_file_reference(phase)} ({phase.name})" @@ -322,6 +744,23 @@ def _save_pending_defers( save_manifest(deferred_manifest, manifest_path) +def _queue_deferred_manifest( + pending_defers: list[tuple[MigrationManifest, Path]], + manifest: MigrationManifest, + manifest_path: Path, + now: datetime, + *, + verdict: str, + reason: str, +) -> None: + pending_defers.append( + ( + _defer_manifest(manifest, now, verdict=verdict, reason=reason), + manifest_path, + ) + ) + + def _effort_defer_reason( phase: PhaseSpec, *, @@ -361,7 +800,7 @@ def _log_phase_effort_deferred( def _ready_check_failure_record( error: 
ContinuousRefactorError, repo_root: Path, target_label: str, ) -> DecisionRecord: - summary = sanitize_text(str(error), repo_root) or str(error) + summary = sanitized_text_or(str(error), repo_root, str(error)) return DecisionRecord( decision="abandon", retry_recommendation="new-target", @@ -373,6 +812,45 @@ def _ready_check_failure_record( ) +def _consistency_failure_record( + findings: list[MigrationConsistencyFinding], + repo_root: Path, + target_label: str, +) -> DecisionRecord: + error_findings = [finding for finding in findings if finding.severity == "error"] + codes = ", ".join(sorted({finding.code for finding in error_findings})) + message = ( + error_findings[0].message + if error_findings + else "migration consistency failed" + ) + summary = f"Migration consistency failed ({codes}): {message}" + return _consistency_error_record( + summary, + repo_root, + target_label, + failure_kind="migration-consistency-error", + ) + + +def _consistency_error_record( + message: str, + repo_root: Path, + target_label: str, + *, + failure_kind: str, +) -> DecisionRecord: + return DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=target_label, + call_role="phase.execution-gate", + phase_reached="phase.execution-gate", + failure_kind=failure_kind, + summary=sanitized_text_or(message, repo_root, message), + ) + + def _phase_failure_record( outcome: ExecutePhaseOutcome, repo_root: Path, target_label: str, ) -> DecisionRecord: @@ -383,7 +861,7 @@ def _phase_failure_record( call_role=outcome.call_role or "phase.execute", phase_reached=outcome.phase_reached or "phase.execute", failure_kind=outcome.failure_kind or "phase-failed", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, + summary=sanitized_text_or(outcome.reason, repo_root, outcome.reason), retry_used=outcome.retry, ) @@ -400,7 +878,7 @@ def _phase_commit_record( call_role="phase.execute", phase_reached="phase.execute", failure_kind="none", - 
summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, + summary=sanitized_text_or(outcome.reason, repo_root, outcome.reason), ) @@ -430,7 +908,7 @@ def _defer_manifest( def _human_review_record( reason: str, repo_root: Path, target_label: str, ) -> DecisionRecord: - summary = sanitize_text(reason, repo_root) or "Phase requires human review" + summary = sanitized_text_or(reason, repo_root, "Phase requires human review") return DecisionRecord( decision="blocked", retry_recommendation="human-review", @@ -450,7 +928,7 @@ def _deferred_record(reason: str, repo_root: Path, target_label: str) -> Decisio call_role="phase.ready-check", phase_reached="phase.ready-check", failure_kind="phase-ready-no", - summary=sanitize_text(reason, repo_root) or "Migration phase not ready", + summary=sanitized_text_or(reason, repo_root, "Migration phase not ready"), ) @@ -464,5 +942,9 @@ def _effort_deferred_record( call_role="phase.effort-budget", phase_reached="phase.effort-budget", failure_kind="phase-effort-over-budget", - summary=sanitize_text(reason, repo_root) or "Migration phase over effort budget", + summary=sanitized_text_or( + reason, + repo_root, + "Migration phase over effort budget", + ), ) diff --git a/src/continuous_refactoring/phases.py b/src/continuous_refactoring/phases.py index 4d110f0..1350eee 100644 --- a/src/continuous_refactoring/phases.py +++ b/src/continuous_refactoring/phases.py @@ -26,9 +26,11 @@ read_status, resolved_phase_reached, sanitize_text, + sanitized_text_or, status_summary, ) from continuous_refactoring.git import get_head_sha, revert_to +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( complete_manifest_phase, migration_root, @@ -133,6 +135,7 @@ def check_phase_ready( effort: str, timeout: int | None, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> tuple[ReadyVerdict, str]: prompt = compose_phase_ready_prompt(phase, 
manifest, taste) check_dir = artifacts.root / "phase-ready-check" @@ -162,7 +165,7 @@ def check_phase_ready( last_message_path=( check_dir / "agent-last-message.md" if agent == "codex" else None ), - mirror_to_terminal=False, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: @@ -323,6 +326,7 @@ def _run_phase_agent( effort: str, effort_metadata: dict[str, object] | None, timeout: int | None, + log_mirroring: LogMirroring, ) -> _PhaseAgentRun: artifacts.log_call_started( attempt=attempt, @@ -343,11 +347,11 @@ def _run_phase_agent( stdout_path=phase_attempt.phase_dir / "agent.stdout.log", stderr_path=phase_attempt.phase_dir / "agent.stderr.log", last_message_path=phase_attempt.last_message_path, - mirror_to_terminal=False, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) + summary = sanitized_text_or(str(error), repo_root, str(error)) return _PhaseAgentRun( status=None, phase_reached=_PHASE_EXECUTE_ROLE, @@ -428,6 +432,7 @@ def _run_phase_validation( display_target_label: str, repo_root: Path, validation_command: str, + log_mirroring: LogMirroring, ) -> _PhaseValidationResult: artifacts.log_call_started( attempt=attempt, @@ -443,12 +448,12 @@ def _run_phase_validation( repo_root, stdout_path=phase_attempt.phase_dir / "tests.stdout.log", stderr_path=phase_attempt.phase_dir / "tests.stderr.log", - mirror_to_terminal=False, + mirror_to_terminal=log_mirroring.command, ) except ContinuousRefactorError as error: summary, focus = status_summary( agent_run.status, - fallback=sanitize_text(str(error), repo_root) or str(error), + fallback=sanitized_text_or(str(error), repo_root, str(error)), repo_root=repo_root, ) return _PhaseValidationResult( @@ -602,6 +607,7 @@ def execute_phase( validation_command: str, max_attempts: int | None, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring 
= LogMirroring(), ) -> ExecutePhaseOutcome: _require_phase_in_manifest(manifest, phase.name) head_before = get_head_sha(repo_root) @@ -638,6 +644,7 @@ def execute_phase( effort=effort, effort_metadata=effort_metadata, timeout=timeout, + log_mirroring=log_mirroring, ) if agent_run.failure is not None: return agent_run.failure @@ -651,6 +658,7 @@ def execute_phase( display_target_label=display_target_label, repo_root=repo_root, validation_command=validation_command, + log_mirroring=log_mirroring, ) if validation_result.status == "passed": diff --git a/src/continuous_refactoring/planning.py b/src/continuous_refactoring/planning.py index 82be2e5..88b3676 100644 --- a/src/continuous_refactoring/planning.py +++ b/src/continuous_refactoring/planning.py @@ -1,6 +1,8 @@ from __future__ import annotations import re +import shutil +import uuid from dataclasses import dataclass, replace from pathlib import Path from typing import TYPE_CHECKING, Callable, Literal @@ -10,20 +12,53 @@ from continuous_refactoring.agent import maybe_run_agent from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp +from continuous_refactoring.config import resolve_project from continuous_refactoring.effort import EffortBudget, require_effort_tier +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( MigrationManifest, PhaseSpec, approaches_dir, intentional_skips_dir, + load_manifest, migration_root, save_manifest, ) +from continuous_refactoring.planning_publish import ( + PlanningPublishError, + PlanningPublishRequest, + PlanningPublishResult, + capture_live_snapshot, + prepare_planning_workspace, + publish_planning_workspace, +) +from continuous_refactoring.planning_state import ( + FeedbackSource, + PlanningCursor, + PlanningState, + PlanningStep, + append_planning_feedback, + complete_planning_step, + load_planning_state, + new_planning_state, + planning_state_path, + planning_step_stdout, + 
reopen_planning_for_revise, + save_planning_state, + write_planning_stage_stdout, +) from continuous_refactoring.prompts import PlanningStage, compose_planning_prompt -__all__ = ["PlanningOutcome", "run_planning"] +__all__ = [ + "PlanningOutcome", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", +] PlanningStatus = Literal["ready", "awaiting_human_review", "skipped"] +PlanningStepStatus = Literal["published", "blocked", "failed"] _FINAL_DECISION_RE = re.compile( r"^final-decision:\s*(approve-auto|approve-needs-human|reject)(?:\s*[—-]\s*(.+))?$", @@ -47,6 +82,51 @@ class PlanningOutcome: reason: str +@dataclass(frozen=True) +class PlanningStepResult: + status: PlanningStepStatus + migration_name: str + step: PlanningStep + next_step: PlanningCursor + reason: str + terminal_outcome: PlanningOutcome | None = None + publish_result: PlanningPublishResult | None = None + + +@dataclass(frozen=True) +class PlanningRefineRequest: + migration_name: str + feedback_text: str + feedback_source: FeedbackSource + taste: str + repo_root: Path + live_dir: Path + artifacts: RunArtifacts + agent: str + model: str + effort: str + timeout: int | None = None + attempt: int = 1 + retry: int = 1 + effort_budget: EffortBudget | None = None + effort_metadata: dict[str, object] | None = None + log_mirroring: LogMirroring = LogMirroring() + + +@dataclass(frozen=True) +class _PlanningArtifactPaths: + agent_stdout_path: Path + agent_stderr_path: Path + agent_last_message_path: Path | None + + +@dataclass(frozen=True) +class _PhaseMetadata: + precondition: str + required_effort: str | None + effort_reason: str | None + + @dataclass(frozen=True) class _PlanningStageSpec: prompt_stage: PlanningStage @@ -119,36 +199,75 @@ def _phase_section_text(content: str, heading: str) -> str | None: return normalized or None -def _phase_precondition(content: str, phase_file: str) -> str: - section = _phase_section_text(content, "Precondition") +def 
_phase_field( + content: str, + *, + heading: str, + line_re: re.Pattern[str], +) -> str | None: + section = _phase_section_text(content, heading) if section is not None: return section - match = _PRECONDITION_LINE_RE.search(content) + match = line_re.search(content) if match: return match.group(1).strip() - return f"prerequisites in {phase_file} are met" + return None + + +def _parse_phase_metadata(content: str, phase_file: str) -> _PhaseMetadata: + precondition = _phase_field( + content, + heading="Precondition", + line_re=_PRECONDITION_LINE_RE, + ) + raw_required_effort = _phase_field( + content, + heading="Required Effort", + line_re=_REQUIRED_EFFORT_LINE_RE, + ) + effort_reason = _phase_field( + content, + heading="Effort Reason", + line_re=_EFFORT_REASON_LINE_RE, + ) + required_effort = None + if raw_required_effort is not None: + candidate = raw_required_effort.strip().strip("`").split()[0].strip("`.,;:") + required_effort = require_effort_tier( + candidate, + field=f"{phase_file} required_effort", + ) + return _PhaseMetadata( + precondition=precondition or f"prerequisites in {phase_file} are met", + required_effort=required_effort, + effort_reason=effort_reason, + ) + + +def _phase_precondition(content: str, phase_file: str) -> str: + return _parse_phase_metadata(content, phase_file).precondition def _phase_required_effort(content: str, phase_file: str) -> str | None: - raw = _phase_section_text(content, "Required Effort") - if raw is None: - match = _REQUIRED_EFFORT_LINE_RE.search(content) - raw = match.group(1).strip() if match else None - if raw is None: - return None - candidate = raw.strip().strip("`").split()[0].strip("`.,;:") - return require_effort_tier(candidate, field=f"{phase_file} required_effort") + return _parse_phase_metadata(content, phase_file).required_effort def _phase_effort_reason(content: str) -> str | None: - section = _phase_section_text(content, "Effort Reason") - if section is not None: - return section - match = 
_EFFORT_REASON_LINE_RE.search(content) - if match: - return match.group(1).strip() - return None - + return _parse_phase_metadata(content, "").effort_reason + + +def _phase_spec_from_file(phase_file: Path) -> PhaseSpec: + content = phase_file.read_text(encoding="utf-8") + metadata = _parse_phase_metadata(content, phase_file.name) + name = phase_file.stem.split("-", 2)[2] + return PhaseSpec( + name=name, + file=phase_file.name, + done=False, + precondition=metadata.precondition, + required_effort=metadata.required_effort, + effort_reason=metadata.effort_reason, + ) def _discover_phase_files(mig_root: Path) -> tuple[PhaseSpec, ...]: phase_files: list[tuple[int, Path]] = [] @@ -169,17 +288,7 @@ def _discover_phase_files(mig_root: Path) -> tuple[PhaseSpec, ...]: f"Duplicate phase names are not allowed in {mig_root.name}: {name}" ) seen_names.add(name) - content = pf.read_text(encoding="utf-8") - phases.append( - PhaseSpec( - name=name, - file=pf.name, - done=False, - precondition=_phase_precondition(content, pf.name), - required_effort=_phase_required_effort(content, pf.name), - effort_reason=_phase_effort_reason(content), - ) - ) + phases.append(_phase_spec_from_file(pf)) return tuple(phases) @@ -209,6 +318,33 @@ def _write_skip_file( # --------------------------------------------------------------------------- +def _planning_stage_dir( + artifacts: RunArtifacts, + attempt: int, + retry: int, + label: str, +) -> Path: + return artifacts.attempt_dir(attempt, retry) / "planning" / label + + +def planning_artifact_paths( + artifacts: RunArtifacts, + *, + attempt: int, + retry: int, + label: str, + agent: str, +) -> _PlanningArtifactPaths: + stage_dir = _planning_stage_dir(artifacts, attempt, retry, label) + return _PlanningArtifactPaths( + agent_stdout_path=stage_dir / "agent.stdout.log", + agent_stderr_path=stage_dir / "agent.stderr.log", + agent_last_message_path=( + stage_dir / "agent-last-message.md" if agent == "codex" else None + ), + ) + + def _run_stage( stage: 
PlanningStage, migration_name: str, @@ -226,6 +362,7 @@ def _run_stage( timeout: int | None, effort_metadata: dict[str, object] | None = None, effort_budget: EffortBudget | None = None, + log_mirroring: LogMirroring = LogMirroring(), stage_label: str | None = None, ) -> str: prompt = compose_planning_prompt( @@ -237,8 +374,14 @@ def _run_stage( ) label = stage_label or stage call_role = f"planning.{label}" - stage_dir = artifacts.root / "planning" / label - stage_dir.mkdir(parents=True, exist_ok=True) + paths = planning_artifact_paths( + artifacts, + attempt=attempt, + retry=retry, + label=label, + agent=agent, + ) + paths.agent_stdout_path.parent.mkdir(parents=True, exist_ok=True) artifacts.log_call_started( attempt=attempt, @@ -255,12 +398,10 @@ def _run_stage( effort=effort, prompt=prompt, repo_root=repo_root, - stdout_path=stage_dir / "agent.stdout.log", - stderr_path=stage_dir / "agent.stderr.log", - last_message_path=( - stage_dir / "agent-last-message.md" if agent == "codex" else None - ), - mirror_to_terminal=False, + stdout_path=paths.agent_stdout_path, + stderr_path=paths.agent_stderr_path, + last_message_path=paths.agent_last_message_path, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: @@ -309,8 +450,38 @@ def _run_stage( # --------------------------------------------------------------------------- -def _build_context(target: str, mig_relative: Path, extra: str = "") -> str: - parts = [f"Target: {target}", f"Migration directory: {mig_relative}"] +def _build_context( + target: str, + mig_relative: Path, + extra: str = "", + *, + work_dir: Path | None = None, + live_mig_root: Path | None = None, +) -> str: + parts = [ + f"Target: {target}", + f"Migration directory: {mig_relative}", + "Read and write all migration planning artifacts inside that directory.", + ] + if work_dir is not None: + live_dir = live_mig_root or work_dir + parts.extend( + [ + f"Staged work dir: {work_dir}", + f"Work dir: 
{work_dir}", + f"Live migration dir: {live_dir}", + "The staged work dir is the planning workspace; successful " + "steps are atomically published by the harness.", + "Writable target: staged work dir only.", + "Writable target: work dir only.", + "The live migration directory is read-only reference material.", + "Do not mutate the live migration directory.", + "Resume input is the last published .planning/state.json plus " + "accepted stdout under .planning/stages/.", + "failed current-step output, stdout/stderr, and partial work " + "are run artifacts only; they are not resume input.", + ] + ) if extra: parts.append(extra) return "\n\n".join(parts) @@ -324,6 +495,13 @@ def _join_nonempty(*parts: str) -> str: return "\n\n".join(part for part in parts if part) +def _display_migration_path(repo_root: Path, mig_root: Path) -> Path: + try: + return mig_root.relative_to(repo_root) + except ValueError: + return mig_root + + def _read_approach_listing(live_dir: Path, migration_name: str) -> str: app_dir = approaches_dir(live_dir, migration_name) if not app_dir.exists(): @@ -334,6 +512,130 @@ def _read_approach_listing(live_dir: Path, migration_name: str) -> str: ) +def _durable_stdout_context( + title: str, + state: PlanningState, + repo_root: Path, + mig_root: Path, + step: str, + *, + published_migration_root: Path | None = None, +) -> str: + stdout_ref, stdout = planning_step_stdout( + state, + repo_root, + step, + state_path=planning_state_path(mig_root), + published_migration_root=published_migration_root, + ) + return f"{title} (from {stdout_ref}):\n{stdout}" + + +def _build_durable_planning_context( + *, + repo_root: Path, + live_dir: Path, + migration_name: str, + state: PlanningState, + extra_context: str = "", + published_migration_root: Path | None = None, +) -> str: + mig_root = migration_root(live_dir, migration_name) + mig_relative = _display_migration_path(repo_root, mig_root) + plan_path = mig_root / "plan.md" + + if state.next_step == "approaches": + 
step_context = "" + elif state.next_step == "pick-best": + step_context = f"Approaches:\n{_read_approach_listing(live_dir, migration_name)}" + elif state.next_step == "expand": + step_context = _durable_stdout_context( + "Chosen approach", + state, + repo_root, + mig_root, + "pick-best", + published_migration_root=published_migration_root, + ) + elif state.next_step == "review": + step_context = f"Plan:\n{_read_plan_text(plan_path)}" + elif state.next_step == "revise": + if not state.revision_base_step_counts: + step_context = _durable_stdout_context( + "Review findings to address", + state, + repo_root, + mig_root, + "review", + published_migration_root=published_migration_root, + ) + else: + step_context = _latest_feedback_context(state) + elif state.next_step == "review-2": + step_context = f"Plan (revised):\n{_read_plan_text(plan_path)}" + elif state.next_step == "final-review": + step_context = f"Plan:\n{_read_plan_text(plan_path)}" + else: + raise ContinuousRefactorError( + f"Planning state is terminal; no prompt context for {state.next_step!r}" + ) + + return _build_context( + state.target, + mig_relative, + _join_nonempty(extra_context, step_context), + work_dir=mig_root, + live_mig_root=published_migration_root, + ) + + +def _latest_feedback_context(state: PlanningState) -> str: + if not state.feedback: + raise ContinuousRefactorError("Planning refinement requires user feedback") + return f"User refinement feedback to address:\n{state.feedback[-1].text}" + + +def _record_completed_planning_step( + state: PlanningState, + *, + repo_root: Path, + mig_root: Path, + published_migration_root: Path | None = None, + stage_label: str, + outcome: str, + stdout: str, + agent: str, + model: str, + effort: str, + final_reason: str | None = None, +) -> PlanningState: + outputs = write_planning_stage_stdout( + repo_root, + mig_root, + stage_label, + stdout, + published_migration_root=published_migration_root, + ) + updated = complete_planning_step( + state, + 
stage_label, + outcome, + outputs, + completed_at=iso_timestamp(), + agent=agent, + model=model, + effort=effort, + final_reason=final_reason, + ) + save_planning_state( + updated, + planning_state_path(mig_root), + repo_root=repo_root, + published_migration_root=published_migration_root, + ) + return updated + + def _run_pipeline_stage( spec: _PlanningStageSpec, state: _PlanningStageState, @@ -350,6 +652,7 @@ def _run_pipeline_stage( attempt: int, retry: int, agent_kw: dict[str, object], + log_mirroring: LogMirroring, ) -> tuple[MigrationManifest, str]: stdout = _run_stage( spec.prompt_stage, @@ -361,6 +664,7 @@ def _run_pipeline_stage( artifacts, attempt=attempt, retry=retry, + log_mirroring=log_mirroring, **agent_kw, ) if spec.stage_label == "approaches": @@ -396,11 +700,11 @@ def _refresh_manifest( # --------------------------------------------------------------------------- -# Main workflow +# One-step workflow # --------------------------------------------------------------------------- -def run_planning( +def run_next_planning_step( migration_name: str, target: str, taste: str, @@ -417,11 +721,236 @@ def run_planning( effort_budget: EffortBudget | None = None, effort_metadata: dict[str, object] | None = None, extra_context: str = "", -) -> PlanningOutcome: - mig_root = migration_root(live_dir, migration_name) - mig_root.mkdir(parents=True, exist_ok=True) - manifest_path = mig_root / "manifest.json" - mig_relative = mig_root.relative_to(repo_root) + log_mirroring: LogMirroring = LogMirroring(), +) -> PlanningStepResult: + live_mig_root = migration_root(live_dir, migration_name) + base_snapshot_id = capture_live_snapshot(repo_root, live_dir, migration_name) + workspace_root = _prepare_step_workspace( + repo_root, + artifacts, + migration_name, + live_mig_root, + ) + manifest, state = _load_or_seed_step_snapshot( + workspace_root, + live_mig_root, + migration_name=migration_name, + target=target, + repo_root=repo_root, + ) + if state.next_step not in 
_STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + + step = state.next_step + manifest, state, terminal_outcome = _execute_step_in_workspace( + manifest, + state, + migration_name=migration_name, + taste=taste, + repo_root=repo_root, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + artifacts=artifacts, + attempt=attempt, + retry=retry, + agent=agent, + model=model, + effort=effort, + timeout=timeout, + effort_budget=effort_budget, + effort_metadata=effort_metadata, + extra_context=extra_context, + log_mirroring=log_mirroring, + ) + + validation_mode = "ready-publish" if manifest.status == "ready" else "planning-snapshot" + try: + publish_result = publish_planning_workspace( + PlanningPublishRequest( + repo_root=repo_root, + live_migrations_dir=live_dir, + slug=migration_name, + workspace_dir=workspace_root, + base_snapshot_id=base_snapshot_id, + validation_mode=validation_mode, + operation=f"planning.{step}", + ) + ) + except PlanningPublishError as error: + return PlanningStepResult( + status=error.result.status, + migration_name=migration_name, + step=step, + next_step=state.next_step, + reason=error.result.reason, + terminal_outcome=None, + publish_result=error.result, + ) + + return PlanningStepResult( + status="published", + migration_name=migration_name, + step=step, + next_step=state.next_step, + reason=_planning_step_reason(step, state, terminal_outcome), + terminal_outcome=terminal_outcome, + publish_result=publish_result, + ) + + +def run_refine_planning_step(request: PlanningRefineRequest) -> PlanningStepResult: + live_mig_root = migration_root(request.live_dir, request.migration_name) + base_snapshot_id = capture_live_snapshot( + request.repo_root, + request.live_dir, + request.migration_name, + ) + workspace_root = _prepare_step_workspace( + request.repo_root, + request.artifacts, + request.migration_name, + live_mig_root, + ) + manifest, state = 
_load_refine_snapshot( + workspace_root, + live_mig_root, + repo_root=request.repo_root, + migration_name=request.migration_name, + ) + manifest, state = _prepare_refine_state( + manifest, + state, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + repo_root=request.repo_root, + feedback_text=request.feedback_text, + feedback_source=request.feedback_source, + ) + if state.next_step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + + step = state.next_step + manifest, state, terminal_outcome = _execute_step_in_workspace( + manifest, + state, + migration_name=request.migration_name, + taste=request.taste, + repo_root=request.repo_root, + workspace_root=workspace_root, + live_mig_root=live_mig_root, + artifacts=request.artifacts, + attempt=request.attempt, + retry=request.retry, + agent=request.agent, + model=request.model, + effort=request.effort, + timeout=request.timeout, + effort_budget=request.effort_budget, + effort_metadata=request.effort_metadata, + extra_context=_user_feedback_context(request.feedback_text), + log_mirroring=request.log_mirroring, + ) + + validation_mode = "ready-publish" if manifest.status == "ready" else "planning-snapshot" + try: + publish_result = publish_planning_workspace( + PlanningPublishRequest( + repo_root=request.repo_root, + live_migrations_dir=request.live_dir, + slug=request.migration_name, + workspace_dir=workspace_root, + base_snapshot_id=base_snapshot_id, + validation_mode=validation_mode, + operation=f"migration.refine.{step}", + ) + ) + except PlanningPublishError as error: + return PlanningStepResult( + status=error.result.status, + migration_name=request.migration_name, + step=step, + next_step=state.next_step, + reason=error.result.reason, + terminal_outcome=None, + publish_result=error.result, + ) + + return PlanningStepResult( + status="published", + migration_name=request.migration_name, + step=step, + 
next_step=state.next_step, + reason=_planning_step_reason(step, state, terminal_outcome), + terminal_outcome=terminal_outcome, + publish_result=publish_result, + ) + + +_STEP_PROMPT_STAGES: dict[str, PlanningStage] = { + "approaches": "approaches", + "pick-best": "pick-best", + "expand": "expand", + "review": "review", + "revise": "expand", + "review-2": "review", + "final-review": "final-review", +} + + +def _prepare_step_workspace( + repo_root: Path, + artifacts: RunArtifacts, + migration_name: str, + live_mig_root: Path, +) -> Path: + project_state_dir = _planning_project_state_dir(repo_root, artifacts) + workspace = prepare_planning_workspace( + project_state_dir, + migration_name, + f"{artifacts.run_id}-{uuid.uuid4().hex}", + ) + if live_mig_root.exists(): + shutil.copytree(live_mig_root, workspace.root, dirs_exist_ok=True) + return workspace.root + + +def _planning_project_state_dir(repo_root: Path, artifacts: RunArtifacts) -> Path: + try: + return resolve_project(repo_root).project_dir + except ContinuousRefactorError: + return artifacts.root / "project-state" + + +def _load_or_seed_step_snapshot( + workspace_root: Path, + live_mig_root: Path, + *, + migration_name: str, + target: str, + repo_root: Path, +) -> tuple[MigrationManifest, PlanningState]: + manifest_path = workspace_root / "manifest.json" + state_path = planning_state_path(workspace_root) + if manifest_path.exists(): + manifest = load_manifest(manifest_path) + if manifest.status != "planning": + raise ContinuousRefactorError( + f"Planning snapshot {migration_name!r} is not in planning status" + ) + if not state_path.exists(): + raise ContinuousRefactorError( + f"Planning snapshot {migration_name!r} is missing .planning/state.json" + ) + state = load_planning_state( + repo_root, + state_path, + published_migration_root=live_mig_root, + ) + return manifest, state now = iso_timestamp() manifest = MigrationManifest( @@ -435,154 +964,260 @@ def run_planning( phases=(), ) save_manifest(manifest, 
manifest_path) + state = new_planning_state(target, now=now) + save_planning_state( + state, + state_path, + repo_root=repo_root, + published_migration_root=live_mig_root, + ) + return manifest, state - agent_kw = dict( + +def _load_refine_snapshot( + workspace_root: Path, + live_mig_root: Path, + *, + repo_root: Path, + migration_name: str, +) -> tuple[MigrationManifest, PlanningState]: + manifest_path = workspace_root / "manifest.json" + state_path = planning_state_path(workspace_root) + if not manifest_path.exists(): + raise ContinuousRefactorError(f"Migration {migration_name!r} has no manifest") + if not state_path.exists(): + raise ContinuousRefactorError( + f"Migration {migration_name!r} is missing .planning/state.json" + ) + manifest = load_manifest(manifest_path) + state = load_planning_state( + repo_root, + state_path, + published_migration_root=live_mig_root, + ) + return manifest, state + + +def _prepare_refine_state( + manifest: MigrationManifest, + state: PlanningState, + *, + workspace_root: Path, + live_mig_root: Path, + repo_root: Path, + feedback_text: str, + feedback_source: FeedbackSource, +) -> tuple[MigrationManifest, PlanningState]: + _require_refine_eligible(manifest) + state = append_planning_feedback(state, feedback_text, feedback_source) + if manifest.status == "ready": + state = reopen_planning_for_revise(state) + manifest = _refresh_manifest( + manifest, + workspace_root / "manifest.json", + status="planning", + awaiting_human_review=False, + human_review_reason=None, + cooldown_until=None, + current_phase=manifest.phases[0].name, + ) + elif state.next_step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError( + f"Planning state is terminal; no next step for {state.next_step!r}" + ) + save_planning_state( + state, + planning_state_path(workspace_root), + repo_root=repo_root, + published_migration_root=live_mig_root, + ) + return manifest, state + + +def _require_refine_eligible(manifest: MigrationManifest) -> None: + if 
any(phase.done for phase in manifest.phases): + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has completed phase work and cannot be refined" + ) + if manifest.status == "planning": + return + if manifest.status != "ready": + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has status {manifest.status!r}; " + "only planning or unexecuted ready migrations can be refined" + ) + if not manifest.phases: + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has no phases and cannot be refined" + ) + first_phase = manifest.phases[0] + if manifest.current_phase != first_phase.name: + raise ContinuousRefactorError( + f"Migration {manifest.name!r} has already advanced past its first phase" + ) + + +def _user_feedback_context(text: str) -> str: + return f"User refinement feedback:\n{text}" + + +def _execute_step_in_workspace( + manifest: MigrationManifest, + state: PlanningState, + *, + migration_name: str, + taste: str, + repo_root: Path, + workspace_root: Path, + live_mig_root: Path, + artifacts: RunArtifacts, + attempt: int, + retry: int, + agent: str, + model: str, + effort: str, + timeout: int | None, + effort_budget: EffortBudget | None, + effort_metadata: dict[str, object] | None, + extra_context: str, + log_mirroring: LogMirroring, +) -> tuple[MigrationManifest, PlanningState, PlanningOutcome | None]: + step = state.next_step + if step not in _STEP_PROMPT_STAGES: + raise ContinuousRefactorError(f"Planning step {step!r} cannot be executed") + prompt_stage = _STEP_PROMPT_STAGES[step] + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=workspace_root.parent, + migration_name=migration_name, + state=state, + extra_context=extra_context, + published_migration_root=live_mig_root, + ) + stdout = _run_stage( + prompt_stage, + migration_name, + state.target, + taste, + context, + repo_root, + artifacts, + attempt=attempt, + retry=retry, agent=agent, model=model, effort=effort, timeout=timeout, 
effort_metadata=effort_metadata, effort_budget=effort_budget, + log_mirroring=log_mirroring, + stage_label=step, ) - plan_path = mig_root / "plan.md" - state = _PlanningStageState(extra_context=extra_context) - always_run_stages = ( - _PlanningStageSpec( - prompt_stage="approaches", - stage_label="approaches", - build_context=lambda current: _build_context( - target, mig_relative, current.extra_context - ), - ), - _PlanningStageSpec( - prompt_stage="pick-best", - stage_label="pick-best", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty( - current.extra_context, - f"Approaches:\n{current.approach_listing}", - ), - ), - ), - _PlanningStageSpec( - prompt_stage="expand", - stage_label="expand", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty( - current.extra_context, - f"Chosen approach:\n{current.pick_stdout}", - ), - ), - refresh_phase_listing=True, - ), - _PlanningStageSpec( - prompt_stage="review", - stage_label="review", - build_context=lambda current: _build_context( - target, - mig_relative, - _join_nonempty(current.extra_context, f"Plan:\n{_read_plan_text(plan_path)}"), - ), - ), + + outcome, final_reason = _step_outcome(step, stdout) + manifest = _refresh_manifest( + manifest, + workspace_root / "manifest.json", + mig_root=workspace_root if step in ("expand", "revise") else None, ) - for spec in always_run_stages: - manifest, _ = _run_pipeline_stage( - spec, - state, - manifest, - manifest_path, - migration_name=migration_name, - target=target, - taste=taste, - repo_root=repo_root, - artifacts=artifacts, - mig_root=mig_root, - live_dir=live_dir, - attempt=attempt, - retry=retry, - agent_kw=agent_kw, - ) - review_stdout = state.review_stdout - - # Stage 5: revise + review again (only if first review had findings) - if _review_has_findings(review_stdout): - _run_stage( - "expand", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty( - 
extra_context, - f"Review findings to address:\n{review_stdout}", - ), - ), - repo_root, - artifacts, - attempt=attempt, - retry=retry, - stage_label="revise", - **agent_kw, - ) - manifest = _refresh_manifest(manifest, manifest_path, mig_root=mig_root) - - review_two_stdout = _run_stage( - "review", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty( - extra_context, - f"Plan (revised):\n{_read_plan_text(plan_path)}", - ), - ), - repo_root, - artifacts, - attempt=attempt, - retry=retry, - stage_label="review-2", - **agent_kw, - ) - _require_review_clear(review_two_stdout, "review-2") - manifest = _refresh_manifest(manifest, manifest_path) - - # Stage 6: final-review - final_stdout = _run_stage( - "final-review", migration_name, target, taste, - _build_context( - target, - mig_relative, - _join_nonempty(extra_context, f"Plan:\n{_read_plan_text(plan_path)}"), - ), - repo_root, artifacts, attempt=attempt, retry=retry, **agent_kw, + state = _record_completed_planning_step( + state, + repo_root=repo_root, + mig_root=workspace_root, + published_migration_root=live_mig_root, + stage_label=step, + outcome=outcome, + stdout=stdout, + agent=agent, + model=model, + effort=effort, + final_reason=final_reason, ) + terminal_outcome = _terminal_outcome(state) + if terminal_outcome is None: + return manifest, state, None + manifest = _apply_terminal_manifest_state( + manifest, + workspace_root / "manifest.json", + workspace_root=workspace_root, + live_dir=workspace_root.parent, + migration_name=migration_name, + target=state.target, + outcome=terminal_outcome, + ) + return manifest, state, terminal_outcome + + +def _step_outcome(step: PlanningStep, stdout: str) -> tuple[str, str | None]: + if step == "review": + return ("findings" if _review_has_findings(stdout) else "clear"), None + if step == "review-2": + _require_review_clear(stdout, "review-2") + return "clear", None + if step == "final-review": + try: + return 
_parse_final_decision(stdout) + except ContinuousRefactorError as error: + raise ContinuousRefactorError( + f"planning.final-review failed: {error}" + ) from error + return "completed", None + + +def _terminal_outcome(state: PlanningState) -> PlanningOutcome | None: + if state.next_step == "terminal-ready": + return PlanningOutcome(status="ready", reason=state.final_reason or "ready") + if state.next_step == "terminal-ready-awaiting-human": + return PlanningOutcome( + status="awaiting_human_review", + reason=state.final_reason or "awaiting human review", + ) + if state.next_step == "terminal-skipped": + return PlanningOutcome(status="skipped", reason=state.final_reason or "skipped") + return None - try: - decision, reason = _parse_final_decision(final_stdout) - except ContinuousRefactorError as error: - raise ContinuousRefactorError( - f"planning.final-review failed: {error}" - ) from error - manifest = _refresh_manifest(manifest, manifest_path) - - if decision == "approve-auto": - manifest = _refresh_manifest(manifest, manifest_path, status="ready") - return PlanningOutcome(status="ready", reason=reason) - if decision == "approve-needs-human": - manifest = _refresh_manifest( +def _apply_terminal_manifest_state( + manifest: MigrationManifest, + manifest_path: Path, + *, + workspace_root: Path, + live_dir: Path, + migration_name: str, + target: str, + outcome: PlanningOutcome, +) -> MigrationManifest: + if outcome.status == "ready": + return _refresh_manifest( + manifest, + manifest_path, + status="ready", + awaiting_human_review=False, + human_review_reason=None, + ) + if outcome.status == "awaiting_human_review": + return _refresh_manifest( manifest, manifest_path, status="ready", awaiting_human_review=True, - human_review_reason=reason, + human_review_reason=outcome.reason, ) - return PlanningOutcome(status="awaiting_human_review", reason=reason) - # reject - manifest = _refresh_manifest(manifest, manifest_path, status="skipped") - _write_skip_file(live_dir, 
migration_name, target, reason) - return PlanningOutcome(status="skipped", reason=reason) + (workspace_root / "intentional-skip.md").write_text( + f"# Intentional Skip: {migration_name}\n\n" + f"## Target\n{target}\n\n" + f"## Blocker Reason\n{outcome.reason}\n", + encoding="utf-8", + ) + return _refresh_manifest(manifest, manifest_path, status="skipped") + + +def _planning_step_reason( + step: PlanningStep, + state: PlanningState, + terminal_outcome: PlanningOutcome | None, +) -> str: + if terminal_outcome is not None: + return terminal_outcome.reason + return f"planning.{step} accepted; next step: {state.next_step}" diff --git a/src/continuous_refactoring/planning_publish.py b/src/continuous_refactoring/planning_publish.py new file mode 100644 index 0000000..bcdb1fb --- /dev/null +++ b/src/continuous_refactoring/planning_publish.py @@ -0,0 +1,688 @@ +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import stat +import subprocess +import uuid +from dataclasses import dataclass, replace +from pathlib import Path +from typing import Literal + +from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp +from continuous_refactoring.git import run_command +from continuous_refactoring.migration_consistency import ( + ConsistencyMode, + MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, +) + +__all__ = [ + "PlanningPublishError", + "PlanningPublishRequest", + "PlanningPublishResult", + "PlanningWorkspace", + "capture_live_snapshot", + "prepare_planning_workspace", + "publish_lock_path", + "publish_planning_workspace", + "snapshot_tree_digest", +] + +PublishStatus = Literal["published", "blocked", "failed"] + +_TRANSACTIONS_DIR_NAME = "__transactions__" +_LOCK_DIR_NAME = ".lock" +_LOCK_OWNER_FILE = "owner.json" +_DIGEST_VERSION = b"continuous-refactoring-tree-v1\n" +_MISSING_TREE_DIGEST_INPUT = b"missing\n" +_FS_ERRORS = (OSError, shutil.Error) + + 
+@dataclass(frozen=True) +class PlanningWorkspace: + root: Path + slug: str + run_id: str + + +@dataclass(frozen=True) +class PlanningPublishRequest: + repo_root: Path + live_migrations_dir: Path + slug: str + workspace_dir: Path + base_snapshot_id: str + validation_mode: ConsistencyMode = "ready-publish" + operation: str = "planning-publish" + now: str | None = None + + +@dataclass(frozen=True) +class PlanningPublishResult: + status: PublishStatus + reason: str + snapshot_id: str | None + live_dir: Path + transaction_dir: Path | None + staged_dir: Path | None + rollback_dir: Path | None + failed_dir: Path | None + findings: tuple[MigrationConsistencyFinding, ...] = () + dirty_paths: tuple[str, ...] = () + lock_path: Path | None = None + cleanup_error: str | None = None + + +class PlanningPublishError(ContinuousRefactorError): + def __init__(self, result: PlanningPublishResult) -> None: + self.result = result + super().__init__(_result_message(result)) + + +@dataclass(frozen=True) +class _TransactionPaths: + transaction_dir: Path + staged_dir: Path + rollback_dir: Path + failed_dir: Path + + +@dataclass(frozen=True) +class _PublishLock: + path: Path + + +def prepare_planning_workspace( + project_state_dir: Path, + slug: str, + run_id: str, +) -> PlanningWorkspace: + _require_safe_segment(slug, field="slug") + _require_safe_segment(run_id, field="run_id") + root = project_state_dir / "planning" / slug / run_id / "work" / slug + if root.exists() and any(root.iterdir()): + raise ContinuousRefactorError(f"Planning workspace is not empty: {root}") + root.mkdir(parents=True, exist_ok=True) + return PlanningWorkspace(root=root, slug=slug, run_id=run_id) + + +def capture_live_snapshot( + repo_root: Path, + live_migrations_dir: Path, + slug: str, +) -> str: + live_dir = _live_migration_dir(live_migrations_dir, slug) + dirty_paths = _dirty_live_migration_paths(repo_root, live_dir) + if dirty_paths: + _raise_result( + _blocked_result( + "dirty live migration directory; 
commit, discard, or inspect with migration doctor", + live_dir=live_dir, + dirty_paths=dirty_paths, + ) + ) + return snapshot_tree_digest(live_dir) + + +def publish_lock_path(live_migrations_dir: Path) -> Path: + return live_migrations_dir / _TRANSACTIONS_DIR_NAME / _LOCK_DIR_NAME + + +def snapshot_tree_digest(path: Path) -> str: + digest = hashlib.sha256() + digest.update(_DIGEST_VERSION) + if not path.exists(): + digest.update(_MISSING_TREE_DIGEST_INPUT) + return digest.hexdigest() + if path.is_symlink() or not path.is_dir(): + raise ContinuousRefactorError(f"Snapshot root must be a directory: {path}") + + root = path.resolve() + for child in sorted(path.rglob("*"), key=lambda item: _relative_name(path, item)): + if child.is_symlink(): + raise ContinuousRefactorError(f"Snapshot contains symlink: {child}") + try: + child_stat = child.stat() + except OSError as error: + raise ContinuousRefactorError( + f"Could not stat snapshot path {child}: {error}" + ) from error + rel = child.resolve().relative_to(root).as_posix() + mode = stat.S_IMODE(child_stat.st_mode) + if child.is_dir(): + digest.update(f"D {rel} {mode:o}\0".encode("utf-8")) + continue + if child.is_file(): + digest.update(f"F {rel} {mode:o} {child_stat.st_size}\0".encode("utf-8")) + try: + digest.update(child.read_bytes()) + except OSError as error: + raise ContinuousRefactorError( + f"Could not read snapshot path {child}: {error}" + ) from error + digest.update(b"\0") + continue + raise ContinuousRefactorError(f"Snapshot contains unsupported path: {child}") + return digest.hexdigest() + + +def publish_planning_workspace( + request: PlanningPublishRequest, +) -> PlanningPublishResult: + _validate_request(request) + live_migrations_dir = request.live_migrations_dir + live_dir = _live_migration_dir(live_migrations_dir, request.slug) + live_migrations_dir.mkdir(parents=True, exist_ok=True) + + lock = _acquire_publish_lock( + live_migrations_dir, + operation=request.operation, + now=request.now, + 
live_dir=live_dir, + ) + try: + result = _publish_planning_workspace_locked(request, live_dir) + except PlanningPublishError as error: + release_error = _release_publish_lock(lock.path) + if release_error is not None: + _raise_result(_with_cleanup_error(error.result, release_error)) + raise + except Exception as error: + release_error = _release_publish_lock(lock.path) + if release_error is not None: + raise ContinuousRefactorError( + f"{error}\n{release_error}" + ) from error + raise + + release_error = _release_publish_lock(lock.path) + if release_error is not None: + return _with_cleanup_error(result, release_error) + return result + + +def _publish_planning_workspace_locked( + request: PlanningPublishRequest, + live_dir: Path, +) -> PlanningPublishResult: + live_migrations_dir = request.live_migrations_dir + dirty_paths = _dirty_live_migration_paths(request.repo_root, live_dir) + if dirty_paths: + _raise_result( + _blocked_result( + "dirty live migration directory; commit, discard, or inspect with migration doctor", + live_dir=live_dir, + dirty_paths=dirty_paths, + ) + ) + + try: + _validate_snapshot(request.workspace_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + _raise_result( + _blocked_result( + f"workspace validation failed: {error}", + live_dir=live_dir, + ) + ) + + tx_paths = _prepare_transaction_paths(live_migrations_dir) + try: + _copy_tree(request.workspace_dir, tx_paths.staged_dir) + except _FS_ERRORS as error: + _raise_result( + _failed_result( + f"could not copy workspace to staged transaction path: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _validate_snapshot(tx_paths.staged_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + _raise_result( + _blocked_result( + f"staged validation failed: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + if not _same_device(tx_paths.staged_dir, live_migrations_dir): + _raise_result( + _blocked_result( + "staged 
publish source must be on the same filesystem as the live migrations dir", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + current_snapshot_id = snapshot_tree_digest(live_dir) + if current_snapshot_id != request.base_snapshot_id: + _raise_result( + _blocked_result( + "stale base snapshot: base_snapshot_id does not match current live snapshot " + f"(base_snapshot_id={request.base_snapshot_id}, current_snapshot_id={current_snapshot_id})", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + return _publish_staged_snapshot(request, live_dir, tx_paths) + + +def _publish_staged_snapshot( + request: PlanningPublishRequest, + live_dir: Path, + tx_paths: _TransactionPaths, +) -> PlanningPublishResult: + rollback_exists = False + try: + if live_dir.exists(): + _move_path(live_dir, tx_paths.rollback_dir) + rollback_exists = True + except OSError as error: + _raise_result( + _failed_result( + f"could not move live migration to rollback: {error}", + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _move_path(tx_paths.staged_dir, live_dir) + except OSError as error: + restore_error = _restore_rollback( + live_dir, + tx_paths, + move_live_to_failed=live_dir.exists(), + ) + _raise_result( + _failed_result( + _with_restore_context( + f"could not install staged migration: {error}", + restore_error, + ), + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + try: + _validate_snapshot(live_dir, mode=request.validation_mode) + except ContinuousRefactorError as error: + restore_error = _restore_rollback(live_dir, tx_paths, move_live_to_failed=True) + _raise_result( + _failed_result( + _with_restore_context( + f"live snapshot validation failed after publish: {error}", + restore_error, + ), + live_dir=live_dir, + tx_paths=tx_paths, + ) + ) + + cleanup_error = _cleanup_rollback(tx_paths.rollback_dir) if rollback_exists else None + if cleanup_error is None: + _remove_empty_dir(tx_paths.transaction_dir) + return PlanningPublishResult( + status="published", + 
reason="published", + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir, + staged_dir=tx_paths.staged_dir, + rollback_dir=tx_paths.rollback_dir, + failed_dir=tx_paths.failed_dir, + cleanup_error=cleanup_error, + ) + + +def _validate_request(request: PlanningPublishRequest) -> None: + _require_safe_segment(request.slug, field="slug") + if request.workspace_dir.name != request.slug: + raise ContinuousRefactorError( + "Planning workspace snapshot directory must be named for the migration " + f"slug {request.slug!r}: {request.workspace_dir}" + ) + workspace = request.workspace_dir.resolve() + live_root = request.live_migrations_dir.resolve() + try: + workspace.relative_to(live_root) + except ValueError: + pass + else: + raise ContinuousRefactorError( + f"Planning workspace must be outside live migrations dir: {request.workspace_dir}" + ) + if not request.base_snapshot_id: + raise ContinuousRefactorError("base_snapshot_id is required") + + +def _require_safe_segment(value: str, *, field: str) -> None: + if ( + not value + or Path(value).name != value + or value.startswith(".") + or value.startswith("__") + ): + raise ContinuousRefactorError( + f"Planning publish {field} is not a safe path segment: {value!r}" + ) + + +def _live_migration_dir(live_migrations_dir: Path, slug: str) -> Path: + _require_safe_segment(slug, field="slug") + return live_migrations_dir / slug + + +def _prepare_transaction_paths(live_migrations_dir: Path) -> _TransactionPaths: + token = _new_transaction_token() + transaction_dir = live_migrations_dir / _TRANSACTIONS_DIR_NAME / token + staged_dir = transaction_dir / "staged" + rollback_dir = transaction_dir / "rollback" + failed_dir = transaction_dir / "failed" + try: + transaction_dir.mkdir(parents=True, exist_ok=False) + except OSError as error: + raise ContinuousRefactorError( + f"Could not create planning transaction directory {transaction_dir}: {error}" + ) from error + return 
_TransactionPaths( + transaction_dir=transaction_dir, + staged_dir=staged_dir, + rollback_dir=rollback_dir, + failed_dir=failed_dir, + ) + + +def _acquire_publish_lock( + live_migrations_dir: Path, + *, + operation: str, + now: str | None, + live_dir: Path, +) -> _PublishLock: + lock_path = publish_lock_path(live_migrations_dir) + try: + lock_path.parent.mkdir(parents=True, exist_ok=True) + lock_path.mkdir() + except FileExistsError: + _raise_result(_lock_conflict_result(lock_path, live_dir)) + except OSError as error: + raise ContinuousRefactorError( + f"Could not acquire planning publish lock {lock_path}: {error}" + ) from error + + metadata = { + "pid": os.getpid(), + "operation": operation, + "created_at": now or iso_timestamp(), + } + try: + (lock_path / _LOCK_OWNER_FILE).write_text( + json.dumps(metadata, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + except OSError as error: + shutil.rmtree(lock_path, ignore_errors=True) + raise ContinuousRefactorError( + f"Could not write planning publish lock metadata {lock_path}: {error}" + ) from error + + return _PublishLock(lock_path) + + +def _release_publish_lock(lock_path: Path) -> str | None: + try: + _remove_tree(lock_path) + except _FS_ERRORS as error: + return f"could not release planning publish lock {lock_path}: {error}" + _remove_empty_dir(lock_path.parent) + return None + + +def _lock_conflict_result(lock_path: Path, live_dir: Path) -> PlanningPublishResult: + metadata = _read_lock_metadata(lock_path) + detail = ", ".join( + f"{key}={metadata[key]}" + for key in ("pid", "operation", "created_at") + if key in metadata + ) + suffix = f" ({detail})" if detail else "" + return PlanningPublishResult( + status="blocked", + reason=f"concurrent mutation lock is active at {lock_path}{suffix}", + snapshot_id=None, + live_dir=live_dir, + transaction_dir=None, + staged_dir=None, + rollback_dir=None, + failed_dir=None, + lock_path=lock_path, + ) + + +def _read_lock_metadata(lock_path: Path) -> dict[str, 
object]: + try: + raw = json.loads((lock_path / _LOCK_OWNER_FILE).read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return {} + if not isinstance(raw, dict): + return {} + return {key: value for key, value in raw.items() if isinstance(key, str)} + + +def _validate_snapshot(path: Path, mode: ConsistencyMode = "ready-publish") -> None: + if not path.is_dir(): + raise ContinuousRefactorError(f"Migration snapshot is not a directory: {path}") + snapshot_tree_digest(path) + findings = _publish_validation_findings(path, mode) + if not has_blocking_consistency_findings(findings): + return + details = "; ".join( + f"{finding.code}: {finding.path}: {finding.message}" + for finding in findings + if finding.severity == "error" + ) + raise ContinuousRefactorError(f"migration snapshot is inconsistent: {details}") + + +def _publish_validation_findings( + path: Path, + mode: ConsistencyMode, +) -> list[MigrationConsistencyFinding]: + findings = check_migration_consistency(path, mode=mode) + if not _is_transaction_staged_snapshot(path): + return findings + return [ + finding + for finding in findings + if finding.code != "manifest-slug-mismatch" + ] + + +def _is_transaction_staged_snapshot(path: Path) -> bool: + return path.name == "staged" and _TRANSACTIONS_DIR_NAME in path.parts + + +def _dirty_live_migration_paths(repo_root: Path, live_dir: Path) -> tuple[str, ...]: + pathspec = _repo_relative(live_dir, repo_root) + result = run_command( + ["git", "status", "--porcelain", "--ignored=matching", "--", pathspec], + cwd=repo_root, + check=False, + ) + if result.returncode != 0: + process_error = subprocess.CalledProcessError( + result.returncode, + result.args, + output=result.stdout, + stderr=result.stderr, + ) + raise ContinuousRefactorError( + "Could not inspect live migration git status.\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) from process_error + return tuple( + line[3:] if len(line) > 3 else line + for line in 
result.stdout.splitlines() + if line.strip() + ) + + +def _repo_relative(path: Path, repo_root: Path) -> str: + try: + return path.resolve().relative_to(repo_root.resolve()).as_posix() + except ValueError as error: + raise ContinuousRefactorError( + f"Live migration path must stay inside repository: {path}" + ) from error + + +def _copy_tree(source: Path, destination: Path) -> None: + shutil.copytree(source, destination) + + +def _move_path(source: Path, destination: Path) -> None: + source.replace(destination) + + +def _remove_tree(path: Path) -> None: + shutil.rmtree(path) + + +def _same_device(source: Path, target_root: Path) -> bool: + return source.stat().st_dev == target_root.stat().st_dev + + +def _new_transaction_token() -> str: + return uuid.uuid4().hex + + +def _restore_rollback( + live_dir: Path, + tx_paths: _TransactionPaths, + *, + move_live_to_failed: bool = False, +) -> str | None: + try: + if move_live_to_failed and live_dir.exists(): + _move_path(live_dir, tx_paths.failed_dir) + if tx_paths.rollback_dir.exists(): + _move_path(tx_paths.rollback_dir, live_dir) + return None + return "rollback snapshot is unavailable" + except OSError as error: + return f"rollback restore failed: {error}" + + +def _with_restore_context(message: str, restore_error: str | None) -> str: + if restore_error is None: + return f"{message}; previous live snapshot was restored" + return f"{message}; {restore_error}" + + +def _cleanup_rollback(rollback_dir: Path) -> str | None: + if not rollback_dir.exists(): + return None + try: + _remove_tree(rollback_dir) + except _FS_ERRORS as error: + return f"could not remove rollback transaction directory {rollback_dir}: {error}" + return None + + +def _remove_empty_dir(path: Path) -> None: + try: + path.rmdir() + except OSError: + return + + +def _blocked_result( + reason: str, + *, + live_dir: Path, + tx_paths: _TransactionPaths | None = None, + dirty_paths: tuple[str, ...] 
= (), +) -> PlanningPublishResult: + return PlanningPublishResult( + status="blocked", + reason=reason, + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir if tx_paths is not None else None, + staged_dir=tx_paths.staged_dir if tx_paths is not None else None, + rollback_dir=tx_paths.rollback_dir if tx_paths is not None else None, + failed_dir=tx_paths.failed_dir if tx_paths is not None else None, + dirty_paths=dirty_paths, + ) + + +def _failed_result( + reason: str, + *, + live_dir: Path, + tx_paths: _TransactionPaths, +) -> PlanningPublishResult: + return PlanningPublishResult( + status="failed", + reason=reason, + snapshot_id=snapshot_tree_digest(live_dir), + live_dir=live_dir, + transaction_dir=tx_paths.transaction_dir, + staged_dir=tx_paths.staged_dir, + rollback_dir=tx_paths.rollback_dir, + failed_dir=tx_paths.failed_dir, + lock_path=publish_lock_path(live_dir.parent), + ) + + +def _with_cleanup_error( + result: PlanningPublishResult, + cleanup_error: str, +) -> PlanningPublishResult: + combined = ( + cleanup_error + if result.cleanup_error is None + else f"{result.cleanup_error}; {cleanup_error}" + ) + return replace(result, cleanup_error=combined) + + +def _raise_result(result: PlanningPublishResult) -> None: + raise PlanningPublishError(result) + + +def _result_message(result: PlanningPublishResult) -> str: + lines = [result.reason] + if result.dirty_paths: + lines.append("dirty paths:") + lines.extend(f"- {path}" for path in result.dirty_paths) + if result.lock_path is not None: + lines.append(f"lock={result.lock_path}") + if result.status == "failed": + if result.live_dir is not None: + lines.append(f"live={result.live_dir}") + if result.rollback_dir is not None: + lines.append(f"rollback={result.rollback_dir}") + if result.staged_dir is not None: + lines.append(f"staged={result.staged_dir}") + if result.failed_dir is not None: + lines.append(f"failed={result.failed_dir}") + if result.cleanup_error 
is not None: + lines.append(f"cleanup_error={result.cleanup_error}") + return "\n".join(lines) + + +def _relative_name(root: Path, path: Path) -> str: + return path.relative_to(root).as_posix() diff --git a/src/continuous_refactoring/planning_state.py b/src/continuous_refactoring/planning_state.py new file mode 100644 index 0000000..6f8ac5e --- /dev/null +++ b/src/continuous_refactoring/planning_state.py @@ -0,0 +1,1037 @@ +from __future__ import annotations + +import json +import tempfile +from dataclasses import dataclass, replace +from pathlib import Path +from typing import Literal, TypeGuard, cast, get_args + +from continuous_refactoring.artifacts import ContinuousRefactorError, iso_timestamp + +__all__ = [ + "CompletedPlanningStep", + "FeedbackSource", + "FinalPlanningDecision", + "PlanningCursor", + "PlanningState", + "PlanningStep", + "PlanningStepOutcome", + "UserPlanningFeedback", + "append_planning_feedback", + "complete_planning_step", + "initial_planning_state", + "is_executable_planning_step", + "load_planning_state", + "new_planning_state", + "planning_stage_stdout_path", + "planning_state_path", + "planning_step_stdout", + "reopen_planning_for_revise", + "replay_planning_state", + "save_planning_state", + "validate_planning_state", + "write_planning_stage_stdout", +] + +SCHEMA_VERSION = 1 + +PlanningStep = Literal[ + "approaches", + "pick-best", + "expand", + "review", + "revise", + "review-2", + "final-review", +] +TerminalPlanningCursor = Literal[ + "terminal-ready", + "terminal-ready-awaiting-human", + "terminal-skipped", +] +PlanningCursor = PlanningStep | TerminalPlanningCursor +FinalPlanningDecision = Literal["approve-auto", "approve-needs-human", "reject"] +PlanningStepOutcome = Literal[ + "completed", + "clear", + "findings", + "approve-auto", + "approve-needs-human", + "reject", +] +FeedbackSource = Literal["message", "file"] + +_PLANNING_STEPS: tuple[str, ...] 
= cast(tuple[str, ...], get_args(PlanningStep)) +_TERMINAL_CURSORS: tuple[str, ...] = cast( + tuple[str, ...], get_args(TerminalPlanningCursor) +) +_PLANNING_CURSORS: tuple[str, ...] = (*_PLANNING_STEPS, *_TERMINAL_CURSORS) +_FINAL_DECISIONS: tuple[str, ...] = cast( + tuple[str, ...], get_args(FinalPlanningDecision) +) +_STEP_OUTCOMES: tuple[str, ...] = cast(tuple[str, ...], get_args(PlanningStepOutcome)) + +_COMPLETED_OUTCOME = "completed" +_TERMINAL_BY_DECISION: dict[str, TerminalPlanningCursor] = { + "approve-auto": "terminal-ready", + "approve-needs-human": "terminal-ready-awaiting-human", + "reject": "terminal-skipped", +} + + +@dataclass(frozen=True) +class CompletedPlanningStep: + name: PlanningStep + completed_at: str + outcome: PlanningStepOutcome + outputs: dict[str, str] + agent: str | None = None + model: str | None = None + effort: str | None = None + + def to_payload(self) -> dict[str, object]: + payload: dict[str, object] = { + "name": self.name, + "completed_at": self.completed_at, + "outcome": self.outcome, + "outputs": dict(self.outputs), + } + if self.agent is not None: + payload["agent"] = self.agent + if self.model is not None: + payload["model"] = self.model + if self.effort is not None: + payload["effort"] = self.effort + return payload + + +@dataclass(frozen=True) +class UserPlanningFeedback: + received_at: str + source: FeedbackSource + text: str + + def to_payload(self) -> dict[str, object]: + return { + "received_at": self.received_at, + "source": self.source, + "text": self.text, + } + + +@dataclass(frozen=True) +class PlanningState: + schema_version: int + target: str + next_step: PlanningCursor + completed_steps: tuple[CompletedPlanningStep, ...] + started_at: str + updated_at: str + feedback: tuple[UserPlanningFeedback, ...] + review_findings: str | None + final_decision: FinalPlanningDecision | None + final_reason: str | None + revision_base_step_counts: tuple[int, ...] 
= () + + @property + def revision_base_step_count(self) -> int | None: + if not self.revision_base_step_counts: + return None + return self.revision_base_step_counts[-1] + + +@dataclass(frozen=True) +class _ReplayResult: + next_step: PlanningCursor + review_findings: str | None + final_decision: FinalPlanningDecision | None + + +def planning_state_path(mig_root: Path) -> Path: + return mig_root / ".planning" / "state.json" + + +def planning_stage_stdout_path(mig_root: Path, step: str) -> Path: + _require_step(step) + return mig_root / ".planning" / "stages" / f"{step}.stdout.md" + + +def new_planning_state(target: str, *, now: str | None = None) -> PlanningState: + timestamp = now or iso_timestamp() + return PlanningState( + schema_version=SCHEMA_VERSION, + target=target, + next_step="approaches", + completed_steps=(), + started_at=timestamp, + updated_at=timestamp, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + revision_base_step_counts=(), + ) + + +def initial_planning_state(target: str, *, now: str | None = None) -> PlanningState: + return new_planning_state(target, now=now) + + +def is_executable_planning_step(value: object) -> TypeGuard[PlanningStep]: + return isinstance(value, str) and value in _PLANNING_STEPS + + +def complete_planning_step( + state: PlanningState, + step: str, + outcome: str, + outputs: dict[str, str], + *, + completed_at: str | None = None, + agent: str | None = None, + model: str | None = None, + effort: str | None = None, + final_reason: str | None = None, +) -> PlanningState: + step_name = _require_step(step) + step_outcome = _require_outcome(outcome) + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Planning state next_step {state.next_step!r} does not match " + f"replayed cursor {replay.next_step!r}" + ) + _validate_replay_metadata(state, replay) + if state.next_step != step_name: + raise ContinuousRefactorError( + f"Cannot complete 
planning step {step_name!r}; " + f"current step is {state.next_step!r}" + ) + completed = CompletedPlanningStep( + name=step_name, + completed_at=completed_at or iso_timestamp(), + outcome=step_outcome, + outputs=dict(outputs), + agent=agent, + model=model, + effort=effort, + ) + _validate_output_refs_syntax(completed) + updated_steps = (*state.completed_steps, completed) + updated = _replace_planning_state( + state, + completed_steps=updated_steps, + updated_at=completed.completed_at, + ) + replay = _replay_details(updated) + return _replace_planning_state( + updated, + next_step=replay.next_step, + review_findings=replay.review_findings, + final_decision=replay.final_decision, + final_reason=_next_final_reason( + state.final_reason, + replay.final_decision, + final_reason, + ), + ) + + +def append_planning_feedback( + state: PlanningState, + text: str, + source: FeedbackSource, + *, + now: str | None = None, +) -> PlanningState: + feedback_source = _require_feedback_source(source, field="source") + feedback = UserPlanningFeedback( + received_at=now or iso_timestamp(), + source=feedback_source, + text=text, + ) + updated = _replace_planning_state( + state, + updated_at=feedback.received_at, + feedback=(*state.feedback, feedback), + ) + _validate_replay_metadata(updated, _replay_details(updated)) + return updated + + +def reopen_planning_for_revise( + state: PlanningState, + *, + now: str | None = None, +) -> PlanningState: + replay = _replay_details(state) + if replay.next_step not in ("terminal-ready", "terminal-ready-awaiting-human"): + raise ContinuousRefactorError( + f"Cannot reopen planning state at {replay.next_step!r} for revise" + ) + updated = _replace_planning_state( + state, + next_step="revise", + updated_at=now or iso_timestamp(), + review_findings=None, + final_decision=None, + final_reason=None, + revision_base_step_counts=( + *state.revision_base_step_counts, + len(state.completed_steps), + ), + ) + _validate_replay_metadata(updated, 
_replay_details(updated)) + return updated + + +def replay_planning_state(state: PlanningState) -> PlanningCursor: + return _replay_details(state).next_step + + +def validate_planning_state( + state: PlanningState, + repo_root: Path, + *, + state_path: Path | None = None, + published_migration_root: Path | None = None, +) -> None: + if state.schema_version != SCHEMA_VERSION: + raise ContinuousRefactorError( + f"Unsupported planning state schema_version: {state.schema_version!r}" + ) + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Planning state next_step {state.next_step!r} does not match " + f"replayed cursor {replay.next_step!r}" + ) + _validate_replay_metadata(state, replay) + migration_root = state_path.parent.parent if state_path is not None else None + _validate_output_paths( + state, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + + +def _validate_replay_metadata(state: PlanningState, replay: _ReplayResult) -> None: + if state.review_findings != replay.review_findings: + raise ContinuousRefactorError( + "Planning state review_findings does not match replayed history" + ) + if state.final_decision != replay.final_decision: + raise ContinuousRefactorError( + "Planning state final_decision does not match replayed history" + ) + if replay.final_decision is None and state.final_reason is not None: + raise ContinuousRefactorError( + "Planning state final_reason requires a final-review decision" + ) + if replay.final_decision is not None and not state.final_reason: + raise ContinuousRefactorError( + "Planning state terminal final-review requires final_reason" + ) + + +def load_planning_state( + repo_root: Path, + path: Path, + *, + published_migration_root: Path | None = None, +) -> PlanningState: + try: + content = path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not load planning state {path}: {error}" + 
) from error + try: + raw = json.loads(content) + except json.JSONDecodeError as error: + raise ContinuousRefactorError( + f"Could not parse planning state {path}: {error}" + ) from error + state = _decode_state_payload(raw) + validate_planning_state( + state, + repo_root, + state_path=path, + published_migration_root=published_migration_root, + ) + return state + + +def save_planning_state( + state: PlanningState, + path: Path, + *, + repo_root: Path, + published_migration_root: Path | None = None, +) -> None: + validate_planning_state( + state, + repo_root, + state_path=path, + published_migration_root=published_migration_root, + ) + content = _encode_state_payload(state) + try: + path.parent.mkdir(parents=True, exist_ok=True) + except OSError as error: + raise ContinuousRefactorError( + f"Could not save planning state {path}: {error}" + ) from error + + tmp_path: Path | None = None + try: + with tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", dir=path.parent, suffix=".tmp", delete=False + ) as tmp: + tmp_path = Path(tmp.name) + tmp.write(content) + except OSError as error: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning state {path}: {error}" + ) from error + + try: + tmp_path.replace(path) + except OSError as error: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning state {path}: {error}" + ) from error + + +def write_planning_stage_stdout( + repo_root: Path, + mig_root: Path, + step: str, + stdout: str, + *, + published_migration_root: Path | None = None, +) -> dict[str, str]: + path = _next_planning_stage_stdout_path(mig_root, step) + _write_text_atomic(path, stdout) + if published_migration_root is None: + ref_path = path + else: + ref_path = published_migration_root / path.relative_to(mig_root) + return {"stdout": _repo_relative(ref_path, repo_root)} + + +def planning_step_stdout( + state: PlanningState, + repo_root: Path, + 
step: str, + *, + state_path: Path, + published_migration_root: Path | None = None, +) -> tuple[str, str]: + validate_planning_state( + state, + repo_root, + state_path=state_path, + published_migration_root=published_migration_root, + ) + step_name = _require_step(step) + migration_root = state_path.parent.parent + for completed in reversed(state.completed_steps): + if completed.name != step_name: + continue + stdout_ref = completed.outputs.get("stdout") + if stdout_ref is None: + break + path = _output_path_for_ref( + stdout_ref, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + try: + return stdout_ref, path.read_text(encoding="utf-8") + except OSError as error: + raise ContinuousRefactorError( + f"Could not read planning output {stdout_ref}: {error}" + ) from error + raise ContinuousRefactorError( + f"Planning state has no accepted stdout output for step {step_name!r}" + ) + + +def _next_planning_stage_stdout_path(mig_root: Path, step: str) -> Path: + base = planning_stage_stdout_path(mig_root, step) + if not base.exists(): + return base + index = 2 + while True: + candidate = base.with_name(f"{step}-{index}.stdout.md") + if not candidate.exists(): + return candidate + index += 1 + + +def _replace_planning_state(state: PlanningState, **changes: object) -> PlanningState: + return replace(state, **changes) + + +def _replay_details(state: PlanningState) -> _ReplayResult: + expected: PlanningCursor = "approaches" + review_findings: str | None = None + final_decision: FinalPlanningDecision | None = None + + _validate_revision_base_step_counts(state) + revision_anchor_index = 0 + revision_anchors = state.revision_base_step_counts + for index, completed in enumerate(state.completed_steps): + if ( + revision_anchor_index < len(revision_anchors) + and revision_anchors[revision_anchor_index] == index + ): + expected, review_findings, final_decision = _reopen_cursor(expected) + revision_anchor_index += 1 + if expected not in 
_PLANNING_STEPS: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} appears after terminal cursor {expected!r}" + ) + if completed.name != expected: + raise ContinuousRefactorError( + f"Completed planning step {completed.name!r} is invalid: " + f"expected {expected}" + ) + expected, review_findings, final_decision = _advance_cursor( + completed, + review_findings=review_findings, + final_decision=final_decision, + ) + + if ( + revision_anchor_index < len(revision_anchors) + and revision_anchors[revision_anchor_index] == len(state.completed_steps) + ): + expected, review_findings, final_decision = _reopen_cursor(expected) + revision_anchor_index += 1 + + return _ReplayResult( + next_step=expected, + review_findings=review_findings, + final_decision=final_decision, + ) + + +def _validate_revision_base_step_counts(state: PlanningState) -> None: + previous = 0 + for value in state.revision_base_step_counts: + if isinstance(value, bool) or not isinstance(value, int): + raise ContinuousRefactorError( + "Planning state revision_base_step_counts must contain integers" + ) + if value < 1 or value > len(state.completed_steps): + raise ContinuousRefactorError( + "Planning state revision_base_step_counts is outside completed history" + ) + if value <= previous: + raise ContinuousRefactorError( + "Planning state revision_base_step_counts must be strictly increasing" + ) + previous = value + + +def _reopen_cursor( + cursor: PlanningCursor, +) -> tuple[PlanningCursor, str | None, FinalPlanningDecision | None]: + if cursor not in ("terminal-ready", "terminal-ready-awaiting-human"): + raise ContinuousRefactorError( + "Planning state revision_base_step_counts must point at a " + f"terminal ready cursor, got {cursor!r}" + ) + return "revise", None, None + + +def _advance_cursor( + completed: CompletedPlanningStep, + *, + review_findings: str | None, + final_decision: FinalPlanningDecision | None, +) -> tuple[PlanningCursor, str | None, FinalPlanningDecision | 
None]: + _require_valid_outcome_for_step(completed) + if completed.name == "approaches": + return "pick-best", review_findings, final_decision + if completed.name == "pick-best": + return "expand", review_findings, final_decision + if completed.name == "expand": + return "review", review_findings, final_decision + if completed.name == "review": + if completed.outcome == "findings": + return "revise", _required_stdout_output(completed), final_decision + return "final-review", review_findings, final_decision + if completed.name == "revise": + return "review-2", review_findings, final_decision + if completed.name == "review-2": + return "final-review", review_findings, final_decision + decision = cast(FinalPlanningDecision, completed.outcome) + return _TERMINAL_BY_DECISION[decision], review_findings, decision + + +def _require_valid_outcome_for_step(completed: CompletedPlanningStep) -> None: + allowed = _allowed_outcomes(completed.name) + if completed.outcome not in allowed: + allowed_text = ", ".join(repr(outcome) for outcome in allowed) + raise ContinuousRefactorError( + f"Planning step {completed.name!r} outcome {completed.outcome!r} " + f"is invalid; expected one of {allowed_text}" + ) + + +def _allowed_outcomes(step: PlanningStep) -> tuple[str, ...]: + if step in ("approaches", "pick-best", "expand", "revise"): + return (_COMPLETED_OUTCOME,) + if step == "review": + return ("clear", "findings") + if step == "review-2": + return ("clear",) + return _FINAL_DECISIONS + + +def _required_stdout_output(completed: CompletedPlanningStep) -> str: + stdout_ref = completed.outputs.get("stdout") + if not stdout_ref: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} must record a stdout output" + ) + return stdout_ref + + +def _next_final_reason( + previous: str | None, + final_decision: FinalPlanningDecision | None, + final_reason: str | None, +) -> str | None: + if final_decision is None: + return None + if final_reason is not None: + return final_reason 
+ return previous + + +def _validate_output_paths( + state: PlanningState, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, +) -> None: + for completed in state.completed_steps: + stdout_ref = _required_stdout_output(completed) + _validate_output_refs_syntax(completed) + _require_existing_output( + stdout_ref, + repo_root, + migration_root, + published_migration_root=published_migration_root, + field=f"completed_steps.{completed.name}.outputs.stdout", + ) + + +def _validate_output_refs_syntax(completed: CompletedPlanningStep) -> None: + if completed.outputs.keys() != {"stdout"}: + raise ContinuousRefactorError( + f"Planning step {completed.name!r} has unsupported outputs" + ) + _require_repo_relative_path( + _required_stdout_output(completed), + field=f"completed_steps.{completed.name}.outputs.stdout", + ) + + +def _require_existing_output( + value: str, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, + field: str, +) -> None: + ref = _require_repo_relative_path(value, field=field) + repo_output_path = repo_root / ref + output_path = _output_path_for_ref( + value, + repo_root, + migration_root, + published_migration_root=published_migration_root, + ) + resolved_output = output_path.resolve() + try: + repo_output_path.resolve().relative_to(repo_root.resolve()) + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} must be repo-relative" + ) from error + if published_migration_root is not None: + try: + repo_output_path.resolve().relative_to(published_migration_root.resolve()) + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} must stay inside the published migration directory" + ) from error + if migration_root is not None: + try: + resolved_output.relative_to(migration_root.resolve()) + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {value!r} 
must stay inside the migration directory" + ) from error + if output_path.is_symlink(): + raise ContinuousRefactorError( + f"Planning output path {value!r} must be a regular file, not a symlink" + ) + if not output_path.is_file(): + raise ContinuousRefactorError(f"missing planning output: {value}") + + +def _output_path_for_ref( + value: str, + repo_root: Path, + migration_root: Path | None, + *, + published_migration_root: Path | None, +) -> Path: + ref_path = repo_root / _require_repo_relative_path(value, field="stdout") + if migration_root is None or published_migration_root is None: + return ref_path + try: + relative = ref_path.resolve().relative_to(published_migration_root.resolve()) + except ValueError: + return ref_path + return migration_root / relative + + +def _require_repo_relative_path(value: str, *, field: str) -> Path: + if not isinstance(value, str): + raise ContinuousRefactorError(f"Planning field {field!r} must be a string") + ref = Path(value) + if str(ref) in ("", ".") or ref.is_absolute() or ".." 
in ref.parts: + raise ContinuousRefactorError( + f"Planning output path {value!r} must be repo-relative" + ) + return ref + + +def _repo_relative(path: Path, repo_root: Path) -> str: + try: + return path.relative_to(repo_root).as_posix() + except ValueError as error: + raise ContinuousRefactorError( + f"Planning output path {path} must be inside repository {repo_root}" + ) from error + + +def _decode_state_payload(raw_payload: object) -> PlanningState: + raw = _require_mapping(raw_payload, field="planning state") + _require_keys( + raw, + { + "schema_version", + "target", + "next_step", + "completed_steps", + "started_at", + "updated_at", + "feedback", + "review_findings", + "final_decision", + "final_reason", + }, + optional={"revision_base_step_count", "revision_base_step_counts"}, + field="planning state", + ) + revision_base_step_counts = _decode_revision_base_step_counts(raw) + return PlanningState( + schema_version=_require_int(raw.get("schema_version"), field="schema_version"), + target=_require_str(raw.get("target"), field="target"), + next_step=_require_cursor(raw.get("next_step"), field="next_step"), + completed_steps=_require_completed_steps(raw.get("completed_steps")), + started_at=_require_str(raw.get("started_at"), field="started_at"), + updated_at=_require_str(raw.get("updated_at"), field="updated_at"), + feedback=_require_feedback_tuple(raw.get("feedback"), field="feedback"), + review_findings=_optional_str(raw.get("review_findings"), field="review_findings"), + final_decision=_optional_final_decision( + raw.get("final_decision"), field="final_decision" + ), + final_reason=_optional_str(raw.get("final_reason"), field="final_reason"), + revision_base_step_counts=revision_base_step_counts, + ) + + +def _encode_state_payload(state: PlanningState) -> str: + replay = _replay_details(state) + if state.next_step != replay.next_step: + raise ContinuousRefactorError( + f"Cannot save planning state with next_step {state.next_step!r}; " + f"replayed cursor is 
{replay.next_step!r}" + ) + _validate_replay_metadata(state, replay) + payload = { + "schema_version": state.schema_version, + "target": state.target, + "next_step": state.next_step, + "completed_steps": [step.to_payload() for step in state.completed_steps], + "started_at": state.started_at, + "updated_at": state.updated_at, + "feedback": [feedback.to_payload() for feedback in state.feedback], + "review_findings": state.review_findings, + "final_decision": state.final_decision, + "final_reason": state.final_reason, + "revision_base_step_counts": list(state.revision_base_step_counts), + } + return json.dumps(payload, indent=2, sort_keys=True) + "\n" + + +def _decode_revision_base_step_counts( + raw: dict[str, object], +) -> tuple[int, ...]: + if "revision_base_step_counts" in raw: + if "revision_base_step_count" in raw and raw["revision_base_step_count"] is not None: + raise ContinuousRefactorError( + "Planning state may not mix revision_base_step_count and " + "revision_base_step_counts" + ) + return _require_int_tuple( + raw.get("revision_base_step_counts"), + field="revision_base_step_counts", + ) + legacy_value = _optional_int( + raw.get("revision_base_step_count"), + field="revision_base_step_count", + ) + if legacy_value is None: + return () + return (legacy_value,) + + +def _require_completed_steps(value: object) -> tuple[CompletedPlanningStep, ...]: + if not isinstance(value, list): + raise ContinuousRefactorError( + f"Planning field 'completed_steps' must be a list: {value!r}" + ) + return tuple( + _require_completed_step(raw_step, index=index) + for index, raw_step in enumerate(value) + ) + + +def _require_completed_step(raw_step: object, *, index: int) -> CompletedPlanningStep: + raw = _require_mapping(raw_step, field=f"completed_steps[{index}]") + _require_keys( + raw, + {"name", "completed_at", "outcome", "outputs"}, + optional={"agent", "model", "effort"}, + field=f"completed_steps[{index}]", + ) + return CompletedPlanningStep( + 
name=_require_step(raw.get("name")), + completed_at=_require_str( + raw.get("completed_at"), field=f"completed_steps[{index}].completed_at" + ), + outcome=_require_outcome(raw.get("outcome")), + outputs=_require_outputs(raw.get("outputs"), index=index), + agent=_optional_str(raw.get("agent"), field=f"completed_steps[{index}].agent"), + model=_optional_str(raw.get("model"), field=f"completed_steps[{index}].model"), + effort=_optional_str(raw.get("effort"), field=f"completed_steps[{index}].effort"), + ) + + +def _require_outputs(value: object, *, index: int) -> dict[str, str]: + raw = _require_mapping(value, field=f"completed_steps[{index}].outputs") + outputs: dict[str, str] = {} + for key, output in raw.items(): + if not isinstance(key, str): + raise ContinuousRefactorError( + f"Planning outputs keys must be strings: {key!r}" + ) + outputs[key] = _require_str( + output, + field=f"completed_steps[{index}].outputs.{key}", + ) + return outputs + + +def _require_mapping(value: object, *, field: str) -> dict[str, object]: + if not isinstance(value, dict): + raise ContinuousRefactorError( + f"Planning field {field!r} must be an object: {value!r}" + ) + return value + + +def _require_keys( + raw: dict[str, object], + required: set[str], + *, + field: str, + optional: set[str] | None = None, +) -> None: + allowed = required | (optional or set()) + missing = sorted(required - raw.keys()) + extra = sorted(raw.keys() - allowed) + if missing: + raise ContinuousRefactorError( + f"Planning field {field!r} is missing keys: {', '.join(missing)}" + ) + if extra: + raise ContinuousRefactorError( + f"Planning field {field!r} has unknown keys: {', '.join(extra)}" + ) + + +def _require_str(value: object, *, field: str) -> str: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a string: {value!r}" + ) + return value + + +def _optional_str(value: object, *, field: str) -> str | None: + if value is None: + return None + return 
_require_str(value, field=field) + + +def _require_int(value: object, *, field: str) -> int: + if isinstance(value, bool) or not isinstance(value, int): + raise ContinuousRefactorError( + f"Planning field {field!r} must be an integer: {value!r}" + ) + return value + + +def _optional_int(value: object, *, field: str) -> int | None: + if value is None: + return None + return _require_int(value, field=field) + + +def _require_int_tuple(value: object, *, field: str) -> tuple[int, ...]: + if not isinstance(value, list): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a list: {value!r}" + ) + return tuple( + _require_int(item, field=f"{field}[{index}]") + for index, item in enumerate(value) + ) + + +def _require_feedback_tuple( + value: object, + *, + field: str, +) -> tuple[UserPlanningFeedback, ...]: + if not isinstance(value, list): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a list: {value!r}" + ) + return tuple( + _require_feedback(item, field=f"{field}[{index}]") + for index, item in enumerate(value) + ) + + +def _require_feedback(value: object, *, field: str) -> UserPlanningFeedback: + if isinstance(value, str): + return UserPlanningFeedback(received_at="", source="message", text=value) + raw = _require_mapping(value, field=field) + _require_keys(raw, {"received_at", "source", "text"}, field=field) + return UserPlanningFeedback( + received_at=_require_str(raw.get("received_at"), field=f"{field}.received_at"), + source=_require_feedback_source(raw.get("source"), field=f"{field}.source"), + text=_require_str(raw.get("text"), field=f"{field}.text"), + ) + + +def _require_feedback_source(value: object, *, field: str) -> FeedbackSource: + source = _require_str(value, field=field) + if source not in ("message", "file"): + raise ContinuousRefactorError( + f"Planning field {field!r} must be 'message' or 'file': {source!r}" + ) + return cast(FeedbackSource, source) + + +def _require_cursor(value: object, *, field: str) -> 
PlanningCursor: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning field {field!r} must be a string: {value!r}" + ) + if value not in _PLANNING_CURSORS: + raise ContinuousRefactorError(f"Unknown planning cursor: {value!r}") + return cast(PlanningCursor, value) + + +def _require_step(value: object) -> PlanningStep: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning step name must be a string: {value!r}" + ) + if value not in _PLANNING_STEPS: + raise ContinuousRefactorError(f"Unknown planning step: {value!r}") + return cast(PlanningStep, value) + + +def _require_outcome(value: object) -> PlanningStepOutcome: + if not isinstance(value, str): + raise ContinuousRefactorError( + f"Planning step outcome must be a string: {value!r}" + ) + if value not in _STEP_OUTCOMES: + raise ContinuousRefactorError(f"Unknown planning outcome: {value!r}") + return cast(PlanningStepOutcome, value) + + +def _optional_final_decision( + value: object, + *, + field: str, +) -> FinalPlanningDecision | None: + if value is None: + return None + decision = _require_str(value, field=field) + if decision not in _FINAL_DECISIONS: + raise ContinuousRefactorError(f"Unknown final planning decision: {decision!r}") + return cast(FinalPlanningDecision, decision) + + +def _write_text_atomic(path: Path, content: str) -> None: + try: + path.parent.mkdir(parents=True, exist_ok=True) + except OSError as error: + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error + + tmp_path: Path | None = None + try: + with tempfile.NamedTemporaryFile( + mode="w", encoding="utf-8", dir=path.parent, suffix=".tmp", delete=False + ) as tmp: + tmp_path = Path(tmp.name) + tmp.write(content) + except OSError as error: + if tmp_path is not None: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error + + try: + tmp_path.replace(path) + except OSError as 
error: + tmp_path.unlink(missing_ok=True) + raise ContinuousRefactorError( + f"Could not save planning output {path}: {error}" + ) from error diff --git a/src/continuous_refactoring/prompts.py b/src/continuous_refactoring/prompts.py index a65af34..ec5a9a8 100644 --- a/src/continuous_refactoring/prompts.py +++ b/src/continuous_refactoring/prompts.py @@ -843,6 +843,8 @@ def compose_phase_execution_prompt( You are conducting a human review of a refactoring migration that was flagged for human input during planning. +Project-specific taste is injected by the caller in the `## Taste` section. + The plan and phase files were written at an earlier point in time. The repository may have drifted since then: files referenced in the plan may have moved, been renamed, been deleted, or changed in shape. Line numbers, symbol @@ -850,7 +852,8 @@ def compose_phase_execution_prompt( against the current tree. Your job: -1. Read the migration plan (plan.md), the current phase file, and the manifest. +1. Read the migration plan (plan.md), the current phase file, and the manifest + from the staged work dir. 2. Check the plan against the current repo state. For each file, symbol, or line reference the plan relies on, confirm it still exists and still means what the plan assumes. Note any drift you find — stale assumptions change @@ -863,13 +866,21 @@ def compose_phase_execution_prompt( shape the plan was written against. 4. Ask the user whatever questions are needed to unblock the migration. 5. Based on the user's answers, update plan.md and/or phase files as needed. - Fix drifted references while you are there. + Fix drifted references while you are there. Write only inside the staged + work dir. 6. When the review is complete and the user approves, update manifest.json: set "awaiting_human_review" to false and set "human_review_reason" to null. If the user wants to abort or cannot resolve the review, leave awaiting_human_review as true and exit cleanly. 
+Do not mutate the live migration directory. It is read-only reference material. +The staged work dir is the only writable target. Successful review changes are +published by the harness after validation. +If review fails or exits before publish, failed review output and partial staged +changes are run artifacts only; they are not resume input. Rerun review starts +from the last published live migration snapshot. + ## Output Contract When the review is successfully completed: - manifest.json MUST have "awaiting_human_review": false @@ -880,21 +891,47 @@ def compose_phase_execution_prompt( def compose_review_perform_prompt( migration_name: str, - manifest_path: Path, - plan_path: Path, + repo_root: Path, + work_dir: Path, + live_dir: Path, phase: PhaseSpec | None, manifest: MigrationManifest, + taste: str, ) -> str: + reason = manifest.human_review_reason or "(no reason recorded)" sections: list[str] = [ REVIEW_PERFORM_PROMPT, f"## Migration\nName: {migration_name}", - f"## Manifest\nPath: {manifest_path}\n{_format_manifest_summary(manifest)}", - f"## Plan\nPath: {plan_path}", + ( + "## Workspace\n" + f"Repo root: {repo_root}\n" + f"Staged work dir: {work_dir}\n" + f"Work dir: {work_dir}\n" + f"Live migration dir: {live_dir}\n" + "Writable target: staged work dir only.\n" + "Writable target: work dir only.\n" + "The live migration directory is read-only reference material.\n" + "Do not mutate the live migration directory." 
+ ), + f"## Human Review\n{reason}", + ( + "## Manifest\n" + f"Path: {work_dir / 'manifest.json'}\n" + f"{_format_manifest_summary(manifest)}" + ), + f"## Plan\nPath: {work_dir / 'plan.md'}", ] if phase is not None: sections.append( "## Current Phase\n" f"Name: {phase.name}\n" - f"File: {phase_file_reference(phase)}" + f"File: {work_dir / phase_file_reference(phase)}" + ) + else: + sections.append( + "## Current Phase\n" + "Current phase file: (none)\n" + "Current phase name: (none)" ) + sections.append(f"## Taste\n{taste}") return _join_sections(*sections) diff --git a/src/continuous_refactoring/refactor_attempts.py b/src/continuous_refactoring/refactor_attempts.py index 6fbbb3b..fb4e428 100644 --- a/src/continuous_refactoring/refactor_attempts.py +++ b/src/continuous_refactoring/refactor_attempts.py @@ -103,6 +103,74 @@ def _retry_context(record: DecisionRecord) -> str: return "\n".join(lines) +def _decision_record( + *, + decision: str, + retry_recommendation: str, + target: str, + call_role: str, + phase_reached: str, + failure_kind: str, + summary: str, + next_retry_focus: str | None = None, + agent_last_message_path: Path | None = None, + agent_stdout_path: Path | None = None, + agent_stderr_path: Path | None = None, + tests_stdout_path: Path | None = None, + tests_stderr_path: Path | None = None, +) -> DecisionRecord: + return DecisionRecord( + decision=decision, + retry_recommendation=retry_recommendation, + target=target, + call_role=call_role, + phase_reached=phase_reached, + failure_kind=failure_kind, + summary=summary, + next_retry_focus=next_retry_focus, + agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + agent_stderr_path=agent_stderr_path, + tests_stdout_path=tests_stdout_path, + tests_stderr_path=tests_stderr_path, + ) + + +def _restore_and_retry( + *, + repo_root: Path, + head_before: str, + preserved_workspace: _PreservedWorkspaceTree | None, + target: str, + call_role: str, + phase_reached: str, + 
failure_kind: str, + summary: str, + next_retry_focus: str | None, + agent_last_message_path: Path | None, + agent_stdout_path: Path | None, + agent_stderr_path: Path | None, + tests_stdout_path: Path | None = None, + tests_stderr_path: Path | None = None, +) -> DecisionRecord: + _reset_to_source_baseline(repo_root, head_before, preserved_workspace) + return _decision_record( + decision="retry", + retry_recommendation="same-target", + target=target, + call_role=call_role, + phase_reached=phase_reached, + failure_kind=failure_kind, + summary=summary, + next_retry_focus=next_retry_focus, + agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + agent_stderr_path=agent_stderr_path, + tests_stdout_path=tests_stdout_path, + tests_stderr_path=tests_stderr_path, + ) + + def _finalize_commit( repo_root: Path, head_before: str, @@ -194,7 +262,6 @@ def _run_refactor_attempt( summary=str(error), effort=effort_metadata, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) agent_status = read_status( agent, last_message_path=last_message_path, @@ -205,9 +272,10 @@ def _run_refactor_attempt( fallback=sanitize_text(str(error), repo_root) or str(error), repo_root=repo_root, ) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=call_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -241,10 +309,10 @@ def _run_refactor_attempt( summary=summary, effort=effort_metadata, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=call_role, 
phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -293,15 +361,15 @@ def _run_refactor_attempt( level="WARN", summary=str(error), ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) summary, focus = status_summary( agent_status, fallback=sanitize_text(str(error), repo_root) or str(error), repo_root=repo_root, ) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=validation_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -332,10 +400,10 @@ def _run_refactor_attempt( returncode=validation_result.returncode, summary=summary, ) - _reset_to_source_baseline(repo_root, head_before, preserved_workspace) - return DecisionRecord( - decision="retry", - retry_recommendation="same-target", + return _restore_and_retry( + repo_root=repo_root, + head_before=head_before, + preserved_workspace=preserved_workspace, target=target.description, call_role=validation_role, phase_reached=resolved_phase_reached(agent_status, phase_reached), @@ -371,7 +439,7 @@ def _run_refactor_attempt( agent_status.retry_recommendation or default_retry_recommendation(decision) ) - return DecisionRecord( + return _decision_record( decision=decision, retry_recommendation=retry_recommendation, target=target.description, @@ -408,7 +476,7 @@ def _run_refactor_attempt( phase="refactor", ) - return DecisionRecord( + return _decision_record( decision="commit", retry_recommendation="none", target=target.description, diff --git a/src/continuous_refactoring/review_cli.py b/src/continuous_refactoring/review_cli.py index 1d9f7da..4c63b07 100644 --- a/src/continuous_refactoring/review_cli.py +++ b/src/continuous_refactoring/review_cli.py @@ -1,31 +1,78 @@ from __future__ import annotations import argparse +import shutil import sys -from dataclasses import replace 
+import uuid +from dataclasses import dataclass from pathlib import Path from continuous_refactoring.agent import run_agent_interactive from continuous_refactoring.artifacts import ContinuousRefactorError -from continuous_refactoring.config import resolve_live_migrations_dir, resolve_project +from continuous_refactoring.config import ( + load_taste, + resolve_live_migrations_dir, + resolve_project, +) +from continuous_refactoring.migration_cli import MigrationTarget, resolve_migration_target +from continuous_refactoring.migration_consistency import ( + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) from continuous_refactoring.migrations import ( load_manifest as load_migration_manifest, phase_file_reference, resolve_current_phase, - save_manifest as save_migration_manifest, +) +from continuous_refactoring.planning_publish import ( + PlanningPublishError, + PlanningPublishRequest, + PlanningPublishResult, + capture_live_snapshot, + prepare_planning_workspace, + publish_planning_workspace, ) from continuous_refactoring.prompts import compose_review_perform_prompt __all__ = [ + "StagedReviewRequest", "handle_review", "handle_review_list", "handle_review_perform", + "handle_staged_migration_review", + "perform_staged_migration_review", ] _REVIEW_USAGE = "Usage: continuous-refactoring review {list,perform}" -def _resolve_review_context(*, error_code: int) -> Path: +@dataclass(frozen=True) +class _ReviewCliContext: + repo_root: Path + live_dir: Path + project_state_dir: Path + + +@dataclass(frozen=True) +class StagedReviewRequest: + repo_root: Path + live_dir: Path + target: MigrationTarget + project_state_dir: Path + agent: str + model: str + effort: str + taste: str + + +class _ReviewCliError(ContinuousRefactorError): + def __init__(self, message: str, exit_code: int) -> None: + self.exit_code = exit_code + super().__init__(message) + + +def _resolve_review_context(*, error_code: int) -> _ReviewCliContext: try: 
project = resolve_project(Path.cwd().resolve()) except ContinuousRefactorError: @@ -46,18 +93,21 @@ def _resolve_review_context(*, error_code: int) -> Path: ) raise SystemExit(error_code) - return live_dir + return _ReviewCliContext( + repo_root=Path(project.entry.path).resolve(), + live_dir=live_dir, + project_state_dir=project.project_dir, + ) def handle_review_list() -> None: - live_dir = _resolve_review_context(error_code=1) + context = _resolve_review_context(error_code=1) + live_dir = context.live_dir if not live_dir.is_dir(): return - for child in sorted(live_dir.iterdir()): - if not child.is_dir() or child.name.startswith("__"): - continue + for child in iter_visible_migration_dirs(live_dir): manifest_file = child / "manifest.json" if not manifest_file.exists(): continue @@ -75,56 +125,166 @@ def handle_review_list() -> None: def handle_review_perform(args: argparse.Namespace) -> None: - live_dir = _resolve_review_context(error_code=2) + context = _resolve_review_context(error_code=2) + try: + target = resolve_migration_target( + live_dir=context.live_dir, + repo_root=context.repo_root, + value=args.migration, + ) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(2) from error - migration_name: str = args.migration - migration_dir = live_dir / migration_name - manifest_path = migration_dir / "manifest.json" - if not manifest_path.exists(): + try: + taste = load_taste(resolve_project(context.repo_root)) + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + + handle_staged_migration_review( + StagedReviewRequest( + repo_root=context.repo_root, + live_dir=context.live_dir, + target=target, + project_state_dir=context.project_state_dir, + agent=args.agent, + model=args.model, + effort=args.effort, + taste=taste, + ) + ) + + +def handle_staged_migration_review( + request: StagedReviewRequest, +) -> PlanningPublishResult: + try: + return 
perform_staged_migration_review(request) + except _ReviewCliError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(error.exit_code) from error + except PlanningPublishError as error: print( - f"Error: migration '{migration_name}' does not exist.", + f"Error: {_review_publish_error_message(error, request.target.slug)}", file=sys.stderr, ) - raise SystemExit(2) + raise SystemExit(1) from error + except ContinuousRefactorError as error: + print(f"Error: {error}", file=sys.stderr) + raise SystemExit(1) from error + + +def perform_staged_migration_review( + request: StagedReviewRequest, +) -> PlanningPublishResult: + manifest_path = request.target.path / "manifest.json" + if not manifest_path.exists(): + raise _ReviewCliError( + f"migration '{request.target.slug}' does not exist.", + 2, + ) manifest = load_migration_manifest(manifest_path) if not manifest.awaiting_human_review: - print( - f"Error: migration '{migration_name}' is not flagged for review.", - file=sys.stderr, + raise _ReviewCliError( + f"migration '{request.target.slug}' is not flagged for review.", + 2, ) - raise SystemExit(2) - plan_path = migration_dir / "plan.md" - phase = resolve_current_phase(manifest) if manifest.current_phase else None + base_snapshot_id = capture_live_snapshot( + request.repo_root, + request.live_dir, + request.target.slug, + ) + workspace = prepare_planning_workspace( + request.project_state_dir, + request.target.slug, + f"review-{uuid.uuid4().hex}", + ) + try: + shutil.copytree(request.target.path, workspace.root, dirs_exist_ok=True) + except (OSError, shutil.Error) as error: + raise ContinuousRefactorError( + f"Could not copy migration to review workspace {workspace.root}: {error}" + ) from error + phase = resolve_current_phase(manifest) if manifest.current_phase else None prompt = compose_review_perform_prompt( - migration_name, manifest_path, plan_path, phase, manifest, + request.target.slug, + request.repo_root, + workspace.root, + 
request.target.path, + phase, + manifest, + request.taste, ) - repo_root = Path.cwd().resolve() returncode = run_agent_interactive( - args.agent, args.model, args.effort, prompt, repo_root, + request.agent, + request.model, + request.effort, + prompt, + workspace.root, ) if returncode != 0: - print( - f"Error: review agent exited with code {returncode}.", - file=sys.stderr, + raise _ReviewCliError( + f"review agent exited with code {returncode}.", + returncode, ) - raise SystemExit(returncode) - reloaded = load_migration_manifest(manifest_path) + _require_consistent_review_workspace(workspace.root) + reloaded = load_migration_manifest(workspace.root / "manifest.json") if reloaded.awaiting_human_review: - print( - f"Error: review of '{migration_name}' was not completed — " + raise _ReviewCliError( + f"review of '{request.target.slug}' was not completed; " "awaiting_human_review is still set.", - file=sys.stderr, + 1, ) - raise SystemExit(1) - if reloaded.human_review_reason is not None: - save_migration_manifest( - replace(reloaded, human_review_reason=None), manifest_path, + raise _ReviewCliError( + f"review of '{request.target.slug}' was not completed; " + "human_review_reason is still set.", + 1, + ) + + return publish_planning_workspace( + PlanningPublishRequest( + repo_root=request.repo_root, + live_migrations_dir=request.live_dir, + slug=request.target.slug, + workspace_dir=workspace.root, + base_snapshot_id=base_snapshot_id, + validation_mode="ready-publish", + operation="migration.review", ) + ) + + +def _require_consistent_review_workspace(workspace_root: Path) -> None: + findings = check_migration_consistency(workspace_root, mode="ready-publish") + if not has_blocking_consistency_findings(findings): + return + details = "; ".join( + f"{finding.code}: {finding.path}: {finding.message}" + for finding in findings + if finding.severity == "error" + ) + raise _ReviewCliError( + f"review workspace validation failed: {details}", + 1, + ) + + +def 
_review_publish_error_message(error: PlanningPublishError, slug: str) -> str: + message = str(error) + if "stale base snapshot" not in error.result.reason: + return message + return ( + f"{message}\n" + "Live migration changed while review was running. " + f"Run `continuous-refactoring migration doctor {slug}` if unsure, then " + f"rerun `continuous-refactoring migration review {slug} ...`." + ) def handle_review(args: argparse.Namespace) -> None: diff --git a/src/continuous_refactoring/routing.py b/src/continuous_refactoring/routing.py index 3bda167..c1c8fe7 100644 --- a/src/continuous_refactoring/routing.py +++ b/src/continuous_refactoring/routing.py @@ -12,6 +12,7 @@ from continuous_refactoring.agent import maybe_run_agent from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.prompts import compose_classifier_prompt ClassifierDecision = Literal["cohesive-cleanup", "needs-plan"] @@ -21,6 +22,29 @@ ) +def _log_failed_classification( + artifacts: RunArtifacts, + target: Target, + *, + attempt: int, + retry: int, + summary: str, + effort_metadata: dict[str, object] | None, + returncode: int | None = None, +) -> None: + artifacts.log_call_finished( + attempt=attempt, + retry=retry, + target=target.description, + call_role="classify", + status="failed", + level="WARN", + returncode=returncode, + summary=summary, + effort=effort_metadata, + ) + + def _parse_decision(stdout: str) -> ClassifierDecision: non_empty = [line.strip() for line in stdout.splitlines() if line.strip()] if not non_empty: @@ -47,6 +71,7 @@ def classify_target( effort: str, timeout: int | None, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> ClassifierDecision: prompt = compose_classifier_prompt(target, taste) classify_dir = artifacts.root / "classify" @@ -73,33 +98,29 @@ def classify_target( last_message_path=( classify_dir / 
"agent-last-message.md" if agent == "codex" else None ), - mirror_to_terminal=False, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", summary=str(error), - effort=effort_metadata, + effort_metadata=effort_metadata, ) raise if result.returncode != 0: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", - returncode=result.returncode, summary=f"{agent} exited with code {result.returncode}", - effort=effort_metadata, + effort_metadata=effort_metadata, + returncode=result.returncode, ) raise ContinuousRefactorError( f"Classifier agent failed with exit code {result.returncode}" @@ -108,16 +129,14 @@ def classify_target( try: decision = _parse_decision(result.stdout) except ContinuousRefactorError as error: - artifacts.log_call_finished( + _log_failed_classification( + artifacts, + target, attempt=attempt, retry=retry, - target=target.description, - call_role=call_role, - status="failed", - level="WARN", - returncode=result.returncode, summary=str(error), - effort=effort_metadata, + effort_metadata=effort_metadata, + returncode=result.returncode, ) raise diff --git a/src/continuous_refactoring/routing_pipeline.py b/src/continuous_refactoring/routing_pipeline.py index 79670a9..8d2fc0d 100644 --- a/src/continuous_refactoring/routing_pipeline.py +++ b/src/continuous_refactoring/routing_pipeline.py @@ -30,20 +30,35 @@ ) from continuous_refactoring.effort import EffortBudget, resolve_effort_budget from continuous_refactoring.git import get_head_sha -from continuous_refactoring.migration_tick import try_migration_tick as _try_migration_tick -from continuous_refactoring.planning 
import run_planning +from continuous_refactoring.log_mirroring import LogMirroring +from continuous_refactoring.migration_tick import ( + try_migration_tick as _try_migration_tick, + try_planning_tick as _try_planning_tick, +) +from continuous_refactoring.planning import ( + PlanningStepResult, + planning_artifact_paths, + run_next_planning_step, +) from continuous_refactoring.prompts import describe_scope_candidate from continuous_refactoring.routing import classify_target from continuous_refactoring.scope_expansion import ( + ScopeSelection, scope_candidate_to_target, scope_expansion_bypass_reason, select_scope_candidate, + write_scope_selection_logs, write_scope_expansion_artifacts, ) from continuous_refactoring.scope_candidates import build_scope_candidates from continuous_refactoring.targeting import Target +_REVIEW_TWO_FINDINGS_FAILURE = ( + "planning.review-2 failed: revised plan still has findings" +) + + def migration_name_from_target(target: Target) -> str: slug = re.sub(r"[^a-z0-9]+", "-", target.description.lower()).strip("-") ts = datetime.now().astimezone().strftime("%Y%m%dT%H%M%S") @@ -59,6 +74,88 @@ class RouteResult: decision_record: DecisionRecord | None = None +def _sanitized_summary(text: str, repo_root: Path) -> str: + return sanitize_text(text, repo_root) or text + + +def _abandon_result( + *, + target: Target, + planning_context: str, + repo_root: Path, + error: ContinuousRefactorError, + call_role: str, + agent_last_message_path: Path | None = None, + agent_stdout_path: Path | None = None, + agent_stderr_path: Path | None = None, +) -> RouteResult: + summary = _sanitized_summary(str(error), repo_root) + return RouteResult( + outcome="abandon", + target=target, + planning_context=planning_context, + decision_record=DecisionRecord( + decision="abandon", + retry_recommendation="new-target", + target=target.description, + call_role=call_role, + phase_reached=call_role, + failure_kind=_planning_failure_kind(str(error)), + summary=summary, + 
agent_last_message_path=agent_last_message_path, + agent_stdout_path=agent_stdout_path, + agent_stderr_path=agent_stderr_path, + ), + ) + + +def _planning_result( + *, + outcome: RouteOutcome, + target: Target, + planning_context: str, + repo_root: Path, + reason: str, + call_role: str = "planning.final-review", + failure_kind: str | None = None, +) -> RouteResult: + summary = _sanitized_summary(reason, repo_root) + return RouteResult( + outcome=outcome, + target=target, + planning_context=planning_context, + decision_record=DecisionRecord( + decision=outcome, + retry_recommendation="none" if outcome == "commit" else "new-target", + target=target.description, + call_role=call_role, + phase_reached=call_role, + failure_kind=( + failure_kind + if failure_kind is not None + else ("none" if outcome == "commit" else "planning-rejected") + ), + summary=summary, + ), + ) + + +def _planning_route_outcome(result: PlanningStepResult) -> RouteOutcome: + if result.status == "published": + if result.terminal_outcome is not None and result.terminal_outcome.status == "skipped": + return "abandon" + return "commit" + if result.status == "blocked": + return "blocked" + return "abandon" + + +def _planning_failure_kind(message: str) -> str: + if message == _REVIEW_TWO_FINDINGS_FAILURE: + return "planning-step-failed" + return error_failure_kind(message) + + def _scope_bypass_context(target: Target, reason: str) -> str: lines = [ f"Scope expansion bypassed: {reason}", @@ -80,6 +177,7 @@ def expand_target_for_classification( timeout: int | None, attempt: int = 1, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> tuple[Target, str]: scope_dir = artifacts.root / "scope-expansion" bypass_reason = scope_expansion_bypass_reason(target) @@ -90,11 +188,9 @@ def expand_target_for_classification( (), bypass_reason=bypass_reason, ) - bypass_line = f"selected-candidate: seed — {bypass_reason}\n" - (scope_dir / 
"selection.stdout.log").write_text(bypass_line, encoding="utf-8") - (scope_dir / "selection-last-message.md").write_text( - bypass_line, - encoding="utf-8", + write_scope_selection_logs( + scope_dir, + ScopeSelection(kind="seed", reason=bypass_reason), ) return target, _scope_bypass_context(target, bypass_reason) @@ -110,6 +206,7 @@ def expand_target_for_classification( effort=effort, attempt=attempt, effort_metadata=effort_metadata, + log_mirroring=log_mirroring, timeout=timeout, ) write_scope_expansion_artifacts( @@ -145,12 +242,30 @@ def route_and_run( check_migrations: bool = True, effort_budget: EffortBudget | None = None, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> RouteResult: resolved_budget = effort_budget or resolve_effort_budget(effort, None) if live_dir is None: return RouteResult(outcome="not-routed", target=target) if check_migrations: + planning_result, planning_record = _try_planning_tick( + live_dir, taste, repo_root, artifacts, + agent=agent, model=model, effort=effort, + effort_budget=resolved_budget, + effort_metadata=effort_metadata, + timeout=timeout, commit_message_prefix=commit_message_prefix, + attempt=attempt, + finalize_commit=finalize_commit, + log_mirroring=log_mirroring, + ) + if planning_result != "not-routed": + return RouteResult( + outcome=planning_result, + target=target, + decision_record=planning_record, + ) + migration_result, migration_record = _try_migration_tick( live_dir, taste, repo_root, artifacts, agent=agent, model=model, effort=effort, @@ -160,6 +275,7 @@ def route_and_run( max_attempts=max_attempts, attempt=attempt, finalize_commit=finalize_commit, + log_mirroring=log_mirroring, ) if migration_result != "not-routed": return RouteResult( @@ -178,6 +294,7 @@ def route_and_run( effort=effort, attempt=attempt, effort_metadata=effort_metadata, + log_mirroring=log_mirroring, timeout=timeout, ) @@ -193,23 +310,16 @@ def route_and_run( model=model, effort=effort, 
effort_metadata=effort_metadata, + log_mirroring=log_mirroring, timeout=timeout, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) - return RouteResult( - outcome="abandon", + return _abandon_result( target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role="classify", - phase_reached="classify", - failure_kind=error_failure_kind(str(error)), - summary=summary, - ), + repo_root=repo_root, + error=error, + call_role="classify", ) print(f"Classification: {decision} — {target.description}") @@ -223,7 +333,7 @@ def route_and_run( migration_name = migration_name_from_target(target) head_before = get_head_sha(repo_root) try: - outcome = run_planning( + outcome = run_next_planning_step( migration_name, target.description, taste, @@ -239,73 +349,63 @@ def route_and_run( effort_metadata=effort_metadata, timeout=timeout, extra_context=planning_context, + log_mirroring=log_mirroring, ) except ContinuousRefactorError as error: - summary = sanitize_text(str(error), repo_root) or str(error) call_role = "planning.final-review" match = re.match(r"^(planning\.[a-z0-9-]+)\s+failed:", str(error)) if match: call_role = match.group(1) - return RouteResult( - outcome="abandon", + label = call_role.removeprefix("planning.") + paths = planning_artifact_paths( + artifacts, + attempt=attempt, + retry=1, + label=label, + agent=agent, + ) + return _abandon_result( target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role=call_role, - phase_reached=call_role, - failure_kind=error_failure_kind(str(error)), - summary=summary, - ), + repo_root=repo_root, + error=error, + call_role=call_role, + agent_last_message_path=paths.agent_last_message_path, + agent_stdout_path=paths.agent_stdout_path, + 
agent_stderr_path=paths.agent_stderr_path, ) - finalize_commit( - repo_root, - head_before, - build_commit_message( - f"{commit_message_prefix}: plan {migration_name}", - why=sanitize_text(outcome.reason, repo_root) or outcome.reason, - ), - artifacts=artifacts, - attempt=attempt, - phase="planning", - ) - - print(f"Planning: {describe_planning_outcome(outcome.status)} — {outcome.reason}") - if outcome.status == "skipped": - return RouteResult( - outcome="abandon", - target=target, - planning_context=planning_context, - decision_record=DecisionRecord( - decision="abandon", - retry_recommendation="new-target", - target=target.description, - call_role="planning.final-review", - phase_reached="planning.final-review", - failure_kind="planning-rejected", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, + route_outcome = _planning_route_outcome(outcome) + if route_outcome == "commit": + finalize_commit( + repo_root, + head_before, + build_commit_message( + f"{commit_message_prefix}: plan {migration_name}", + why=sanitize_text(outcome.reason, repo_root) or outcome.reason, ), + artifacts=artifacts, + attempt=attempt, + phase="planning", ) - return RouteResult( - outcome="commit", + + print(f"Planning: {describe_planning_outcome(outcome)} — {outcome.reason}") + return _planning_result( + outcome=route_outcome, target=target, planning_context=planning_context, - decision_record=DecisionRecord( - decision="commit", - retry_recommendation="none", - target=target.description, - call_role="planning.final-review", - phase_reached="planning.final-review", - failure_kind="none", - summary=sanitize_text(outcome.reason, repo_root) or outcome.reason, - ), + repo_root=repo_root, + reason=outcome.reason, + call_role=f"planning.{outcome.step}", + failure_kind="none" if route_outcome == "commit" else "planning-blocked", ) -def describe_planning_outcome(status: str) -> str: +def describe_planning_outcome(status: str | PlanningStepResult) -> str: + if not 
isinstance(status, str): + if status.terminal_outcome is None: + return f"{status.step} accepted" + status = status.terminal_outcome.status if status == "ready": return "queued for execution" if status == "awaiting_human_review": diff --git a/src/continuous_refactoring/scope_candidates.py b/src/continuous_refactoring/scope_candidates.py index 93f2f3f..b766c7b 100644 --- a/src/continuous_refactoring/scope_candidates.py +++ b/src/continuous_refactoring/scope_candidates.py @@ -5,6 +5,7 @@ from collections import Counter, defaultdict from collections.abc import Callable from dataclasses import dataclass +from functools import lru_cache from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING, Literal @@ -94,10 +95,14 @@ def _reference_aliases(path: str) -> tuple[str, ...]: def _text_mentions_alias(text: str, alias: str) -> bool: if not alias: return False - pattern = re.compile( + return _alias_pattern(alias).search(text) is not None + + +@lru_cache(maxsize=4096) +def _alias_pattern(alias: str) -> re.Pattern[str]: + return re.compile( rf"(? 
str: @@ -357,20 +362,6 @@ def _rank_paths( return [path for _score, path in ranked] -def _include_local(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: - has_pairing = "source-test-pairing" in support_kinds - has_non_cochange = any(kind != "git-cochange" for kind in support_kinds) - return has_pairing or (same_dir and has_non_cochange) - - -def _include_cross(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: - return not ( - same_dir - and support_kinds - and all(kind == "git-cochange" for kind in support_kinds) - ) - - def build_scope_candidates( target: Target, repo_root: Path, @@ -394,8 +385,23 @@ def build_scope_candidates( ) candidates = [_build_seed_candidate(seed_file)] + def include_local(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: + return "source-test-pairing" in support_kinds or ( + same_dir and any(kind != "git-cochange" for kind in support_kinds) + ) + + def include_cross(same_dir: bool, support_kinds: tuple[_SupportKind, ...]) -> bool: + return not ( + same_dir + and support_kinds + and all(kind == "git-cochange" for kind in support_kinds) + ) + local_ranked = _rank_paths( - support.scores, support.support_kinds, seed_file, _include_local, + support.scores, + support.support_kinds, + seed_file, + include_local, ) local_extras = tuple(local_ranked[: max_files - 1]) if local_extras: @@ -407,7 +413,10 @@ def build_scope_candidates( ) cross_ranked = _rank_paths( - support.scores, support.support_kinds, seed_file, _include_cross, + support.scores, + support.support_kinds, + seed_file, + include_cross, ) cross_extras = tuple(cross_ranked[: max_files - 1]) if cross_extras: diff --git a/src/continuous_refactoring/scope_expansion.py b/src/continuous_refactoring/scope_expansion.py index 02e4b05..2bc2c9f 100644 --- a/src/continuous_refactoring/scope_expansion.py +++ b/src/continuous_refactoring/scope_expansion.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -import re from dataclasses 
import asdict, dataclass, replace from pathlib import Path from typing import TYPE_CHECKING @@ -21,13 +20,15 @@ from continuous_refactoring.agent import maybe_run_agent from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.prompts import compose_scope_selection_prompt from continuous_refactoring.scope_candidates import ScopeCandidate, ScopeCandidateKind -_SELECTION_RE = re.compile( - r"^selected-candidate:\s*(seed|local-cluster|cross-cluster)" - r"(?:\s*[—-]\s*(.+))?$", - re.IGNORECASE, +_SCOPE_SELECTION_PREFIX = "selected-candidate:" +_KNOWN_SCOPE_SELECTION_KINDS: tuple[ScopeCandidateKind, ...] = ( + "local-cluster", + "cross-cluster", + "seed", ) @@ -41,12 +42,46 @@ def _scope_selection_line(selection: ScopeSelection) -> str: return f"selected-candidate: {selection.kind} — {selection.reason}\n" -def _write_selection_logs(selection_dir: Path, selection: ScopeSelection) -> None: +def write_scope_selection_logs(selection_dir: Path, selection: ScopeSelection) -> None: line = _scope_selection_line(selection) (selection_dir / "selection.stdout.log").write_text(line, encoding="utf-8") (selection_dir / "selection-last-message.md").write_text(line, encoding="utf-8") +def _parse_selection_line(line: str) -> tuple[ScopeCandidateKind, str] | None: + line = line.strip() + lowered = line.lower() + if not lowered.startswith(_SCOPE_SELECTION_PREFIX): + return None + body = line[len(_SCOPE_SELECTION_PREFIX):].strip() + body_lower = body.lower() + for kind in _KNOWN_SCOPE_SELECTION_KINDS: + if not body_lower.startswith(kind): + continue + reason = body[len(kind):].strip() + if not reason: + return kind, kind + if reason[0] not in {"—", "-"}: + return None + reason = reason[1:].strip() + return kind, reason or kind + return None + + +def _require_unique_candidate_kinds( + candidate_kinds: tuple[ScopeCandidateKind, ...], +) -> tuple[ScopeCandidateKind, ...]: + duplicates = 
tuple( + kind for kind in _KNOWN_SCOPE_SELECTION_KINDS if candidate_kinds.count(kind) > 1 + ) + if duplicates: + quoted = ", ".join(repr(kind) for kind in duplicates) + raise ContinuousRefactorError( + f"Scope selection requires unique candidate kinds, got duplicates: {quoted}" + ) + return candidate_kinds + + def scope_expansion_bypass_reason(target: Target) -> str | None: if len(target.files) == 0: return "scope expansion requires a seed file" @@ -61,21 +96,19 @@ def parse_scope_selection( stdout: str, candidate_kinds: tuple[ScopeCandidateKind, ...], ) -> ScopeSelection: + candidate_kinds = _require_unique_candidate_kinds(candidate_kinds) non_blank = [line.strip() for line in stdout.splitlines() if line.strip()] if not non_blank: raise ContinuousRefactorError("Scope selection produced no output") for stripped in reversed(non_blank): - match = _SELECTION_RE.match(stripped) - if not match: + parsed = _parse_selection_line(stripped) + if parsed is None: continue - kind = match.group(1).lower() + kind, reason = parsed if kind not in candidate_kinds: raise ContinuousRefactorError( f"Selection chose unavailable candidate: {kind!r}" ) - reason = match.group(2).strip() if match.group(2) else "" - if not reason: - reason = kind return ScopeSelection(kind=kind, reason=reason) raise ContinuousRefactorError( f"Scope selection produced unrecognised output: {non_blank[-1]!r}" @@ -100,7 +133,11 @@ def select_scope_candidate( attempt: int = 1, retry: int = 1, effort_metadata: dict[str, object] | None = None, + log_mirroring: LogMirroring = LogMirroring(), ) -> ScopeSelection: + candidate_kinds = _require_unique_candidate_kinds( + tuple(candidate.kind for candidate in candidates) + ) selection_dir = artifacts.root / "scope-expansion" selection_dir.mkdir(parents=True, exist_ok=True) selection_stdout_path = selection_dir / "selection.stdout.log" @@ -111,7 +148,7 @@ def select_scope_candidate( kind=candidates[0].kind, reason="only viable candidate", ) - 
_write_selection_logs(selection_dir, selection) + write_scope_selection_logs(selection_dir, selection) return selection call_role = "scope-expansion" @@ -133,7 +170,7 @@ def select_scope_candidate( stdout_path=selection_stdout_path, stderr_path=selection_dir / "selection.stderr.log", last_message_path=selection_last_message_path if agent == "codex" else None, - mirror_to_terminal=False, + mirror_to_terminal=log_mirroring.agent, timeout=timeout, ) except ContinuousRefactorError as error: @@ -164,7 +201,6 @@ def select_scope_candidate( raise ContinuousRefactorError( f"Scope selection agent failed with exit code {result.returncode}" ) - candidate_kinds = tuple(candidate.kind for candidate in candidates) try: selection = parse_scope_selection(result.stdout, candidate_kinds) except ContinuousRefactorError as error: diff --git a/src/continuous_refactoring/targeting.py b/src/continuous_refactoring/targeting.py index a943f02..95e91ae 100644 --- a/src/continuous_refactoring/targeting.py +++ b/src/continuous_refactoring/targeting.py @@ -45,10 +45,6 @@ def _warn_skip(message: str) -> None: print(f"warning: target line has {message}, skipping", file=sys.stderr) -class _InvalidTargetFieldError(ValueError): - """Raised when a JSONL target line contains an invalid optional field.""" - - def parse_extensions(raw: str) -> tuple[str, ...]: """Convert comma-separated extensions to glob patterns. @@ -80,25 +76,27 @@ def parse_paths_arg(raw_paths: str | None) -> tuple[str, ...] 
| None: return parsed or None -def _optional_str(data: dict[str, object], key: str) -> str | None: +def _optional_str(data: dict[str, object], key: str) -> tuple[bool, str | None]: value = data.get(key) if value is None: - return None + return True, None if isinstance(value, str) and value.strip(): - return value + return True, value _warn_skip(f"non-string or empty {key}") - raise _InvalidTargetFieldError(key) + return False, None -def _optional_effort_override(data: dict[str, object]) -> str | None: - value = _optional_str(data, "effort-override") +def _optional_effort_override(data: dict[str, object]) -> tuple[bool, str | None]: + valid, value = _optional_str(data, "effort-override") + if not valid: + return False, None if value is None: - return None + return True, None try: - return require_effort_tier(value, field="effort-override") + return True, require_effort_tier(value, field="effort-override") except ContinuousRefactorError: _warn_skip("invalid effort-override") - raise _InvalidTargetFieldError("effort-override") + return False, None def validate_target_line(data: object) -> Target | None: @@ -120,11 +118,10 @@ def validate_target_line(data: object) -> Target | None: _warn_skip("invalid file entries") return None - try: - scoping = _optional_str(data, "scoping") - model_override = _optional_str(data, "model-override") - effort_override = _optional_effort_override(data) - except _InvalidTargetFieldError: + valid_scoping, scoping = _optional_str(data, "scoping") + valid_model_override, model_override = _optional_str(data, "model-override") + valid_effort_override, effort_override = _optional_effort_override(data) + if not (valid_scoping and valid_model_override and valid_effort_override): return None return Target( diff --git a/tests/conftest.py b/tests/conftest.py index 372f0e8..891004f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,8 @@ import continuous_refactoring import continuous_refactoring.loop +import 
continuous_refactoring.refactor_attempts +import continuous_refactoring.targeting from continuous_refactoring.artifacts import CommandCapture from continuous_refactoring.config import ( ProjectEntry, @@ -95,6 +97,10 @@ def assert_single_run_final_status(repo_root: Path, expected_status: str) -> Non assert summary["final_status"] == expected_status +def fail_if_taste_agent_runs(*_args: object, **_kwargs: object) -> int: + pytest.fail("taste agent should not be invoked") + + def make_taste_agent_writer( *, content: str | None = None, @@ -102,16 +108,16 @@ def make_taste_agent_writer( captured: dict[str, str] | None = None, ) -> Callable[..., int]: def fake( - agent: str, - model: str, - effort: str, + _agent: str, + _model: str, + _effort: str, prompt: str, - repo_root: Path, + _repo_root: Path, *, content_path: Path, settle_path: Path, - settle_window_seconds: float = 2.0, - poll_interval_seconds: float = 0.1, + _settle_window_seconds: float = 2.0, + _poll_interval_seconds: float = 0.1, ) -> int: assert content_path == extract_taste_path(prompt) assert settle_path == extract_settle_path(prompt) @@ -159,6 +165,28 @@ class RegisteredProjectLayout: taste_path: Path +def write_targets_file( + tmp_path: Path, + *, + count: int | None = None, + targets: list[dict[str, object]] | None = None, +) -> Path: + if (count is None) == (targets is None): + raise AssertionError("provide exactly one of count or targets") + if targets is None: + assert count is not None + targets = [ + {"description": f"target-{index}", "files": [f"file{index}.py"]} + for index in range(count) + ] + targets_file = tmp_path / "targets.jsonl" + targets_file.write_text( + "\n".join(json.dumps(target) for target in targets), + encoding="utf-8", + ) + return targets_file + + def init_repo(path: Path) -> None: path.mkdir(parents=True, exist_ok=True) continuous_refactoring.run_command(["git", "init", "-b", "main"], cwd=path) @@ -341,6 +369,23 @@ def failing_tests( ) +def install_run_command_spy( + 
monkeypatch: pytest.MonkeyPatch, +) -> list[tuple[str, ...]]: + captured: list[tuple[str, ...]] = [] + real_run_command = continuous_refactoring.run_command + + def spy(command, cwd, *args, **kwargs): # type: ignore[no-untyped-def] + captured.append(tuple(command)) + return real_run_command(command, cwd, *args, **kwargs) + + monkeypatch.setattr("continuous_refactoring.git.run_command", spy) + monkeypatch.setattr("continuous_refactoring.loop.run_command", spy) + monkeypatch.setattr("continuous_refactoring.refactor_attempts.run_command", spy) + monkeypatch.setattr("continuous_refactoring.targeting.run_command", spy) + return captured + + def _default_validation_command(repo_root: Path) -> str: test_script = repo_root.parent / "check_tests.py" if not test_script.exists(): @@ -405,26 +450,33 @@ def make_run_once_args( globs: str | None = None, targets: Path | None = None, paths: str | None = None, + show_agent_logs: bool = False, + show_command_logs: bool = False, ) -> argparse.Namespace: - return argparse.Namespace( - **_build_run_args( - repo_root=repo_root, - agent=agent, - model=model, - effort=effort, - default_effort=default_effort, - max_allowed_effort=max_allowed_effort, - validation_command=validation_command, - scope_instruction=scope_instruction, - timeout=timeout, - refactoring_prompt=refactoring_prompt, - extensions=extensions, - globs=globs, - targets=targets, - paths=paths, - ), - fix_prompt=None, + args = _build_run_args( + repo_root=repo_root, + agent=agent, + model=model, + effort=effort, + default_effort=default_effort, + max_allowed_effort=max_allowed_effort, + validation_command=validation_command, + scope_instruction=scope_instruction, + timeout=timeout, + refactoring_prompt=refactoring_prompt, + extensions=extensions, + globs=globs, + targets=targets, + paths=paths, ) + args.update( + { + "fix_prompt": None, + "show_agent_logs": show_agent_logs, + "show_command_logs": show_command_logs, + } + ) + return argparse.Namespace(**args) def 
make_run_loop_args( diff --git a/tests/test_cli_init_taste.py b/tests/test_cli_init_taste.py index 6f2cc02..c3bdbbf 100644 --- a/tests/test_cli_init_taste.py +++ b/tests/test_cli_init_taste.py @@ -282,51 +282,78 @@ def test_init_in_repo_taste_conflict_force_replaces_with_old_taste( assert not source.exists() -def test_init_in_repo_taste_rejects_outside_repo( +@pytest.mark.parametrize( + ("args", "expected_message"), + [ + ( + make_init_args(Path("unused"), in_repo_taste=Path("../taste.md")), + "--in-repo-taste must be inside the repo", + ), + ( + make_init_args(Path("unused"), live_migrations_dir=Path("../outside")), + "--live-migrations-dir must be inside the repo", + ), + ], + ids=["in-repo-taste", "live-migrations-dir"], +) +def test_init_rejects_outside_repo( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + args: argparse.Namespace, + expected_message: str, ) -> None: repo = init_repo_with_temp_home(tmp_path, monkeypatch) - - args = argparse.Namespace( - path=repo, - in_repo_taste=Path("../taste.md"), - live_migrations_dir=None, - ) + args.path = repo with pytest.raises(SystemExit) as exc_info: _handle_init(args) assert exc_info.value.code == 2 err = capsys.readouterr().err - assert "--in-repo-taste must be inside the repo" in err + assert expected_message in err @pytest.mark.parametrize( - "taste_arg", - [Path("."), Path("existing-dir")], - ids=["repo-root", "existing-dir"], + ("args", "setup_name", "expected_message"), + [ + ( + make_init_args(Path("unused"), in_repo_taste=Path(".")), + "existing-dir", + "--in-repo-taste must point to a file", + ), + ( + make_init_args(Path("unused"), in_repo_taste=Path("existing-dir")), + "existing-dir", + "--in-repo-taste must point to a file", + ), + ( + make_init_args(Path("unused"), live_migrations_dir=Path("existing-file")), + "existing-file", + "--live-migrations-dir must point to a directory", + ), + ], + ids=["taste-repo-root", "taste-existing-dir", 
"live-migrations-existing-file"], ) -def test_init_in_repo_taste_rejects_directories( +def test_init_rejects_wrong_existing_path_kind( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], - taste_arg: Path, + args: argparse.Namespace, + setup_name: str, + expected_message: str, ) -> None: repo = init_repo_with_temp_home(tmp_path, monkeypatch) - (repo / "existing-dir").mkdir() - - args = argparse.Namespace( - path=repo, - in_repo_taste=taste_arg, - live_migrations_dir=None, - ) + args.path = repo + if setup_name == "existing-dir": + (repo / setup_name).mkdir() + else: + (repo / setup_name).write_text("not a directory\n", encoding="utf-8") with pytest.raises(SystemExit) as exc_info: _handle_init(args) assert exc_info.value.code == 2 err = capsys.readouterr().err - assert "--in-repo-taste must point to a file" in err + assert expected_message in err def test_init_idempotent( @@ -528,22 +555,6 @@ def test_init_live_migrations_dir_conflict_force_replaces_destination( assert not (repo / ".migrations").exists() -def test_init_live_migrations_dir_rejects_outside_repo( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - repo = init_repo_with_temp_home(tmp_path, monkeypatch) - - args = argparse.Namespace(path=repo, live_migrations_dir=Path("../outside")) - with pytest.raises(SystemExit) as exc_info: - _handle_init(args) - - assert exc_info.value.code == 2 - err = capsys.readouterr().err - assert "must be inside the repo" in err - - def test_init_exits_cleanly_on_malformed_manifest( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_cli_migrations.py b/tests/test_cli_migrations.py new file mode 100644 index 0000000..3702a7f --- /dev/null +++ b/tests/test_cli_migrations.py @@ -0,0 +1,1485 @@ +from __future__ import annotations + +import argparse +import json +import shlex +from dataclasses import replace +from pathlib import Path + +import pytest + +from conftest import 
init_repo +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.artifacts import CommandCapture +from continuous_refactoring.cli import build_parser +from continuous_refactoring.config import register_project, set_live_migrations_dir +from continuous_refactoring.git import run_command +from continuous_refactoring.migration_cli import ( + handle_migration, + handle_migration_doctor, + handle_migration_list, + handle_migration_refine, + handle_migration_review, + resolve_migration_target, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + PhaseSpec, + load_manifest, + save_manifest, +) +from continuous_refactoring.planning_publish import snapshot_tree_digest +from continuous_refactoring.planning_state import ( + complete_planning_step, + load_planning_state, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) + +_CREATED = "2025-01-01T00:00:00+00:00" +_PHASE = PhaseSpec( + name="setup", + file="phase-1-setup.md", + done=False, + precondition="always", +) + + +def test_migration_parser_accepts_list_and_doctor() -> None: + parser = build_parser() + + list_args = parser.parse_args(["migration", "list"]) + assert list_args.command == "migration" + assert list_args.migration_command == "list" + assert list_args.handler.__name__ == "handle_migration" + + filtered = parser.parse_args( + ["migration", "list", "--status", "planning", "--awaiting-review"] + ) + assert filtered.status == "planning" + assert filtered.awaiting_review is True + + doctor_args = parser.parse_args(["migration", "doctor", "my-mig"]) + assert doctor_args.migration_command == "doctor" + assert doctor_args.target == "my-mig" + assert doctor_args.all is False + + review_args = parser.parse_args( + [ + "migration", + "review", + "my-mig", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + ] + ) + assert review_args.migration_command == "review" + assert 
review_args.target == "my-mig" + assert review_args.agent == "codex" + assert review_args.model == "test-model" + assert review_args.effort == "low" + + +def test_migration_parser_accepts_doctor_all() -> None: + parser = build_parser() + + args = parser.parse_args(["migration", "doctor", "--all"]) + + assert args.command == "migration" + assert args.migration_command == "doctor" + assert args.target is None + assert args.all is True + + +def test_documented_migration_commands_match_parser() -> None: + readme = Path("README.md").read_text(encoding="utf-8") + parser = build_parser() + documented_commands = _canonical_migration_commands(readme) + + assert documented_commands == ( + "continuous-refactoring migration list", + "continuous-refactoring migration list --status planning", + "continuous-refactoring migration list --awaiting-review", + "continuous-refactoring migration doctor ", + "continuous-refactoring migration doctor --all", + ( + "continuous-refactoring migration review --with codex " + "--model gpt-5 --effort high" + ), + ( + "continuous-refactoring migration refine --message " + "\"split the risky phase\" --with codex --model gpt-5 --effort high" + ), + ( + "continuous-refactoring migration refine --file " + "feedback.md --with codex --model gpt-5 --effort high" + ), + ) + + for command in documented_commands: + argv = _argv_from_documented_command(command) + args = parser.parse_args(argv) + assert args.command == "migration" + assert args.handler.__name__ == "handle_migration" + + +def _canonical_migration_commands(readme: str) -> tuple[str, ...]: + marker = "Canonical migration commands:" + lines = readme.splitlines() + start = lines.index(marker) + block_start = lines.index("```bash", start) + block_end = lines.index("```", block_start + 1) + return tuple( + line + for line in lines[block_start + 1:block_end] + if line.startswith("continuous-refactoring migration ") + ) + + +def _argv_from_documented_command(command: str) -> list[str]: + parts = 
shlex.split(command) + if parts[0] != "continuous-refactoring": + raise AssertionError(f"unexpected command prefix: {command}") + return [ + "auth-cleanup" if part == "" else part + for part in parts[1:] + ] + + +def test_migration_refine_requires_message_or_file() -> None: + parser = build_parser() + + with pytest.raises(SystemExit) as missing_exit: + parser.parse_args( + [ + "migration", + "refine", + "my-mig", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + ] + ) + assert missing_exit.value.code == 2 + + with pytest.raises(SystemExit) as both_exit: + parser.parse_args( + [ + "migration", + "refine", + "my-mig", + "--message", + "tighten it", + "--file", + "feedback.md", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + ] + ) + assert both_exit.value.code == 2 + + args = parser.parse_args( + [ + "migration", + "refine", + "my-mig", + "--message", + "tighten it", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + "--show-agent-logs", + ] + ) + + assert args.migration_command == "refine" + assert args.target == "my-mig" + assert args.message == "tighten it" + assert args.file is None + assert args.agent == "codex" + assert args.model == "test-model" + assert args.effort == "low" + assert args.show_agent_logs is True + + with pytest.raises(SystemExit) as command_logs_exit: + parser.parse_args( + [ + "migration", + "refine", + "my-mig", + "--message", + "tighten it", + "--with", + "codex", + "--model", + "test-model", + "--effort", + "low", + "--show-command-logs", + ] + ) + assert command_logs_exit.value.code == 2 + + +def test_migration_list_includes_planning_ready_review_and_done_statuses( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "done-mig", status="done", current_phase="") + planning_dir = _write_migration( + live_dir, 
"planning-mig", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, planning_dir) + _write_migration( + live_dir, + "ready-review", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + + handle_migration_list(_list_args()) + + lines = [line.split("\t") for line in capsys.readouterr().out.splitlines()] + assert lines == [ + [ + "done-mig", + "done", + "(none)", + "no", + _CREATED, + "(none)", + "(none)", + ], + [ + "planning-mig", + "planning", + "planning:approaches", + "no", + _CREATED, + "(none)", + "(none)", + ], + [ + "ready-review", + "ready", + "phase-1-setup.md", + "yes", + _CREATED, + "(none)", + "needs approval", + ], + ] + + +def test_migration_list_filters_by_status_and_awaiting_review( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration( + live_dir, "planning-review", status="planning", current_phase="", phases=(), + ) + _write_migration(live_dir, "ready-review", awaiting_human_review=True) + _write_migration(live_dir, "ready-normal") + + handle_migration_list(_list_args(status="ready", awaiting_review=True)) + + assert capsys.readouterr().out.splitlines() == [ + "ready-review\tready\tphase-1-setup.md\tyes\t" + f"{_CREATED}\t(none)\t(none)" + ] + + +def test_migration_list_marks_invalid_planning_state_as_blocked( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + planning_dir = _write_migration( + live_dir, "planning-mig", status="planning", current_phase="", phases=(), + ) + state_path = planning_state_path(planning_dir) + state_path.parent.mkdir(parents=True) + state_path.write_text("{not json\n", encoding="utf-8") + + handle_migration_list(_list_args()) + + fields = capsys.readouterr().out.strip().split("\t") + assert fields[0:3] == 
["planning-mig", "planning", "planning:blocked"] + assert fields[-1] == "planning-state-invalid" + + +def test_migration_list_marks_invalid_ready_cursor_as_blocked( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "ready-mig") + + def fail_resolve(_manifest: MigrationManifest) -> PhaseSpec: + raise ContinuousRefactorError("invalid current phase") + + monkeypatch.setattr( + "continuous_refactoring.migration_cli.resolve_current_phase", + fail_resolve, + ) + + handle_migration_list(_list_args()) + + fields = capsys.readouterr().out.strip().split("\t") + assert fields[0:3] == ["ready-mig", "ready", "blocked"] + assert fields[-1] == "invalid-current-phase" + + +def test_migration_resolver_accepts_slug_or_path_inside_live_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + + by_slug = resolve_migration_target( + live_dir=live_dir, repo_root=repo, value="target", + ) + by_path = resolve_migration_target( + live_dir=live_dir, repo_root=repo, value="migrations/target", + ) + + assert by_slug.slug == "target" + assert by_slug.path == migration_dir + assert by_path == by_slug + + +def test_migration_resolver_rejects_outside_path_and_symlink_escape( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + outside = tmp_path / "outside" + outside.mkdir() + + with pytest.raises(ContinuousRefactorError, match="inside live migrations dir"): + resolve_migration_target( + live_dir=live_dir, repo_root=repo, value=str(outside), + ) + + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: 
{error}") + + with pytest.raises(ContinuousRefactorError, match="symlink"): + resolve_migration_target( + live_dir=live_dir, repo_root=repo, value=str(link), + ) + + +def test_migration_resolver_rejects_parent_traversal_before_resolution( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "target") + + with pytest.raises(ContinuousRefactorError, match="parent traversal"): + resolve_migration_target( + live_dir=live_dir, + repo_root=repo, + value="migrations/../migrations/target", + ) + + +def test_migration_resolver_rejects_ambiguous_slug_path_collision( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "ambiguous") + other = _write_migration(live_dir, "other") + link = repo / "ambiguous" + try: + link.symlink_to(other, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + with pytest.raises(ContinuousRefactorError, match="ambiguous"): + resolve_migration_target( + live_dir=live_dir, repo_root=repo, value="ambiguous", + ) + + +def test_migration_review_accepts_slug_or_path_inside_live_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + ) + seen: list[Path] = [] + + def fake_review(request: object) -> None: + seen.append(request.target.path) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.handle_staged_migration_review", + fake_review, + ) + + handle_migration_review(_review_args("target")) + handle_migration_review(_review_args("migrations/target")) + + assert seen == [migration_dir, migration_dir] + + +def test_migration_review_rejects_outside_path_and_symlink_escape( + 
tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + outside = tmp_path / "outside" + outside.mkdir() + + with pytest.raises(SystemExit) as outside_exit: + handle_migration_review(_review_args(str(outside))) + + assert outside_exit.value.code == 2 + assert "inside live migrations dir" in capsys.readouterr().err + + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + with pytest.raises(SystemExit) as link_exit: + handle_migration_review(_review_args(str(link.relative_to(repo)))) + + assert link_exit.value.code == 2 + assert "symlink" in capsys.readouterr().err + + +def test_migration_review_rejects_missing_or_not_flagged_migration( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "not-flagged") + + with pytest.raises(SystemExit) as missing_exit: + handle_migration_review(_review_args("missing")) + + assert missing_exit.value.code == 2 + assert "does not exist" in capsys.readouterr().err + + with pytest.raises(SystemExit) as not_flagged_exit: + handle_migration_review(_review_args("not-flagged")) + + assert not_flagged_exit.value.code == 2 + assert "not flagged" in capsys.readouterr().err + + +def test_migration_review_runs_agent_against_work_dir( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + seen: dict[str, Path | str] = {} + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: Path, + ) -> 
int: + seen["agent"] = agent + seen["cwd"] = repo_root + seen["prompt"] = prompt + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + handle_migration_review(_review_args("target")) + + assert seen["agent"] == "codex" + assert seen["cwd"] != migration_dir + assert isinstance(seen["cwd"], Path) + assert seen["cwd"].name == "target" + assert str(seen["cwd"]).endswith("/work/target") + assert str(migration_dir) in str(seen["prompt"]) + assert str(seen["cwd"]) in str(seen["prompt"]) + reloaded = load_manifest(migration_dir / "manifest.json") + assert reloaded.awaiting_human_review is False + assert reloaded.human_review_reason is None + + +def test_migration_review_failure_leaves_live_snapshot_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + lambda *_args: 7, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 7 + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_review_rejects_stale_base_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before 
= snapshot_tree_digest(migration_dir) + concurrent: dict[str, str] = {} + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: Path, + ) -> int: + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + (migration_dir / "plan.md").write_text("# Changed live plan\n", encoding="utf-8") + _commit_all(repo, "stale live migration") + concurrent["digest"] = snapshot_tree_digest(migration_dir) + concurrent["plan"] = (migration_dir / "plan.md").read_text(encoding="utf-8") + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "stale base snapshot" in err + assert "continuous-refactoring migration doctor target" in err + assert "continuous-refactoring migration review target" in err + assert snapshot_tree_digest(migration_dir) != before + assert snapshot_tree_digest(migration_dir) == concurrent["digest"] + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == concurrent["plan"] + + +def test_migration_review_rejects_inconsistent_workspace_and_preserves_live_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + before_manifest = load_manifest(migration_dir / "manifest.json") + before_phase = (migration_dir / _PHASE.file).read_text(encoding="utf-8") + + def fake_interactive( + agent: str, model: str, effort: str, prompt: str, repo_root: 
Path, + ) -> int: + manifest = load_manifest(repo_root / "manifest.json") + save_manifest( + replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ), + repo_root / "manifest.json", + ) + (repo_root / _PHASE.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n", + encoding="utf-8", + ) + return 0 + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + fake_interactive, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "review workspace validation failed" in err + assert "missing-phase-definition-of-done" in err + assert snapshot_tree_digest(migration_dir) == before + assert load_manifest(migration_dir / "manifest.json") == before_manifest + assert (migration_dir / _PHASE.file).read_text(encoding="utf-8") == before_phase + + +def test_migration_review_refuses_publish_when_review_flag_remains( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + monkeypatch.setattr( + "continuous_refactoring.review_cli.run_agent_interactive", + lambda *_args: 0, + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_review(_review_args("target")) + + assert exc_info.value.code == 1 + assert "awaiting_human_review is still set" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_rejects_outside_path_and_symlink_escape( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + 
outside = tmp_path / "outside" + outside.mkdir() + + with pytest.raises(SystemExit) as outside_exit: + handle_migration_refine(_refine_args(str(outside))) + + assert outside_exit.value.code == 2 + assert "inside live migrations dir" in capsys.readouterr().err + + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + with pytest.raises(SystemExit) as link_exit: + handle_migration_refine(_refine_args(str(link.relative_to(repo)))) + + assert link_exit.value.code == 2 + assert "symlink" in capsys.readouterr().err + + +def test_migration_refine_resumes_from_current_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Expanded.\n", + { + "plan.md": "# Refined Plan\n", + _PHASE.file: _phase_doc("always", "Done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine( + _refine_args( + "target", + message="split phase one", + show_agent_logs=True, + ) + ) + + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels == ["expand"] + assert fake.mirror_to_terminal == [True] + assert state.next_step == "review" + assert state.feedback[-1].source == "message" + assert state.feedback[-1].text == "split phase one" + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == "# Refined Plan\n" + + +def test_migration_refine_reads_feedback_from_file( + tmp_path: Path, + 
monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + feedback_path = repo / "feedback.md" + feedback_text = "Split phase one.\nPreserve this exact text.\n" + feedback_path.write_text(feedback_text, encoding="utf-8") + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Expanded.\n", + { + "plan.md": "# File Feedback Plan\n", + _PHASE.file: _phase_doc("always", "Done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine(_refine_args("target", file=feedback_path)) + + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels == ["expand"] + assert state.feedback[-1].source == "file" + assert state.feedback[-1].text == feedback_text + + +def test_migration_refine_rejects_empty_feedback_file( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + feedback_path = tmp_path / "empty-feedback.md" + feedback_path.write_text(" \n\t", encoding="utf-8") + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target", file=feedback_path)) + + assert exc_info.value.code == 2 + assert "must not be empty" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_reopens_unexecuted_ready_migration_to_planning( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = 
_init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Revised.\n", + { + "plan.md": "# Plan v2\n", + _PHASE.file: _phase_doc("always", "Still done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine(_refine_args("target", message="make setup smaller")) + + manifest = load_manifest(migration_dir / "manifest.json") + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels == ["revise"] + assert manifest.status == "planning" + assert manifest.awaiting_human_review is False + assert manifest.human_review_reason is None + assert manifest.current_phase == "setup" + assert all(not phase.done for phase in manifest.phases) + assert state.next_step == "review-2" + assert state.revision_base_step_counts == (5,) + assert planning_stage_stdout_path(migration_dir, "final-review").is_file() + + +def test_migration_refine_reopens_ready_awaiting_review_without_approving_review( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, + "target", + awaiting_human_review=True, + human_review_reason="needs approval", + ) + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + fake = _RefineAgent( + [ + _agent_response( + "Revised.\n", + { + "plan.md": "# Plan v2\n", + _PHASE.file: _phase_doc("always", "Still done."), + }, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + handle_migration_refine(_refine_args("target", message="rewrite after review")) + + manifest = load_manifest(migration_dir / "manifest.json") + state = load_planning_state(repo, planning_state_path(migration_dir)) + assert fake.stage_labels 
== ["revise"] + assert manifest.status == "planning" + assert manifest.awaiting_human_review is False + assert manifest.human_review_reason is None + assert manifest.current_phase == "setup" + assert all(not phase.done for phase in manifest.phases) + assert state.next_step == "review-2" + assert state.feedback[-1].text == "rewrite after review" + + +def test_migration_refine_refuses_migration_with_completed_phase( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + done_phase = replace(_PHASE, done=True) + migration_dir = _write_migration(live_dir, "target", phases=(done_phase,)) + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "completed phase" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +@pytest.mark.parametrize("status", ["in-progress", "done", "skipped"]) +def test_migration_refine_refuses_non_planning_status_matrix( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + status: str, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target", status=status) + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "only planning or unexecuted ready migrations can be refined" in ( + capsys.readouterr().err + ) + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_refuses_ready_state_with_advanced_cursor( + tmp_path: Path, + monkeypatch: 
pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + followup_phase = PhaseSpec( + name="migrate", + file="phase-2-migrate.md", + done=False, + precondition="setup is done", + ) + migration_dir = _write_migration( + live_dir, + "target", + current_phase="migrate", + phases=(_PHASE, followup_phase), + ) + _write_terminal_ready_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "already advanced past its first phase" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_refuses_non_reopenable_ready_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "target") + _write_terminal_skipped_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target")) + + assert exc_info.value.code == 2 + assert "Cannot reopen planning state" in capsys.readouterr().err + assert snapshot_tree_digest(migration_dir) == before + + +def test_migration_refine_failure_leaves_live_snapshot_unchanged( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, migration_dir) + _commit_all(repo) + before = snapshot_tree_digest(migration_dir) + before_state = planning_state_path(migration_dir).read_text(encoding="utf-8") + fake = 
_RefineAgent([_agent_response("partial\n", {}, returncode=1)]) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target", message="try it")) + + assert exc_info.value.code == 1 + assert snapshot_tree_digest(migration_dir) == before + assert planning_state_path(migration_dir).read_text(encoding="utf-8") == before_state + + +def test_migration_refine_rejects_stale_base_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration( + live_dir, "target", status="planning", current_phase="", phases=(), + ) + _write_planning_state(repo, migration_dir) + _commit_all(repo) + concurrent: dict[str, str] = {} + + def on_call(_migration_dir: Path) -> None: + (migration_dir / "plan.md").write_text("# Concurrent Plan\n", encoding="utf-8") + _commit_all(repo, "stale live migration") + concurrent["digest"] = snapshot_tree_digest(migration_dir) + concurrent["plan"] = (migration_dir / "plan.md").read_text(encoding="utf-8") + + fake = _RefineAgent([_agent_response("Approaches.\n", {})], on_call=on_call) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", fake) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_refine(_refine_args("target", message="try it")) + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "stale base snapshot" in err + assert "continuous-refactoring migration doctor target" in err + assert "continuous-refactoring migration refine target" in err + assert snapshot_tree_digest(migration_dir) == concurrent["digest"] + assert (migration_dir / "plan.md").read_text(encoding="utf-8") == concurrent["plan"] + + +def test_migration_doctor_checks_one_migration_by_name( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: 
pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "valid") + + handle_migration_doctor(_doctor_args(target="valid")) + + assert capsys.readouterr().out == "" + + +def test_migration_doctor_all_checks_every_live_migration( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "valid") + (live_dir / "broken").mkdir() + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "broken\terror\tmissing-manifest" in out + + +def test_migration_doctor_reports_missing_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration( + live_dir, "planning-mig", status="planning", current_phase="", phases=(), + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="planning-mig")) + + assert exc_info.value.code == 1 + assert "planning-mig\terror\tplanning-state-missing" in capsys.readouterr().out + + +def test_migration_doctor_reports_ready_gate_phase_doc_drift( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + migration_dir = _write_migration(live_dir, "ready-mig") + (migration_dir / _PHASE.file).write_text( + "# Phase\n\n## Precondition\n\nReady.\n", + encoding="utf-8", + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="ready-mig")) + + assert exc_info.value.code == 1 + assert "ready-mig\terror\tmissing-phase-definition-of-done" in ( + capsys.readouterr().out + ) + + +def 
test_migration_doctor_reports_transaction_root_and_lock_presence( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + tx_root = live_dir / "__transactions__" + (tx_root / "tx-leftover").mkdir(parents=True) + lock = tx_root / ".lock" + lock.mkdir() + (lock / "owner.json").write_text( + json.dumps( + { + "pid": 123, + "operation": "planning-publish", + "created_at": _CREATED, + } + ), + encoding="utf-8", + ) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "__transactions__\terror\tpublish-lock-present" in out + assert "pid=123" in out + assert "__transactions__\terror\ttransaction-leftover" in out + + +def test_migration_doctor_reports_invalid_transaction_root( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + (live_dir / "__transactions__").write_text("not a dir\n", encoding="utf-8") + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(all_=True)) + + assert exc_info.value.code == 1 + assert "__transactions__\terror\ttransaction-root-invalid" in ( + capsys.readouterr().out + ) + + +def test_migration_doctor_exits_nonzero_on_error_findings( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _repo, live_dir = _init_migration_project(tmp_path, monkeypatch) + _write_migration(live_dir, "missing-plan", write_plan=False) + + with pytest.raises(SystemExit) as exc_info: + handle_migration_doctor(_doctor_args(target="missing-plan")) + + assert exc_info.value.code == 1 + assert "missing-plan\terror\tmissing-plan" in capsys.readouterr().out + + +def test_migration_dispatches_subcommands( + monkeypatch: pytest.MonkeyPatch, +) -> 
None: + seen: list[str] = [] + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_list", + lambda _args: seen.append("list"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_doctor", + lambda _args: seen.append("doctor"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_review", + lambda _args: seen.append("review"), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_cli.handle_migration_refine", + lambda _args: seen.append("refine"), + ) + + handle_migration(argparse.Namespace(migration_command="list")) + handle_migration(argparse.Namespace(migration_command="doctor")) + handle_migration(argparse.Namespace(migration_command="review")) + handle_migration(argparse.Namespace(migration_command="refine")) + + assert seen == ["list", "doctor", "review", "refine"] + + +def test_migration_exits_2_without_subcommand( + capsys: pytest.CaptureFixture[str], +) -> None: + with pytest.raises(SystemExit) as exc_info: + handle_migration(argparse.Namespace(migration_command=None)) + + assert exc_info.value.code == 2 + assert "Usage: continuous-refactoring migration" in capsys.readouterr().err + + +def _init_migration_project( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Path, Path]: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo = tmp_path / "project" + init_repo(repo) + monkeypatch.chdir(repo) + project = register_project(repo) + live_dir = repo / "migrations" + live_dir.mkdir() + set_live_migrations_dir(project.entry.uuid, "migrations") + return repo, live_dir + + +def _write_migration( + live_dir: Path, + slug: str, + *, + status: str = "ready", + awaiting_human_review: bool = False, + current_phase: str = "setup", + human_review_reason: str | None = None, + phases: tuple[PhaseSpec, ...] 
= (_PHASE,), + write_plan: bool = True, + write_phase: bool = True, +) -> Path: + migration_dir = live_dir / slug + migration_dir.mkdir(parents=True) + if write_plan: + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + if write_phase: + for phase in phases: + (migration_dir / phase.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n\n" + "## Definition of Done\n\n" + "Done.\n", + encoding="utf-8", + ) + save_manifest( + MigrationManifest( + name=slug, + created_at=_CREATED, + last_touch=_CREATED, + wake_up_on=None, + awaiting_human_review=awaiting_human_review, + status=status, + current_phase=current_phase, + phases=phases, + human_review_reason=human_review_reason, + ), + migration_dir / "manifest.json", + ) + return migration_dir + + +def _write_planning_state(repo: Path, migration_dir: Path) -> None: + save_planning_state( + new_planning_state("src/example.py", now=_CREATED), + planning_state_path(migration_dir), + repo_root=repo, + published_migration_root=migration_dir, + ) + + +def _write_completed_planning_state( + repo: Path, + migration_dir: Path, + completed: list[tuple[str, str, str]], +) -> None: + state = new_planning_state("src/example.py", now=_CREATED) + for step, outcome, stdout in completed: + stdout_path = planning_stage_stdout_path(migration_dir, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo).as_posix()}, + completed_at=_CREATED, + final_reason="ready" if step == "final-review" else None, + ) + save_planning_state( + state, + planning_state_path(migration_dir), + repo_root=repo, + published_migration_root=migration_dir, + ) + + +def _write_terminal_ready_planning_state(repo: Path, migration_dir: Path) -> None: + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + 
("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "approve-auto", "final-decision: approve-auto - ready\n"), + ], + ) + + +def _write_terminal_skipped_planning_state(repo: Path, migration_dir: Path) -> None: + _write_completed_planning_state( + repo, + migration_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "reject", "final-decision: reject - flawed\n"), + ], + ) + + +def _commit_all(repo: Path, message: str = "test state") -> None: + run_command(["git", "add", "-A"], cwd=repo) + run_command(["git", "commit", "-m", message], cwd=repo) + + +def _list_args( + *, + status: str | None = None, + awaiting_review: bool = False, +) -> argparse.Namespace: + return argparse.Namespace(status=status, awaiting_review=awaiting_review) + + +def _doctor_args( + *, + target: str | None = None, + all_: bool = False, +) -> argparse.Namespace: + return argparse.Namespace(target=target, all=all_) + + +def _review_args(target: str) -> argparse.Namespace: + return argparse.Namespace( + target=target, + agent="codex", + model="test-model", + effort="low", + ) + + +def _refine_args( + target: str, + *, + message: str = "please refine this migration", + file: Path | None = None, + show_agent_logs: bool = False, +) -> argparse.Namespace: + return argparse.Namespace( + target=target, + message=message if file is None else None, + file=file, + agent="codex", + model="test-model", + effort="low", + show_agent_logs=show_agent_logs, + ) + + +def _phase_doc(precondition: str, definition_of_done: str) -> str: + return ( + f"# Phase\n\n" + f"## Precondition\n\n{precondition}\n\n" + f"## Definition of Done\n\n{definition_of_done}\n" + ) + + +def _agent_response( + stdout: str, + writes: dict[str, str] | None = None, + *, + 
returncode: int = 0, +) -> tuple[str, dict[str, str], int]: + return stdout, writes or {}, returncode + + +class _RefineAgent: + def __init__( + self, + responses: list[tuple[str, dict[str, str], int]], + *, + on_call: object | None = None, + ) -> None: + self._responses = responses + self._index = 0 + self._on_call = on_call + self.stage_labels: list[str] = [] + self.prompts: list[str] = [] + self.mirror_to_terminal: list[bool] = [] + + def __call__(self, **kwargs: object) -> CommandCapture: + assert self._index < len(self._responses) + stdout, writes, returncode = self._responses[self._index] + self._index += 1 + prompt = str(kwargs["prompt"]) + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + migration_dir = _prompt_migration_dir(prompt) + + self.prompts.append(prompt) + self.stage_labels.append(stdout_path.parent.name) + self.mirror_to_terminal.append(bool(kwargs["mirror_to_terminal"])) + for rel_path, content in writes.items(): + path = migration_dir / rel_path + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + if self._on_call is not None: + self._on_call(migration_dir) + + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=returncode, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + +def _prompt_migration_dir(prompt: str) -> Path: + for line in prompt.splitlines(): + if line.startswith("Migration directory:"): + return Path(line.split(":", 1)[1].strip()) + raise AssertionError("Migration directory missing from prompt") diff --git a/tests/test_cli_review.py b/tests/test_cli_review.py index 8a5da3b..78af49d 100644 --- a/tests/test_cli_review.py +++ b/tests/test_cli_review.py @@ -117,6 +117,11 @@ def 
_make_perform_args(migration: str) -> argparse.Namespace: ) +def _commit_all(repo: Path, message: str = "test state") -> None: + subprocess.run(["git", "add", "-A"], cwd=repo, check=True, capture_output=True) + subprocess.run(["git", "commit", "-m", message], cwd=repo, check=True, capture_output=True) + + def _init_review_project( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> tuple[Path, Path]: @@ -245,6 +250,45 @@ def test_review_list_filters_flagged_migrations( ] +def test_review_list_ignores_hidden_and_transaction_dirs( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + _, live_dir = _init_review_project(tmp_path, monkeypatch) + save_migration( + _make_manifest( + "visible-review", + awaiting_human_review=True, + human_review_reason="visible", + ), + live_dir / "visible-review" / "manifest.json", + ) + save_migration( + _make_manifest( + "hidden-review", + awaiting_human_review=True, + human_review_reason="hidden", + ), + live_dir / ".hidden-review" / "manifest.json", + ) + save_migration( + _make_manifest( + "transaction-review", + awaiting_human_review=True, + human_review_reason="transaction", + ), + live_dir / "__transactions__" / "manifest.json", + ) + + handle_review_list() + + out = capsys.readouterr().out + assert "visible-review\tready" in out + assert "hidden-review" not in out + assert "transaction-review" not in out + + @pytest.mark.parametrize( ("handler", "error_code", "setup", "expected_message"), [ @@ -341,6 +385,16 @@ def _setup_review_project( ), live_dir / "my-mig" / "manifest.json", ) + (live_dir / "my-mig" / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in _PHASES: + (live_dir / "my-mig" / phase.file).write_text( + "# Phase\n\n" + "## Precondition\n\n" + "Ready.\n\n" + "## Definition of Done\n\n" + "Done.\n", + encoding="utf-8", + ) return repo, live_dir @@ -348,11 +402,12 @@ def test_review_perform_happy_path( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) 
-> None: - _, live_dir = _setup_review_project( + repo, live_dir = _setup_review_project( tmp_path, monkeypatch, awaiting=True, human_review_reason="needs security audit", ) + _commit_all(repo) manifest_path = live_dir / "my-mig" / "manifest.json" captured_prompt: dict[str, str] = {} @@ -361,10 +416,14 @@ def fake_interactive( ) -> int: captured_prompt["prompt"] = prompt captured_prompt["repo_root"] = str(repo_root) - manifest = load_migration_manifest(manifest_path) + manifest = load_migration_manifest(repo_root / "manifest.json") from dataclasses import replace - updated = replace(manifest, awaiting_human_review=False) - save_migration(updated, manifest_path) + updated = replace( + manifest, + awaiting_human_review=False, + human_review_reason=None, + ) + save_migration(updated, repo_root / "manifest.json") return 0 monkeypatch.setattr( @@ -376,7 +435,8 @@ def fake_interactive( assert "needs security audit" in captured_prompt["prompt"] assert "phase-2-review-target.md" in captured_prompt["prompt"] assert "Name: review-target" in captured_prompt["prompt"] - assert captured_prompt["repo_root"] == str(Path.cwd().resolve()) + assert captured_prompt["repo_root"] != str(Path.cwd().resolve()) + assert captured_prompt["repo_root"].endswith("/work/my-mig") reloaded = load_migration_manifest(manifest_path) assert reloaded.awaiting_human_review is False @@ -387,12 +447,13 @@ def test_review_perform_happy_path_without_current_phase( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - _, live_dir = _setup_review_project( + repo, live_dir = _setup_review_project( tmp_path, monkeypatch, awaiting=True, current_phase="", human_review_reason="phase cursor cleared", ) + _commit_all(repo) manifest_path = live_dir / "my-mig" / "manifest.json" captured_prompt: dict[str, str] = {} @@ -400,10 +461,15 @@ def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, ) -> int: captured_prompt["prompt"] = prompt - manifest = 
load_migration_manifest(manifest_path) + manifest = load_migration_manifest(repo_root / "manifest.json") from dataclasses import replace - updated = replace(manifest, awaiting_human_review=False) - save_migration(updated, manifest_path) + updated = replace( + manifest, + awaiting_human_review=False, + current_phase="review-target", + human_review_reason=None, + ) + save_migration(updated, repo_root / "manifest.json") return 0 monkeypatch.setattr( @@ -423,6 +489,7 @@ def test_review_perform_exits_1_when_flag_not_cleared( capsys: pytest.CaptureFixture[str], ) -> None: repo, live_dir = _setup_review_project(tmp_path, monkeypatch, awaiting=True) + _commit_all(repo) def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, @@ -446,7 +513,8 @@ def test_review_perform_exits_with_agent_returncode( monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], ) -> None: - _setup_review_project(tmp_path, monkeypatch, awaiting=True) + repo, _live_dir = _setup_review_project(tmp_path, monkeypatch, awaiting=True) + _commit_all(repo) def fake_interactive( agent: str, model: str, effort: str, prompt: str, repo_root: Path, @@ -495,6 +563,41 @@ def test_review_perform_exits_2_when_not_flagged_for_review( assert "not flagged" in err +def test_top_level_review_perform_routes_to_migration_review_compatibility_path( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _repo, live_dir = _setup_review_project( + tmp_path, + monkeypatch, + awaiting=True, + human_review_reason="needs approval", + ) + seen: dict[str, object] = {} + + def fake_staged_review(request: object) -> None: + seen["slug"] = request.target.slug + seen["path"] = request.target.path + seen["agent"] = request.agent + seen["model"] = request.model + seen["effort"] = request.effort + + monkeypatch.setattr( + "continuous_refactoring.review_cli.handle_staged_migration_review", + fake_staged_review, + ) + + handle_review_perform(_make_perform_args("my-mig")) + + assert seen 
== { + "slug": "my-mig", + "path": live_dir / "my-mig", + "agent": "codex", + "model": "test-model", + "effort": "low", + } + + def test_review_dispatches_list_subcommand( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/test_cli_taste_warning.py b/tests/test_cli_taste_warning.py index 93c1d50..e14986a 100644 --- a/tests/test_cli_taste_warning.py +++ b/tests/test_cli_taste_warning.py @@ -18,16 +18,34 @@ _LEGACY_TASTE = "- Old taste without version header.\n" -def _write_stale_taste(xdg_root: Path) -> None: +def _write_global_taste(xdg_root: Path, text: str) -> None: taste_dir = xdg_root / "continuous-refactoring" / "global" taste_dir.mkdir(parents=True, exist_ok=True) - (taste_dir / "taste.md").write_text(_LEGACY_TASTE, encoding="utf-8") + (taste_dir / "taste.md").write_text(text, encoding="utf-8") + + +def _write_stale_taste(xdg_root: Path) -> None: + _write_global_taste(xdg_root, _LEGACY_TASTE) def _write_current_taste(xdg_root: Path) -> None: - taste_dir = xdg_root / "continuous-refactoring" / "global" - taste_dir.mkdir(parents=True, exist_ok=True) - (taste_dir / "taste.md").write_text(default_taste_text(), encoding="utf-8") + _write_global_taste(xdg_root, default_taste_text()) + + +def _register_repo_with_taste( + *, + repo: Path, + monkeypatch: pytest.MonkeyPatch, + taste_text: str, +) -> None: + init_repo(repo) + monkeypatch.chdir(repo) + + project = register_project(repo) + set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) + taste_path = repo / DEFAULT_REPO_TASTE_PATH + taste_path.parent.mkdir(parents=True, exist_ok=True) + taste_path.write_text(taste_text, encoding="utf-8") _SUBCOMMANDS: list[tuple[list[str], str]] = [ @@ -125,14 +143,11 @@ def test_taste_warning_uses_configured_repo_taste( ) -> None: _write_current_taste(xdg_root) repo = tmp_path / "repo" - init_repo(repo) - monkeypatch.chdir(repo) - - project = register_project(repo) - set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) - taste_path = repo / 
DEFAULT_REPO_TASTE_PATH - taste_path.parent.mkdir(parents=True, exist_ok=True) - taste_path.write_text(_LEGACY_TASTE, encoding="utf-8") + _register_repo_with_taste( + repo=repo, + monkeypatch=monkeypatch, + taste_text=_LEGACY_TASTE, + ) monkeypatch.setattr(sys, "argv", ["cr", "upgrade"]) monkeypatch.setattr(cli, "_handle_upgrade", lambda _: None) @@ -150,14 +165,11 @@ def test_current_repo_taste_suppresses_stale_global_warning( ) -> None: _write_stale_taste(xdg_root) repo = tmp_path / "repo" - init_repo(repo) - monkeypatch.chdir(repo) - - project = register_project(repo) - set_repo_taste_path(project.entry.uuid, DEFAULT_REPO_TASTE_PATH) - taste_path = repo / DEFAULT_REPO_TASTE_PATH - taste_path.parent.mkdir(parents=True, exist_ok=True) - taste_path.write_text(default_taste_text(), encoding="utf-8") + _register_repo_with_taste( + repo=repo, + monkeypatch=monkeypatch, + taste_text=default_taste_text(), + ) monkeypatch.setattr(sys, "argv", ["cr", "upgrade"]) monkeypatch.setattr(cli, "_handle_upgrade", lambda _: None) diff --git a/tests/test_cli_upgrade.py b/tests/test_cli_upgrade.py index 1118839..1674a78 100644 --- a/tests/test_cli_upgrade.py +++ b/tests/test_cli_upgrade.py @@ -2,6 +2,7 @@ import argparse import json +from collections.abc import Callable from pathlib import Path import pytest @@ -16,13 +17,17 @@ register_project, ) +_LEGACY_TASTE = "- Old taste without version.\n" + def _upgrade_args() -> argparse.Namespace: return argparse.Namespace(command="upgrade") -def _set_xdg_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) +def _set_xdg_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + xdg_root = tmp_path / "xdg" + monkeypatch.setenv("XDG_DATA_HOME", str(xdg_root)) + return xdg_root def _register_project_with_upgrade_layout( @@ -35,6 +40,30 @@ def _register_project_with_upgrade_layout( register_project(repo) +def _write_stale_config_manifest(xdg_root: Path) -> None: + 
manifest_dir = xdg_root / "continuous-refactoring" + manifest_dir.mkdir(parents=True, exist_ok=True) + (manifest_dir / "manifest.json").write_text( + json.dumps({"projects": {}}), encoding="utf-8", + ) + + +def _write_global_taste(text: str) -> None: + gdir = global_dir() + gdir.mkdir(parents=True, exist_ok=True) + (gdir / "taste.md").write_text(text, encoding="utf-8") + + +def _assert_upgrade_fails_for_bad_config( + capsys: pytest.CaptureFixture[str], +) -> None: + with pytest.raises(SystemExit) as exc_info: + _handle_upgrade(_upgrade_args()) + + assert exc_info.value.code == 1 + assert "config version" in capsys.readouterr().err + + # --------------------------------------------------------------------------- # Happy path: current config version → exit 0 # --------------------------------------------------------------------------- @@ -60,49 +89,27 @@ def test_upgrade_is_idempotent( # --------------------------------------------------------------------------- -# Failure: missing config → exit 1 -# --------------------------------------------------------------------------- - - -def test_upgrade_fails_when_config_missing( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - _set_xdg_home(tmp_path, monkeypatch) - - with pytest.raises(SystemExit) as exc_info: - _handle_upgrade(_upgrade_args()) - - assert exc_info.value.code == 1 - err = capsys.readouterr().err - assert "config version" in err - - -# --------------------------------------------------------------------------- -# Failure: stale config version → exit 1 +# Failure: missing or stale config → exit 1 # --------------------------------------------------------------------------- -def test_upgrade_fails_when_config_stale( +@pytest.mark.parametrize( + "prepare_config", + [ + lambda xdg_root: None, + _write_stale_config_manifest, + ], + ids=["missing", "stale"], +) +def test_upgrade_fails_for_missing_or_stale_config( tmp_path: Path, monkeypatch: 
pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str], + prepare_config: Callable[[Path], None], ) -> None: - _set_xdg_home(tmp_path, monkeypatch) - - manifest_dir = tmp_path / "xdg" / "continuous-refactoring" - manifest_dir.mkdir(parents=True, exist_ok=True) - (manifest_dir / "manifest.json").write_text( - json.dumps({"projects": {}}), encoding="utf-8", - ) - - with pytest.raises(SystemExit) as exc_info: - _handle_upgrade(_upgrade_args()) - - assert exc_info.value.code == 1 - err = capsys.readouterr().err - assert "config version" in err + xdg_root = _set_xdg_home(tmp_path, monkeypatch) + prepare_config(xdg_root) + _assert_upgrade_fails_for_bad_config(capsys) # --------------------------------------------------------------------------- @@ -116,10 +123,7 @@ def test_upgrade_warns_on_stale_global_taste( capsys: pytest.CaptureFixture[str], ) -> None: _register_project_with_upgrade_layout(tmp_path, monkeypatch) - - gdir = global_dir() - gdir.mkdir(parents=True, exist_ok=True) - (gdir / "taste.md").write_text("- Old taste without version.\n", encoding="utf-8") + _write_global_taste(_LEGACY_TASTE) _handle_upgrade(_upgrade_args()) @@ -128,31 +132,21 @@ def test_upgrade_warns_on_stale_global_taste( assert "out of date" in err -def test_upgrade_no_taste_warning_when_current( - tmp_path: Path, - monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], -) -> None: - _register_project_with_upgrade_layout(tmp_path, monkeypatch) - - gdir = global_dir() - gdir.mkdir(parents=True, exist_ok=True) - (gdir / "taste.md").write_text(default_taste_text(), encoding="utf-8") - - _handle_upgrade(_upgrade_args()) - - err = capsys.readouterr().err - assert err == "" - - -def test_upgrade_no_taste_warning_when_absent( +@pytest.mark.parametrize( + "taste_text", + [default_taste_text(), None], + ids=["current", "absent"], +) +def test_upgrade_skips_taste_warning_when_global_taste_is_current_or_absent( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: 
pytest.CaptureFixture[str], + taste_text: str | None, ) -> None: _register_project_with_upgrade_layout(tmp_path, monkeypatch) + if taste_text is not None: + _write_global_taste(taste_text) _handle_upgrade(_upgrade_args()) - err = capsys.readouterr().err - assert err == "" + assert capsys.readouterr().err == "" diff --git a/tests/test_cli_version.py b/tests/test_cli_version.py index 9b89d1b..b664b19 100644 --- a/tests/test_cli_version.py +++ b/tests/test_cli_version.py @@ -1,13 +1,14 @@ from __future__ import annotations +import sys + import pytest from continuous_refactoring import cli -def test_global_version_uses_installed_package_metadata( +def test_build_parser_uses_installed_package_metadata_for_version_banner( monkeypatch: pytest.MonkeyPatch, - capsys: pytest.CaptureFixture[str], ) -> None: package_names: list[str] = [] @@ -17,10 +18,22 @@ def fake_metadata_version(package_name: str) -> str: monkeypatch.setattr(cli, "metadata_version", fake_metadata_version) - parser = cli.build_parser() + cli.build_parser() + + assert package_names == [cli._PACKAGE_DISTRIBUTION] + + +def test_cli_main_version_prints_banner_without_stale_taste_warning( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + monkeypatch.setattr(cli, "metadata_version", lambda _: "9.8.7") + monkeypatch.setattr(sys, "argv", ["continuous-refactoring", "--version"]) + with pytest.raises(SystemExit) as exc_info: - parser.parse_args(["--version"]) + cli.cli_main() + captured = capsys.readouterr() assert exc_info.value.code == 0 - assert package_names == ["continuous-refactoring"] - assert capsys.readouterr().out == "continuous-refactoring 9.8.7\n" + assert captured.out == "continuous-refactoring 9.8.7\n" + assert captured.err == "" diff --git a/tests/test_commit_messages.py b/tests/test_commit_messages.py index b91a303..0550f56 100644 --- a/tests/test_commit_messages.py +++ b/tests/test_commit_messages.py @@ -58,3 +58,13 @@ def 
test_commit_rationale_ignores_placeholder_summary() -> None: ) assert rationale == "agent stdout explained the cleanup" + + +def test_commit_rationale_sanitizes_fallback_text() -> None: + rationale = commit_rationale( + None, + fallback="Touched /repo/src/continuous_refactoring/decisions.py", + repo_root=Path("/repo"), + ) + + assert rationale == "Touched /src/continuous_refactoring/decisions.py" diff --git a/tests/test_config.py b/tests/test_config.py index 8f0f969..872c75d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -86,15 +86,11 @@ def test_xdg_data_home_falls_back( # Manifest # --------------------------------------------------------------------------- -def test_load_manifest_empty( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_load_manifest_empty() -> None: assert load_manifest() == {} -def test_load_manifest_rejects_non_object_payload( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_load_manifest_rejects_non_object_payload(tmp_path: Path) -> None: manifest = tmp_path / "xdg" / "continuous-refactoring" / "manifest.json" manifest.parent.mkdir(parents=True, exist_ok=True) @@ -104,9 +100,7 @@ def test_load_manifest_rejects_non_object_payload( load_manifest() -def test_load_config_version_rejects_non_object_payload( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_load_config_version_rejects_non_object_payload(tmp_path: Path) -> None: manifest = tmp_path / "xdg" / "continuous-refactoring" / "manifest.json" manifest.parent.mkdir(parents=True, exist_ok=True) manifest.write_text("[]", encoding="utf-8") @@ -236,9 +230,7 @@ def broken_read_text(self: Path, *args: object, **kwargs: object) -> str: assert exc_info.value.__cause__ is io_error -def test_load_manifest_rejects_non_mapping_projects( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_load_manifest_rejects_non_mapping_projects(tmp_path: Path) -> None: manifest = tmp_path / "xdg" / 
"continuous-refactoring" / "manifest.json" manifest.parent.mkdir(parents=True, exist_ok=True) @@ -248,9 +240,7 @@ def test_load_manifest_rejects_non_mapping_projects( load_manifest() -def test_load_manifest_rejects_non_mapping_project_entry( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: +def test_load_manifest_rejects_non_mapping_project_entry(tmp_path: Path) -> None: manifest = tmp_path / "xdg" / "continuous-refactoring" / "manifest.json" manifest.parent.mkdir(parents=True, exist_ok=True) @@ -743,11 +733,7 @@ def test_entry_roundtrip_without_live_migrations_dir( assert loaded[entry.uuid].repo_taste_path is None -def test_legacy_manifest_without_live_migrations_dir( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch -) -> None: - import json - +def test_legacy_manifest_without_live_migrations_dir(tmp_path: Path) -> None: mpath = tmp_path / "xdg" / "continuous-refactoring" / "manifest.json" mpath.parent.mkdir(parents=True, exist_ok=True) uid = str(uuid.uuid4()) diff --git a/tests/test_continuous_refactoring.py b/tests/test_continuous_refactoring.py index 906270b..005e7d4 100644 --- a/tests/test_continuous_refactoring.py +++ b/tests/test_continuous_refactoring.py @@ -99,7 +99,10 @@ "check_phase_ready", "execute_phase", "PlanningOutcome", - "run_planning", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", "CONTINUOUS_REFACTORING_STATUS_BEGIN", "CONTINUOUS_REFACTORING_STATUS_END", "CLASSIFIER_PROMPT", @@ -227,6 +230,10 @@ def test_package_exports_contain_known_public_symbols() -> None: "bump_last_touch", "check_phase_ready", "PlanningOutcome", + "PlanningRefineRequest", + "PlanningStepResult", + "run_next_planning_step", + "run_refine_planning_step", "compose_full_prompt", "ClassifierDecision", "run_once", diff --git a/tests/test_decisions.py b/tests/test_decisions.py index 6b52170..8386e7d 100644 --- a/tests/test_decisions.py +++ b/tests/test_decisions.py @@ -8,11 +8,17 @@ import pytest from 
continuous_refactoring.decisions import ( + AgentStatus, RetryRecommendation, RunnerDecision, + default_retry_recommendation, error_failure_kind, parse_status_block, + read_status, + resolved_phase_reached, sanitize_text, + sanitized_text_or, + status_summary, ) from continuous_refactoring.prompts import ( CONTINUOUS_REFACTORING_STATUS_BEGIN, @@ -154,6 +160,31 @@ def test_parse_status_block_never_raises_on_generated_corpus() -> None: assert all(isinstance(item, str) for item in status.evidence) +def test_read_status_prefers_codex_last_message_file(tmp_path: Path) -> None: + last_message_path = tmp_path / "codex-last-message.md" + last_message_path.write_text( + _status_block("summary: from file"), + encoding="utf-8", + ) + fallback = _status_block("summary: from fallback") + + codex_status = read_status( + "codex", + last_message_path=last_message_path, + fallback_text=fallback, + ) + other_status = read_status( + "claude", + last_message_path=last_message_path, + fallback_text=fallback, + ) + + assert codex_status is not None + assert codex_status.summary == "from file" + assert other_status is not None + assert other_status.summary == "from fallback" + + def test_sanitize_text_filters_and_redacts() -> None: repo_root = Path("/worktree/repo") text = "\n".join( @@ -195,6 +226,60 @@ def test_sanitize_text_is_idempotent() -> None: assert sanitize_text(once, repo_root) == once +def test_sanitized_text_or_prefers_sanitized_text() -> None: + repo_root = Path("/repo") + + assert ( + sanitized_text_or(" touched /repo/src/file.py ", repo_root, "fallback") + == "touched /src/file.py" + ) + + +def test_sanitized_text_or_uses_fallback_when_sanitized_text_is_empty() -> None: + assert ( + sanitized_text_or("codex exec --help", Path("/repo"), "fallback") + == "fallback" + ) + + +def test_status_summary_sanitizes_summary_and_focus() -> None: + status = AgentStatus( + summary=" touched /repo/src/file.py ", + next_retry_focus=" /tmp/logs/run.txt ", + ) + + assert 
status_summary(status, fallback="fallback", repo_root=Path("/repo")) == ( + "touched /src/file.py", + "", + ) + + +def test_resolved_phase_reached_uses_fallback_for_missing_status_or_phase() -> None: + fallback = "review" + + assert resolved_phase_reached(None, fallback) == fallback + assert resolved_phase_reached(AgentStatus(), fallback) == fallback + assert resolved_phase_reached(AgentStatus(phase_reached="refactor"), fallback) == ( + "refactor" + ) + + +@pytest.mark.parametrize( + ("decision", "expected"), + [ + ("commit", "none"), + ("retry", "same-target"), + ("abandon", "new-target"), + ("blocked", "human-review"), + ], +) +def test_default_retry_recommendation_maps_each_decision( + decision: RunnerDecision, + expected: RetryRecommendation, +) -> None: + assert default_retry_recommendation(decision) == expected + + @pytest.mark.parametrize( ("message", "expected"), [ diff --git a/tests/test_effort.py b/tests/test_effort.py index d714175..139c26d 100644 --- a/tests/test_effort.py +++ b/tests/test_effort.py @@ -8,7 +8,9 @@ cap_effort, effort_exceeds, resolve_effort_budget, + resolve_phase_effort, resolve_requested_effort, + resolve_target_effort_budget, ) @@ -52,3 +54,90 @@ def test_target_override_requests_default_then_caps_to_max() -> None: assert resolution.effective_effort == "medium" assert resolution.max_allowed_effort == "medium" assert resolution.capped is True + + +def test_target_effort_budget_uses_run_default_without_override() -> None: + budget = resolve_effort_budget("medium", "xhigh") + + target_budget, resolution = resolve_target_effort_budget(budget, None) + + assert target_budget.default_effort == "medium" + assert target_budget.max_allowed_effort == "xhigh" + assert resolution.source == "default" + assert resolution.requested_effort == "medium" + assert resolution.effective_effort == "medium" + assert resolution.reason == "run default effort" + + +def test_target_effort_budget_caps_override_and_updates_default() -> None: + budget = 
resolve_effort_budget("low", "medium") + + target_budget, resolution = resolve_target_effort_budget(budget, "xhigh") + + assert target_budget.default_effort == "medium" + assert target_budget.max_allowed_effort == "medium" + assert resolution.source == "target-override" + assert resolution.requested_effort == "xhigh" + assert resolution.effective_effort == "medium" + assert resolution.capped is True + assert resolution.reason == "target effort override capped by run budget" + + +def test_phase_effort_uses_default_when_no_requirement() -> None: + budget = resolve_effort_budget("medium", "xhigh") + + resolution = resolve_phase_effort(budget, None) + + assert resolution.source == "default" + assert resolution.requested_effort == "medium" + assert resolution.effective_effort == "medium" + assert resolution.capped is False + assert resolution.reason == "default effort" + + +def test_phase_effort_does_not_drop_below_default() -> None: + budget = resolve_effort_budget("high", "xhigh") + + resolution = resolve_phase_effort(budget, "medium") + + assert resolution.source == "phase-required" + assert resolution.requested_effort == "high" + assert resolution.effective_effort == "high" + assert resolution.capped is False + assert resolution.reason == "phase required effort" + + +def test_phase_effort_promotes_then_caps_to_max() -> None: + budget = resolve_effort_budget("medium", "high") + + resolution = resolve_phase_effort( + budget, + "xhigh", + reason="migration phase override", + ) + + assert resolution.source == "phase-required" + assert resolution.requested_effort == "xhigh" + assert resolution.effective_effort == "high" + assert resolution.max_allowed_effort == "high" + assert resolution.capped is True + assert resolution.reason == "migration phase override" + + +def test_resolution_event_fields_match_resolution() -> None: + budget = resolve_effort_budget("low", "medium") + resolution = resolve_requested_effort( + budget, + "xhigh", + source="target-override", + 
reason="test override", + ) + + assert resolution.event_fields() == { + "effort_source": "target-override", + "requested_effort": "xhigh", + "effective_effort": "medium", + "max_allowed_effort": "medium", + "effort_capped": True, + "effort_reason": "test override", + } diff --git a/tests/test_failure_report.py b/tests/test_failure_report.py index 24ca551..84dc0e8 100644 --- a/tests/test_failure_report.py +++ b/tests/test_failure_report.py @@ -48,6 +48,26 @@ def _record(**overrides: object) -> DecisionRecord: return DecisionRecord(**values) +def assert_attempt_decision( + artifacts: RunArtifacts, + *, + attempt: int, + retry: int, + record: DecisionRecord, + reason_doc_path: str | None, +) -> None: + stats = artifacts.attempts[attempt] + assert stats.target == record.target + assert stats.retry == retry + assert stats.call_role == record.call_role + assert stats.phase_reached == record.phase_reached + assert stats.decision == record.decision + assert stats.retry_recommendation == record.retry_recommendation + assert stats.failure_kind == record.failure_kind + assert stats.failure_summary == record.summary + assert stats.reason_doc_path == reason_doc_path + + def test_effective_record_abandons_after_max_attempts() -> None: record = _record( summary="Still red", @@ -230,10 +250,13 @@ def fail_if_snapshot_dir_requested(_repo_root: Path) -> Path: assert result is None assert failure_snapshot_calls == 0 - stats = artifacts.attempts[1] - assert stats.decision == "commit" - assert stats.retry == 2 - assert stats.reason_doc_path is None + assert_attempt_decision( + artifacts, + attempt=1, + retry=2, + record=record, + reason_doc_path=None, + ) assert not artifacts.events_path.exists() @@ -258,9 +281,195 @@ def test_persist_decision_records_non_commit_snapshot( assert result is not None assert result.exists() - stats = artifacts.attempts[1] - assert stats.decision == "retry" - assert stats.reason_doc_path == str(result) + assert_attempt_decision( + artifacts, + attempt=1, + 
retry=1, + record=record, + reason_doc_path=str(result), + ) events = artifacts.events_path.read_text(encoding="utf-8") assert '"event": "failure_doc_written"' in events assert '"event": "target_transition"' in events + + +def test_planning_step_failure_snapshot_names_step_and_resume_behavior( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo_root = tmp_path / "repo" + repo_root.mkdir() + artifacts = _artifacts(tmp_path / "artifacts") + record = _record( + decision="abandon", + retry_recommendation="new-target", + target="auth-cleanup", + call_role="planning.review-2", + phase_reached="planning.review-2", + failure_kind="planning-step-failed", + summary="Revised plan still has findings", + next_retry_focus=None, + ) + + result = persist_decision( + repo_root, + artifacts, + attempt=1, + retry=1, + validation_command="uv run pytest", + record=record, + ) + + assert result is not None + content = result.read_text(encoding="utf-8") + assert 'call_role: "planning.review-2"' in content + assert "planning step `review-2`" in content + assert ".planning/state.json" in content + assert "failed current-step output" in content + assert "not resume input" in content + + events = artifacts.events_path.read_text(encoding="utf-8") + assert '"event": "planning_step_failure_doc_written"' in events + assert '"planning_step": "review-2"' in events + assert '"event": "failure_doc_written"' not in events + + +def test_planning_snapshot_inlines_bounded_reviewer_output( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo_root = tmp_path / "repo" + repo_root.mkdir() + artifacts = _artifacts(tmp_path / "artifacts") + stage_dir = artifacts.root / "attempt-001" / "planning" / "review-2" + stage_dir.mkdir(parents=True) + long_tail = "x" * 5000 + stdout_path = stage_dir / "agent.stdout.log" + stdout_path.write_text( + f"1. 
still missing rollback in {repo_root}/src/auth.py\n{long_tail}", + encoding="utf-8", + ) + stderr_path = stage_dir / "agent.stderr.log" + stderr_path.write_text("warning from /tmp/planning-agent/session\n", encoding="utf-8") + last_message_path = stage_dir / "agent-last-message.md" + last_message_path.write_text("final reviewer note\n", encoding="utf-8") + record = _record( + decision="abandon", + retry_recommendation="new-target", + target="auth-cleanup", + call_role="planning.review-2", + phase_reached="planning.review-2", + failure_kind="planning-step-failed", + summary="Revised plan still has findings", + next_retry_focus=None, + agent_last_message_path=last_message_path, + agent_stdout_path=stdout_path, + agent_stderr_path=stderr_path, + ) + + result = write( + repo_root, + artifacts, + attempt=1, + retry=1, + validation_command="uv run pytest", + record=record, + ) + + content = result.read_text(encoding="utf-8") + assert 'agent_last_message: "attempt-001/planning/review-2/agent-last-message.md"' in content + assert 'agent_stdout: "attempt-001/planning/review-2/agent.stdout.log"' in content + assert "### Latest Agent Message\n```text\nfinal reviewer note\n" in content + assert "1. 
still missing rollback in /src/auth.py" in content + assert "warning from " in content + assert "\n...[truncated]\n```" in content + assert long_tail not in content + + +def test_planning_call_role_gets_planning_resume_wording_for_infra_failure_kind( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo_root = tmp_path / "repo" + repo_root.mkdir() + artifacts = _artifacts(tmp_path / "artifacts") + record = _record( + decision="abandon", + retry_recommendation="new-target", + target="auth-cleanup", + call_role="planning.expand", + phase_reached="planning.expand", + failure_kind="agent-infra-failure", + summary="planning.expand failed: agent exited 1", + next_retry_focus=None, + ) + + result = persist_decision( + repo_root, + artifacts, + attempt=1, + retry=1, + validation_command="uv run pytest", + record=record, + ) + + assert result is not None + content = result.read_text(encoding="utf-8") + assert 'call_role: "planning.expand"' in content + assert "planning step `expand`" in content + assert ".planning/state.json" in content + assert "failed current-step output" in content + assert "not resume input" in content + assert 'failure_kind: "agent-infra-failure"' in content + + events = artifacts.events_path.read_text(encoding="utf-8") + assert '"event": "planning_step_failure_doc_written"' in events + assert '"planning_step": "expand"' in events + assert '"event": "failure_doc_written"' not in events + + +@pytest.mark.parametrize( + "call_role", + ["planning.state", "planning.publish", "planning.resume"], +) +def test_internal_planning_call_roles_keep_generic_failure_wording( + tmp_path: Path, + monkeypatch, + call_role: str, +) -> None: + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + repo_root = tmp_path / "repo" + repo_root.mkdir() + artifacts = _artifacts(tmp_path / "artifacts") + record = _record( + decision="abandon", + retry_recommendation="new-target", + target="auth-cleanup", + 
call_role=call_role, + phase_reached=call_role, + failure_kind="agent-infra-failure", + summary=f"{call_role} failed", + next_retry_focus=None, + ) + + result = persist_decision( + repo_root, + artifacts, + attempt=1, + retry=1, + validation_command="uv run pytest", + record=record, + ) + + assert result is not None + content = result.read_text(encoding="utf-8") + assert f'call_role: "{call_role}"' in content + assert "planning step `" not in content + assert "failed current-step output" not in content + + events = artifacts.events_path.read_text(encoding="utf-8") + assert '"event": "failure_doc_written"' in events + assert '"event": "planning_step_failure_doc_written"' not in events diff --git a/tests/test_focus_on_live_migrations.py b/tests/test_focus_on_live_migrations.py index 0704096..7b0cb7a 100644 --- a/tests/test_focus_on_live_migrations.py +++ b/tests/test_focus_on_live_migrations.py @@ -8,7 +8,7 @@ import pytest import continuous_refactoring -from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.artifacts import CommandCapture, ContinuousRefactorError from continuous_refactoring.cli import build_parser from continuous_refactoring.decisions import DecisionRecord, RouteOutcome from continuous_refactoring.effort import EffortBudget @@ -19,6 +19,13 @@ migration_root, save_manifest, ) +from continuous_refactoring.planning_state import ( + complete_planning_step, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) from conftest import make_run_loop_args @@ -63,11 +70,85 @@ def _seed_manifest( ) root = migration_root(live_dir, name) root.mkdir(parents=True, exist_ok=True) + (root / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in phases: + phase_path = root / phase.file + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text(f"# {phase.name}\n", encoding="utf-8") path = root / "manifest.json" save_manifest(manifest, path) return 
path +def _seed_planning_manifest( + live_dir: Path, + name: str, + *, + created_at: datetime | None = None, +) -> Path: + manifest = MigrationManifest( + name=name, + created_at=(created_at or _utc_now() - timedelta(days=2)).isoformat( + timespec="milliseconds", + ), + last_touch=(_utc_now() - timedelta(days=1)).isoformat(timespec="milliseconds"), + wake_up_on=None, + awaiting_human_review=False, + status="planning", + current_phase="", + phases=(), + ) + root = migration_root(live_dir, name) + root.mkdir(parents=True, exist_ok=True) + path = root / "manifest.json" + save_manifest(manifest, path) + return path + + +def _seed_planning_manifest_at_final_review( + repo_root: Path, + live_dir: Path, + name: str, + *, + created_at: datetime | None = None, +) -> Path: + path = _seed_planning_manifest(live_dir, name, created_at=created_at) + root = path.parent + (root / "plan.md").write_text("# Plan\n", encoding="utf-8") + (root / _PHASE.file).write_text( + "## Precondition\n\nalways\n\n" + "## Definition of Done\n\nSetup is complete.\n", + encoding="utf-8", + ) + manifest = load_manifest(path) + save_manifest( + replace(manifest, current_phase=_PHASE.name, phases=(_PHASE,)), + path, + ) + state = new_planning_state( + f"Finish {name}", + now="2026-04-29T12:00:00.000+00:00", + ) + for step, outcome, stdout in ( + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "no findings\n"), + ): + stdout_path = planning_stage_stdout_path(root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at="2026-04-29T12:00:00.000+00:00", + ) + save_planning_state(state, planning_state_path(root), repo_root=repo_root) + return path + + def _mark_done(path: Path) -> None: 
manifest = load_manifest(path) updated = replace( @@ -79,6 +160,31 @@ def _mark_done(path: Path) -> None: save_manifest(updated, path) +def _prompt_migration_dir(prompt: str, repo_root: Path) -> Path: + for line in prompt.splitlines(): + if line.startswith("Migration directory:"): + path = Path(line.split(":", 1)[1].strip()) + return path if path.is_absolute() else repo_root / path + raise AssertionError("Migration directory missing from prompt") + + +def _planning_agent_result(kwargs: dict[str, object], stdout: str) -> CommandCapture: + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=0, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + def _flag_for_review(path: Path) -> None: manifest = load_manifest(path) updated = replace( @@ -101,6 +207,30 @@ def _commit_ok(target: str) -> DecisionRecord: ) +def _planning_commit_ok(target: str) -> DecisionRecord: + return DecisionRecord( + decision="commit", + retry_recommendation="none", + target=target, + call_role="planning.approaches", + phase_reached="planning.approaches", + failure_kind="none", + summary="ok", + ) + + +def _planning_blocked(target: str) -> DecisionRecord: + return DecisionRecord( + decision="blocked", + retry_recommendation="human-review", + target=target, + call_role="planning.state", + phase_reached="planning.state", + failure_kind="planning-state-missing", + summary="missing planning state", + ) + + def _abandon(target: str) -> DecisionRecord: return DecisionRecord( decision="abandon", @@ -182,41 +312,20 @@ def test_focused_loop_eligibility_rechecks_effort_deferred_phase_when_cap_rises( # 
--------------------------------------------------------------------------- -def _make_handle_run_args( - repo_root: Path, *, focus: bool, -) -> argparse.Namespace: - return argparse.Namespace( +def test_handle_run_without_focus_requires_targeting( + tmp_path: Path, +) -> None: + repo_root = tmp_path / "repo" + repo_root.mkdir() + args = make_run_loop_args( + repo_root, agent="claude", model="opus", effort="medium", validation_command="uv run pytest", - extensions=None, - globs=None, - targets=None, - paths=None, scope_instruction=None, - timeout=None, - refactoring_prompt=None, - fix_prompt=None, - show_agent_logs=False, - show_command_logs=False, - repo_root=repo_root, - max_attempts=None, - max_refactors=None, - commit_message_prefix="continuous refactor", - max_consecutive_failures=3, - sleep=0.0, - focus_on_live_migrations=focus, ) - -def test_handle_run_without_focus_requires_targeting( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, -) -> None: - repo_root = tmp_path / "repo" - repo_root.mkdir() - args = _make_handle_run_args(repo_root, focus=False) - from continuous_refactoring.cli import _handle_run with pytest.raises(SystemExit) as exc: @@ -229,7 +338,15 @@ def test_handle_run_with_focus_bypasses_targeting_and_max_refactors( ) -> None: repo_root = tmp_path / "repo" repo_root.mkdir() - args = _make_handle_run_args(repo_root, focus=True) + args = make_run_loop_args( + repo_root, + agent="claude", + model="opus", + effort="medium", + validation_command="uv run pytest", + scope_instruction=None, + focus_on_live_migrations=True, + ) calls: list[argparse.Namespace] = [] @@ -281,6 +398,86 @@ def test_focused_loop_exits_zero_when_no_live_migrations_remain( assert exit_code == 0 +def test_focused_loop_forwards_log_mirroring_to_baseline_and_ticks( + run_loop_env: Path, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = tmp_path / "live-migrations" + live_dir.mkdir() + planning_path = _seed_planning_manifest(live_dir, "plan-first") + 
phase_path = _seed_manifest(live_dir, "phase-second") + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + captured_baseline: list[bool] = [] + captured_planning: list[object] = [] + captured_phase: list[object] = [] + + def fake_tests( + test_command: str, + repo_root: Path, + stdout_path: Path, + stderr_path: Path, + **kwargs: object, + ) -> CommandCapture: + captured_baseline.append(bool(kwargs["mirror_to_terminal"])) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text("ok\n", encoding="utf-8") + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=(test_command,), + returncode=0, + stdout="ok\n", + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + def fake_planning_tick( + live_dir: Path, + taste: str, + repo_root: Path, + artifacts: object, + **kwargs: object, + ) -> tuple[RouteOutcome, DecisionRecord | None]: + captured_planning.append(kwargs["log_mirroring"]) + _mark_done(planning_path) + return ("commit", _commit_ok("plan-first")) + + def fake_phase_tick( + live_dir: Path, + taste: str, + repo_root: Path, + artifacts: object, + **kwargs: object, + ) -> tuple[RouteOutcome, DecisionRecord | None]: + captured_phase.append(kwargs["log_mirroring"]) + _mark_done(phase_path) + return ("commit", _commit_ok("phase-second")) + + monkeypatch.setattr("continuous_refactoring.loop.run_tests", fake_tests) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_planning_tick", + fake_planning_tick, + ) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_migration_tick", + fake_phase_tick, + ) + + args = make_run_loop_args( + run_loop_env, + focus_on_live_migrations=True, + show_agent_logs=True, + show_command_logs=True, + ) + + assert continuous_refactoring.run_migrations_focused_loop(args) == 0 + assert captured_baseline == [True] + assert [getattr(value, "agent") for value in captured_planning] == 
[True] + assert [getattr(value, "command") for value in captured_planning] == [True] + assert [getattr(value, "agent") for value in captured_phase] == [True] + assert [getattr(value, "command") for value in captured_phase] == [True] + + def test_focused_loop_raises_when_live_dir_unconfigured( run_loop_env: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -464,6 +661,227 @@ def fake_execute( captured = capsys.readouterr() assert "Migration tick deferred all eligible migrations: wait for follow-up" in captured.out +def test_e2e_focused_run_completes_planning_before_phase_execution( + run_loop_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = run_loop_env / "migrations" + live_dir.mkdir() + planning_path = _seed_planning_manifest_at_final_review( + run_loop_env, + live_dir, + "mid-planning", + ) + continuous_refactoring.run_command(["git", "add", "migrations"], cwd=run_loop_env) + continuous_refactoring.run_command( + ["git", "commit", "-m", "seed focused migrations"], + cwd=run_loop_env, + ) + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + calls: list[str] = [] + + def final_review_agent(**kwargs: object) -> CommandCapture: + stage_label = Path(str(kwargs["stdout_path"])).parent.name + assert stage_label == "final-review" + migration_dir = _prompt_migration_dir( + str(kwargs["prompt"]), + Path(str(kwargs["repo_root"])), + ) + assert migration_dir.name == "mid-planning" + calls.append("planning:final-review") + return _planning_agent_result( + kwargs, + "final-decision: approve-auto - ready\n", + ) + + monkeypatch.setattr( + "continuous_refactoring.planning.maybe_run_agent", + final_review_agent, + ) + + executed: list[str] = [] + + def fake_ready( + phase: PhaseSpec, + manifest: MigrationManifest, + *_args: object, + **_kwargs: object, + ) -> tuple[str, str]: + calls.append(f"ready:{manifest.name}") + assert manifest.name == "mid-planning" + return ("yes", "ready") + + def fake_execute( + phase: PhaseSpec, + manifest: 
MigrationManifest, + *_args: object, + **_kwargs: object, + ) -> object: + calls.append(f"execute:{manifest.name}") + assert manifest.name == "mid-planning" + executed.append(manifest.name) + manifest_path = live_dir / manifest.name / "manifest.json" + _mark_done(manifest_path) + from continuous_refactoring.phases import ExecutePhaseOutcome + + return ExecutePhaseOutcome(status="done", reason="ok") + + monkeypatch.setattr("continuous_refactoring.migration_tick.check_phase_ready", fake_ready) + monkeypatch.setattr("continuous_refactoring.migration_tick.execute_phase", fake_execute) + + args = make_run_loop_args(run_loop_env, focus_on_live_migrations=True) + assert continuous_refactoring.run_migrations_focused_loop(args) == 0 + assert calls == [ + "planning:final-review", + "ready:mid-planning", + "execute:mid-planning", + ] + assert executed == ["mid-planning"] + assert load_manifest(planning_path).status == "done" + + +def test_focused_loop_skips_abandoned_planning_migration_while_other_is_eligible( + run_loop_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = run_loop_env / "migrations" + live_dir.mkdir() + now = _utc_now() + _seed_planning_manifest_at_final_review( + run_loop_env, + live_dir, + "alpha", + created_at=now - timedelta(hours=2), + ) + _seed_planning_manifest_at_final_review( + run_loop_env, + live_dir, + "beta", + created_at=now - timedelta(hours=1), + ) + continuous_refactoring.run_command(["git", "add", "migrations"], cwd=run_loop_env) + continuous_refactoring.run_command( + ["git", "commit", "-m", "seed focused planning migrations"], + cwd=run_loop_env, + ) + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + calls: list[str] = [] + + def failing_planning( + migration_name: str, + *_args: object, + **_kwargs: object, + ) -> object: + calls.append(migration_name) + raise ContinuousRefactorError( + f"planning.review-2 failed: {migration_name} still has findings" + ) + + monkeypatch.setattr( + 
"continuous_refactoring.migration_tick.run_next_planning_step", + failing_planning, + ) + + args = make_run_loop_args( + run_loop_env, + focus_on_live_migrations=True, + max_consecutive_failures=2, + ) + with pytest.raises(ContinuousRefactorError, match="2 consecutive failures"): + continuous_refactoring.run_migrations_focused_loop(args) + + assert calls == ["alpha", "beta"] + + +def test_focused_loop_retries_skipped_planning_after_phase_alternative_defers( + run_loop_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = run_loop_env / "migrations" + live_dir.mkdir() + _seed_planning_manifest_at_final_review(run_loop_env, live_dir, "alpha") + _seed_manifest(live_dir, "phase-alt") + continuous_refactoring.run_command(["git", "add", "migrations"], cwd=run_loop_env) + continuous_refactoring.run_command( + ["git", "commit", "-m", "seed focused alternatives"], + cwd=run_loop_env, + ) + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + calls: list[str] = [] + + def failing_planning( + migration_name: str, + *_args: object, + **_kwargs: object, + ) -> object: + calls.append(f"planning:{migration_name}") + raise ContinuousRefactorError( + f"planning.review-2 failed: {migration_name} still has findings" + ) + + def deferred_phase( + *_args: object, + **_kwargs: object, + ) -> tuple[RouteOutcome, DecisionRecord]: + calls.append("phase") + return ( + "not-routed", + DecisionRecord( + decision="retry", + retry_recommendation="same-target", + target="migration/phase-alt", + call_role="phase.ready-check", + phase_reached="phase.ready-check", + failure_kind="phase-ready-no", + summary="phase deferred", + ), + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + failing_planning, + ) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_migration_tick", + deferred_phase, + ) + + args = make_run_loop_args( + run_loop_env, + focus_on_live_migrations=True, + max_consecutive_failures=2, + ) + with 
pytest.raises(ContinuousRefactorError, match="2 consecutive failures"): + continuous_refactoring.run_migrations_focused_loop(args) + + assert calls == ["planning:alpha", "phase", "planning:alpha"] + + +def test_focused_loop_stops_when_only_blocked_planning_remains( + run_loop_env: Path, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = tmp_path / "live-migrations" + live_dir.mkdir() + _seed_planning_manifest(live_dir, "blocked-planning") + _install_focused_loop_env(run_loop_env, monkeypatch, live_dir) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_planning_tick", + lambda *_args, **_kwargs: ("blocked", _planning_blocked("blocked-planning")), + ) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.try_migration_tick", + lambda *_args, **_kwargs: pytest.fail("blocked planning must stop phase tick"), + ) + + args = make_run_loop_args( + run_loop_env, + focus_on_live_migrations=True, + max_consecutive_failures=1, + ) + with pytest.raises(ContinuousRefactorError, match="1 consecutive failures"): + continuous_refactoring.run_migrations_focused_loop(args) def test_focused_loop_terminates_when_only_awaiting_human_review_remains( diff --git a/tests/test_git.py b/tests/test_git.py index 6aa2521..8506906 100644 --- a/tests/test_git.py +++ b/tests/test_git.py @@ -8,6 +8,28 @@ from conftest import init_repo +_FAILING_COMMAND = [ + "python", + "-c", + "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", +] + + +def _assert_wrapped_error( + exc: pytest.ExceptionInfo[continuous_refactoring.GitCommandError], + *, + message_fragments: tuple[str, ...], + cause_type: type[BaseException], + cause_fragment: str | None = None, +) -> None: + error = exc.value + assert isinstance(error, continuous_refactoring.GitCommandError) + for fragment in message_fragments: + assert fragment in str(error) + assert isinstance(error.__cause__, cause_type) + if cause_fragment is not None: + assert cause_fragment in 
str(error.__cause__) + def test_discard_workspace_changes_restores_tracked_files_and_removes_untracked( tmp_path: Path, @@ -28,6 +50,30 @@ def test_discard_workspace_changes_restores_tracked_files_and_removes_untracked( assert continuous_refactoring.workspace_status_lines(repo) == [] +def test_repo_change_helpers_track_workspace_status(tmp_path: Path) -> None: + repo = tmp_path / "repo" + init_repo(repo) + + assert continuous_refactoring.repo_change_count(repo) == 0 + assert not continuous_refactoring.repo_has_changes(repo) + + (repo / "README.md").write_text("changed\n", encoding="utf-8") + (repo / "scratch.txt").write_text("scratch\n", encoding="utf-8") + + assert continuous_refactoring.repo_change_count(repo) == 2 + assert continuous_refactoring.repo_has_changes(repo) + + +def test_git_commit_rejects_clean_worktree(tmp_path: Path) -> None: + repo = tmp_path / "repo" + init_repo(repo) + + with pytest.raises(continuous_refactoring.ContinuousRefactorError) as exc: + continuous_refactoring.git_commit(repo, "no-op") + + assert str(exc.value) == "No changes to commit." 
+ + def test_revert_to_restores_requested_head_and_removes_untracked( tmp_path: Path, ) -> None: @@ -55,31 +101,18 @@ def test_revert_to_restores_requested_head_and_removes_untracked( def test_run_command_checked_failure_raises_git_command_error(tmp_path: Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - with pytest.raises(continuous_refactoring.GitCommandError): - continuous_refactoring.run_command(command, cwd=tmp_path) + continuous_refactoring.run_command(_FAILING_COMMAND, cwd=tmp_path) def test_run_command_checked_failure_includes_cause_and_payload(tmp_path: Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - with pytest.raises(continuous_refactoring.GitCommandError) as exc: - continuous_refactoring.run_command(command, cwd=tmp_path) - - error = exc.value - assert isinstance(error.__cause__, subprocess.CalledProcessError) - assert "command failed (python -c" in str(error) - assert "stdout:\nout\n" in str(error) - assert "stderr:\nerr\n" in str(error) + continuous_refactoring.run_command(_FAILING_COMMAND, cwd=tmp_path) + _assert_wrapped_error( + exc, + message_fragments=("command failed (python -c", "stdout:\nout\n", "stderr:\nerr\n"), + cause_type=subprocess.CalledProcessError, + ) def test_run_command_missing_command_raises_git_command_error( @@ -93,19 +126,17 @@ def _raise(*_args: object, **_kwargs: object) -> subprocess.CompletedProcess[str with pytest.raises(continuous_refactoring.GitCommandError) as exc: continuous_refactoring.run_command(["nonexistent-command"], cwd=tmp_path) - - assert isinstance(exc.value.__cause__, FileNotFoundError) + _assert_wrapped_error( + exc, + message_fragments=("command could not be started", "nonexistent-command"), + cause_type=FileNotFoundError, + cause_fragment="command not found", + ) def test_run_command_unchecked_returns_completed_process(tmp_path: 
Path) -> None: - command = [ - "python", - "-c", - "import sys\nprint('out')\nprint('err', file=sys.stderr)\nraise SystemExit(1)", - ] - result = continuous_refactoring.run_command( - command, + _FAILING_COMMAND, cwd=tmp_path, check=False, ) diff --git a/tests/test_loop_migration_tick.py b/tests/test_loop_migration_tick.py index 1f83d1c..ac9be6b 100644 --- a/tests/test_loop_migration_tick.py +++ b/tests/test_loop_migration_tick.py @@ -12,13 +12,13 @@ import continuous_refactoring import continuous_refactoring.loop from continuous_refactoring.artifacts import ( - CommandCapture, ContinuousRefactorError, create_run_artifacts, ) from continuous_refactoring.config import default_taste_text from continuous_refactoring.decisions import DecisionRecord, RouteOutcome from continuous_refactoring.effort import EffortBudget +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( MigrationManifest, PhaseSpec, @@ -27,16 +27,25 @@ migration_root, save_manifest, ) +from continuous_refactoring.planning import PlanningStepResult +from continuous_refactoring.planning_state import ( + complete_planning_step, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) from continuous_refactoring.phases import ExecutePhaseOutcome from continuous_refactoring.migration_tick import ( enumerate_eligible_manifests, + enumerate_eligible_planning_manifests, + try_planning_tick, try_migration_tick, ) from conftest import ( + init_repo, make_run_once_args, - noop_agent, - noop_tests, patch_classifier_trap, ) @@ -93,11 +102,119 @@ def _make_manifest( def _save(manifest: MigrationManifest, live_dir: Path) -> Path: root = migration_root(live_dir, manifest.name) root.mkdir(parents=True, exist_ok=True) + if manifest.status in ("ready", "in-progress"): + plan_path = root / "plan.md" + if not plan_path.exists(): + plan_path.write_text("# Plan\n", encoding="utf-8") + for phase in manifest.phases: + phase_path 
= root / phase.file + if not phase_path.exists(): + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text(f"# {phase.name}\n", encoding="utf-8") path = root / "manifest.json" save_manifest(manifest, path) return path +def _make_planning_manifest( + name: str, + *, + last_touch: datetime, + created_at: datetime | None = None, + awaiting_human_review: bool = False, + human_review_reason: str | None = None, + cooldown_until: datetime | None = None, +) -> MigrationManifest: + ts = (created_at or _utc_now()).isoformat(timespec="milliseconds") + return MigrationManifest( + name=name, + created_at=ts, + last_touch=last_touch.isoformat(timespec="milliseconds"), + wake_up_on=None, + awaiting_human_review=awaiting_human_review, + status="planning", + current_phase="", + phases=(), + human_review_reason=human_review_reason, + cooldown_until=( + cooldown_until.isoformat(timespec="milliseconds") + if cooldown_until is not None + else None + ), + ) + + +def _save_planning( + live_dir: Path, + repo_root: Path, + name: str, + *, + last_touch: datetime, + created_at: datetime | None = None, + awaiting_human_review: bool = False, + cooldown_until: datetime | None = None, + state: str = "valid", +) -> Path: + manifest = _make_planning_manifest( + name, + last_touch=last_touch, + created_at=created_at, + awaiting_human_review=awaiting_human_review, + human_review_reason="needs review" if awaiting_human_review else None, + cooldown_until=cooldown_until, + ) + root = migration_root(live_dir, manifest.name) + root.mkdir(parents=True, exist_ok=True) + path = root / "manifest.json" + save_manifest(manifest, path) + if state == "valid": + save_planning_state( + new_planning_state(f"Target {name}", now=manifest.created_at), + planning_state_path(root), + repo_root=repo_root, + published_migration_root=root, + ) + elif state == "invalid": + state_path = planning_state_path(root) + state_path.parent.mkdir(parents=True, exist_ok=True) + state_path.write_text("{not json", 
encoding="utf-8") + elif state != "missing": + raise AssertionError(f"unknown state fixture: {state}") + return path + + +def _save_review_two_planning_state( + repo_root: Path, + migration_dir: Path, + *, + now: str, +) -> None: + state = new_planning_state(f"Target {migration_dir.name}", now=now) + for step, outcome in ( + ("approaches", "completed"), + ("pick-best", "completed"), + ("expand", "completed"), + ("review", "findings"), + ("revise", "completed"), + ): + stdout_path = planning_stage_stdout_path(migration_dir, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(f"{step} output\n", encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at=now, + ) + save_planning_state( + state, + planning_state_path(migration_dir), + repo_root=repo_root, + published_migration_root=migration_dir, + ) + + def _seed_manifest( run_once_env: Path, *, @@ -215,18 +332,6 @@ def trap(*_a: object, **_k: object) -> object: monkeypatch.setattr("continuous_refactoring.migration_tick.execute_phase", trap) -def _patch_one_shot(monkeypatch: pytest.MonkeyPatch) -> list[str]: - prompts: list[str] = [] - - def capture(**kwargs: object) -> CommandCapture: - prompts.append(str(kwargs.get("prompt", ""))) - return noop_agent(**kwargs) - - monkeypatch.setattr("continuous_refactoring.loop.maybe_run_agent", capture) - monkeypatch.setattr("continuous_refactoring.loop.run_tests", noop_tests) - return prompts - - def _tick( live_dir: Path, repo_root: Path, @@ -267,6 +372,42 @@ def noop_finalize(*_args: object, **_kwargs: object) -> None: ) +def _planning_tick( + live_dir: Path, + repo_root: Path, + *, + taste: str = "runtime taste", + attempt: int = 7, + finalize_commit: Callable[..., object] | None = None, + skip_migration_names: tuple[str, ...] 
= (), +) -> tuple[RouteOutcome, DecisionRecord | None]: + artifacts = create_run_artifacts( + repo_root=repo_root, + agent="codex", + model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + def noop_finalize(*_args: object, **_kwargs: object) -> None: + return None + + return try_planning_tick( + live_dir, + taste, + repo_root, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + attempt=attempt, + finalize_commit=finalize_commit or noop_finalize, + skip_migration_names=skip_migration_names, + ) + + def test_enumerate_eligible_manifests_ignores_noise_and_sorts_by_created_at( tmp_path: Path, ) -> None: @@ -322,6 +463,620 @@ def test_enumerate_eligible_manifests_ignores_noise_and_sorts_by_created_at( assert [manifest.name for manifest, _ in candidates] == ["older", "newer"] assert [path.parent.name for _, path in candidates] == ["older", "newer"] + assert all(manifest.name != "__internal" for manifest, _ in candidates) + assert all(manifest.name != "awaiting-review" for manifest, _ in candidates) + assert all(manifest.name != "no-current-phase" for manifest, _ in candidates) + + +def test_try_planning_tick_forwards_log_mirroring( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root = tmp_path / "repo" + init_repo(repo_root) + live_dir = repo_root / "live" + live_dir.mkdir() + _save_planning(live_dir, repo_root, "planning", last_touch=_utc_now()) + captured: list[LogMirroring] = [] + + def fake_planning(*_args: object, **kwargs: object) -> PlanningStepResult: + captured.append(kwargs["log_mirroring"]) + return PlanningStepResult( + status="blocked", + migration_name="planning", + step="approaches", + next_step="approaches", + reason="publish blocked", + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + artifacts = create_run_artifacts( + repo_root=repo_root, + agent="codex", + 
model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + outcome, _record = try_planning_tick( + live_dir, + "runtime taste", + repo_root, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + attempt=7, + finalize_commit=lambda *_args, **_kwargs: None, + log_mirroring=LogMirroring(agent=True), + ) + + assert outcome == "blocked" + assert captured == [LogMirroring(agent=True)] + + +def test_try_migration_tick_forwards_log_mirroring_to_ready_and_execute( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root = tmp_path / "repo" + init_repo(repo_root) + live_dir = repo_root / "live" + live_dir.mkdir() + _save(_make_manifest("migration", last_touch=_utc_now() - timedelta(days=1)), live_dir) + captured: dict[str, LogMirroring] = {} + + def fake_ready( + _phase: PhaseSpec, _manifest: MigrationManifest, *_args: object, **kwargs: object, + ) -> tuple[str, str]: + captured["ready"] = kwargs["log_mirroring"] + return "yes", "ready" + + def fake_execute( + _phase: PhaseSpec, + _manifest: MigrationManifest, + _taste: object, + _repo_root: Path, + _live_dir: Path, + _artifacts: object, + **kwargs: object, + ) -> ExecutePhaseOutcome: + captured["execute"] = kwargs["log_mirroring"] + return ExecutePhaseOutcome(status="done", reason="ok") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.check_phase_ready", + fake_ready, + ) + monkeypatch.setattr( + "continuous_refactoring.migration_tick.execute_phase", + fake_execute, + ) + artifacts = create_run_artifacts( + repo_root=repo_root, + agent="codex", + model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + outcome, _record = try_migration_tick( + live_dir, + "runtime taste", + repo_root, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + validation_command="uv run pytest", + max_attempts=3, + 
attempt=7, + finalize_commit=lambda *_args, **_kwargs: None, + log_mirroring=LogMirroring(agent=True, command=True), + ) + + assert outcome == "commit" + assert captured == { + "ready": LogMirroring(agent=True, command=True), + "execute": LogMirroring(agent=True, command=True), + } + + +def test_enumeration_uses_visible_migration_dirs(tmp_path: Path) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + now = _utc_now() + + _save( + _make_manifest(".hidden", last_touch=now - timedelta(days=1)), + live_dir, + ) + _save( + _make_manifest("__transactions__", last_touch=now - timedelta(days=1)), + live_dir, + ) + _save( + _make_manifest( + "visible", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + + candidates = enumerate_eligible_manifests(live_dir, now) + + assert [manifest.name for manifest, _ in candidates] == ["visible"] + assert [path.parent.name for _, path in candidates] == ["visible"] + + +def test_enumerate_eligible_manifests_includes_cooling_effort_candidate_once( + tmp_path: Path, +) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + now = _utc_now() + over_budget_phase = replace(_PHASE_0, required_effort="xhigh") + + _save( + replace( + _make_manifest( + "cooling-over-budget", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + phases=(over_budget_phase, _PHASE_1), + ), + cooldown_until=(now + timedelta(hours=1)).isoformat(timespec="milliseconds"), + ), + live_dir, + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + + candidates = enumerate_eligible_manifests( + live_dir, + now, + EffortBudget(default_effort="high", max_allowed_effort="xhigh"), + ) + + assert [manifest.name for manifest, _ in candidates] == [ + "cooling-over-budget", + "ready-now", + ] + + +def test_enumerate_eligible_planning_manifests_includes_planning_migrations( + run_once_env: Path, +) -> None: + 
live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "newer-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ) + _save_planning( + live_dir, + run_once_env, + "older-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + ) + _save_planning( + live_dir, + run_once_env, + "needs-review", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=3), + awaiting_human_review=True, + ) + _save_planning( + live_dir, + run_once_env, + "cooling", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=4), + cooldown_until=now + timedelta(hours=1), + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=5), + ), + live_dir, + ) + + candidates = enumerate_eligible_planning_manifests(live_dir, now) + + assert [manifest.name for manifest, _ in candidates] == [ + "older-plan", + "newer-plan", + ] + + +def test_try_migration_tick_completes_planning_before_ready_phase( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "mid-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + planning_calls: list[tuple[str, str]] = [] + commits: list[tuple[str, str]] = [] + + def fake_planning( + migration_name: str, + target: str, + *_args: object, + **_kwargs: object, + ) -> PlanningStepResult: + planning_calls.append((migration_name, target)) + return PlanningStepResult( + status="published", + migration_name=migration_name, + step="approaches", + next_step="pick-best", + reason="planning accepted", + ) + + def finalize( + _repo_root: Path, + _head_before: str, + message: str, + 
**kwargs: object, + ) -> None: + commits.append((message, str(kwargs["phase"]))) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + _patch_check_ready( + monkeypatch, + "yes", + "ready check must not run before planning", + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick( + live_dir, + run_once_env, + finalize_commit=finalize, + ) + + assert outcome == "commit" + assert record is not None + assert record.call_role == "planning.approaches" + assert planning_calls == [("mid-plan", "Target mid-plan")] + assert commits == [ + ( + "continuous refactor: planning/mid-plan/approaches\n" + "\n" + "Why:\n" + "planning accepted", + "planning", + ) + ] + + +def test_try_migration_tick_does_not_call_ready_check_for_planning_status( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "only-plan", + last_touch=now - timedelta(days=1), + ) + + def fake_planning(*_args: object, **_kwargs: object) -> PlanningStepResult: + return PlanningStepResult( + status="published", + migration_name="only-plan", + step="approaches", + next_step="pick-best", + reason="ok", + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + _patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "commit" + assert record is not None + assert record.target == "only-plan" + + +def test_planning_tick_failed_review_two_record_includes_attempt_artifact_paths( + run_once_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + manifest_path = _save_planning( + live_dir, + run_once_env, + "review-plan", + last_touch=now - timedelta(days=1), + ) + 
_save_review_two_planning_state( + run_once_env, + manifest_path.parent, + now=now.isoformat(timespec="milliseconds"), + ) + artifacts = create_run_artifacts( + repo_root=run_once_env, + agent="codex", + model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + def fake_planning(*_args: object, **_kwargs: object) -> object: + raise ContinuousRefactorError( + "planning.review-2 failed: revised plan still has findings" + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + + outcome, record = try_planning_tick( + live_dir, + "runtime taste", + run_once_env, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + attempt=7, + finalize_commit=lambda *_args, **_kwargs: None, + ) + + stage_dir = artifacts.root / "attempt-007" / "planning" / "review-2" + assert outcome == "abandon" + assert record is not None + assert record.call_role == "planning.review-2" + assert record.failure_kind == "planning-step-failed" + assert record.agent_stdout_path == stage_dir / "agent.stdout.log" + assert record.agent_stderr_path == stage_dir / "agent.stderr.log" + assert record.agent_last_message_path == stage_dir / "agent-last-message.md" + + +def test_planning_tick_review_two_timeout_uses_timeout_failure_kind( + run_once_env: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + manifest_path = _save_planning( + live_dir, + run_once_env, + "review-plan", + last_touch=now - timedelta(days=1), + ) + _save_review_two_planning_state( + run_once_env, + manifest_path.parent, + now=now.isoformat(timespec="milliseconds"), + ) + artifacts = create_run_artifacts( + repo_root=run_once_env, + agent="codex", + model="fake-model", + effort="xhigh", + test_command="uv run pytest", + ) + + def fake_planning(*_args: object, **_kwargs: object) -> object: + raise 
ContinuousRefactorError("planning.review-2 failed: agent timed out") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + + outcome, record = try_planning_tick( + live_dir, + "runtime taste", + run_once_env, + artifacts, + agent="codex", + model="fake-model", + effort="xhigh", + timeout=123, + commit_message_prefix="continuous refactor", + attempt=7, + finalize_commit=lambda *_args, **_kwargs: None, + ) + + stage_dir = artifacts.root / "attempt-007" / "planning" / "review-2" + assert outcome == "abandon" + assert record is not None + assert record.call_role == "planning.review-2" + assert record.failure_kind == "timeout" + assert record.agent_stdout_path == stage_dir / "agent.stdout.log" + assert record.agent_stderr_path == stage_dir / "agent.stderr.log" + assert record.agent_last_message_path == stage_dir / "agent-last-message.md" + + +def test_try_planning_tick_skips_requested_planning_migrations( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "older-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + ) + _save_planning( + live_dir, + run_once_env, + "newer-plan", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ) + calls: list[str] = [] + + def fake_planning( + migration_name: str, + *_args: object, + **_kwargs: object, + ) -> PlanningStepResult: + calls.append(migration_name) + return PlanningStepResult( + status="blocked", + migration_name=migration_name, + step="approaches", + next_step="pick-best", + reason="needs human review", + ) + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + + outcome, record = _planning_tick( + live_dir, + run_once_env, + skip_migration_names=("older-plan",), + ) + + assert outcome == "blocked" + assert record is not 
None + assert record.target == "newer-plan" + assert calls == ["newer-plan"] + + +def test_missing_planning_state_blocks_before_ready_phase_or_source_routing( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "missing-state", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=2), + state="missing", + ) + _save( + _make_manifest( + "ready-now", + last_touch=now - timedelta(days=1), + created_at=now - timedelta(hours=1), + ), + live_dir, + ) + _patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-state-missing" + assert record.target == "missing-state" + assert ".planning/state.json" in record.summary + + +def test_invalid_planning_state_blocks_before_ready_phase_or_source_routing( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + _save_planning( + live_dir, + run_once_env, + "invalid-state", + last_touch=now - timedelta(days=1), + state="invalid", + ) + _patch_check_ready(monkeypatch, "yes") + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-state-invalid" + assert record.target == "invalid-state" + + +def test_planning_slug_mismatch_blocks_before_resume_publish( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + manifest_path = _save_planning( + live_dir, + run_once_env, + "visible-name", + last_touch=now - timedelta(days=1), + ) + 
manifest = load_manifest(manifest_path) + save_manifest(replace(manifest, name="manifest-name"), manifest_path) + + def fake_planning(*_args: object, **_kwargs: object) -> object: + raise AssertionError("slug mismatch must block before planning publish") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fake_planning, + ) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "blocked" + assert record is not None + assert record.target == "visible-name" + assert record.call_role == "planning.state" + assert record.failure_kind == "planning-consistency-error" + assert "manifest-slug-mismatch" in record.summary def test_try_migration_tick_skips_migrations_awaiting_human_review( @@ -393,6 +1148,95 @@ def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: assert record.summary == "ready check failed at /phase.md " +def test_execution_gate_blocks_inconsistent_migration_before_ready_check( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + now = _utc_now() + manifest = _make_manifest( + "missing-plan", + last_touch=now - timedelta(days=1), + ) + root = migration_root(live_dir, manifest.name) + root.mkdir(parents=True) + (root / _PHASE_0.file).write_text("# Setup\n", encoding="utf-8") + save_manifest(manifest, root / "manifest.json") + + def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: + raise AssertionError("check_phase_ready must not be called") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.check_phase_ready", + fail_ready, + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _tick(live_dir, run_once_env) + + assert outcome == "abandon" + assert record is not None + assert record.decision == "abandon" + assert record.call_role == "phase.execution-gate" + assert record.phase_reached == "phase.execution-gate" + assert record.failure_kind == "migration-consistency-error" + 
assert "missing-plan" in record.summary + + +def test_execution_gate_reports_malformed_manifest_before_candidate_loading( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + migration_dir = live_dir / "bad-manifest" + migration_dir.mkdir(parents=True) + (migration_dir / "manifest.json").write_text("{not json", encoding="utf-8") + + def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: + raise AssertionError("check_phase_ready must not be called") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.check_phase_ready", + fail_ready, + ) + _patch_execute_phase_trap(monkeypatch) + + outcome, record = _tick(live_dir, run_once_env) + + assert outcome == "abandon" + assert record is not None + assert record.decision == "abandon" + assert record.target == "bad-manifest" + assert record.call_role == "phase.execution-gate" + assert record.failure_kind == "migration-consistency-error" + assert "invalid-manifest" in record.summary + + +def test_planning_tick_reports_malformed_manifest_before_candidate_loading( + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = _migrations_dir(run_once_env) + migration_dir = live_dir / "bad-manifest" + migration_dir.mkdir(parents=True) + (migration_dir / "manifest.json").write_text("{not json", encoding="utf-8") + + def fail_planning(*_args: object, **_kwargs: object) -> object: + raise AssertionError("run_next_planning_step must not be called") + + monkeypatch.setattr( + "continuous_refactoring.migration_tick.run_next_planning_step", + fail_planning, + ) + + outcome, record = _planning_tick(live_dir, run_once_env) + + assert outcome == "abandon" + assert record is not None + assert record.decision == "abandon" + assert record.target == "bad-manifest" + assert record.call_role == "phase.execution-gate" + assert record.failure_kind == "migration-consistency-error" + assert "invalid-manifest" in record.summary + + def 
test_ready_check_wrapped_failure_keeps_root_cause_in_summary( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, @@ -419,6 +1263,7 @@ def fail_ready(*_args: object, **_kwargs: object) -> tuple[str, str]: assert outcome == "abandon" assert record is not None + assert record.failure_kind == "agent-infra-failure" assert record.summary == ( "Failed to start codex in : No such file or directory: 'codex'" ) @@ -1033,7 +1878,7 @@ def test_unverifiable_human_approval_uncertainty_still_blocks_for_review( def test_eligible_ready_migration_advances_phase( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, manifest_path = _seed_manifest( @@ -1050,7 +1895,6 @@ def test_eligible_ready_migration_advances_phase( ) check_calls = _patch_check_ready(monkeypatch, "yes") exec_calls = _patch_execute_phase(monkeypatch, status="done") - _patch_one_shot(monkeypatch) exit_code = _run_once(run_once_env) @@ -1067,6 +1911,7 @@ def test_eligible_ready_migration_advances_phase( def test_migration_labels_use_phase_file_not_numeric_cursor( run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + prompt_capture: list[str], capsys: pytest.CaptureFixture[str], ) -> None: now = _utc_now() @@ -1086,7 +1931,6 @@ def test_migration_labels_use_phase_file_not_numeric_cursor( ) _patch_check_ready(monkeypatch, "yes") _patch_execute_phase(monkeypatch, status="done") - _patch_one_shot(monkeypatch) monkeypatch.setattr( "continuous_refactoring.loop._finalize_commit", lambda _repo_root, _head_before, message, **_kwargs: commit_messages.append(message), @@ -1103,7 +1947,7 @@ def test_migration_labels_use_phase_file_not_numeric_cursor( def test_phase_ready_check_receives_runtime_taste( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, _ = _seed_manifest( @@ -1133,7 
+1977,6 @@ def fake_check_ready( fake_check_ready, ) _patch_execute_phase_trap(monkeypatch) - _patch_one_shot(monkeypatch) exit_code = _run_once(run_once_env) @@ -1147,18 +1990,16 @@ def fake_check_ready( def test_no_eligible_migrations_falls_through( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: live_dir = _migrations_dir(run_once_env) _patch_live_dir(monkeypatch, live_dir) classifier_calls = _patch_classifier_cohesive(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) # --------------------------------------------------------------------------- @@ -1167,7 +2008,7 @@ def test_no_eligible_migrations_falls_through( def test_eligible_not_ready_bumps_wake_up_on( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, _, manifest_path = _seed_manifest( @@ -1182,8 +2023,6 @@ def test_eligible_not_ready_bumps_wake_up_on( classifier_calls = _patch_classifier_cohesive(monkeypatch) _patch_check_ready(monkeypatch, "no", "prerequisites not met") _patch_execute_phase_trap(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 @@ -1195,7 +2034,7 @@ def test_eligible_not_ready_bumps_wake_up_on( assert reloaded.current_phase == "setup" assert eligible_now(reloaded, _utc_now()) is False - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) # --------------------------------------------------------------------------- @@ -1204,7 +2043,7 @@ def test_eligible_not_ready_bumps_wake_up_on( def test_future_wake_up_blocks_execution( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + 
run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, manifest, _ = _seed_manifest( @@ -1220,16 +2059,14 @@ def test_future_wake_up_blocks_execution( _patch_live_dir(monkeypatch, live_dir) classifier_calls = _patch_classifier_cohesive(monkeypatch) _patch_execute_phase_trap(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) def test_unverifiable_phase_stores_human_review_reason( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: import pytest @@ -1246,7 +2083,6 @@ def test_unverifiable_phase_stores_human_review_reason( _patch_classifier_cohesive(monkeypatch) _patch_check_ready(monkeypatch, "unverifiable", reason) _patch_execute_phase_trap(monkeypatch) - _patch_one_shot(monkeypatch) with pytest.raises(ContinuousRefactorError, match="external dependency"): _run_once(run_once_env) @@ -1257,7 +2093,7 @@ def test_unverifiable_phase_stores_human_review_reason( def test_empty_current_phase_skips_migration_path( - run_once_env: Path, monkeypatch: pytest.MonkeyPatch, + run_once_env: Path, monkeypatch: pytest.MonkeyPatch, prompt_capture: list[str], ) -> None: now = _utc_now() live_dir, manifest, manifest_path = _seed_manifest( @@ -1274,13 +2110,11 @@ def test_empty_current_phase_skips_migration_path( check_calls = _patch_check_ready(monkeypatch, "yes") _patch_execute_phase_trap(monkeypatch) classifier_calls = _patch_classifier_cohesive(monkeypatch) - prompts = _patch_one_shot(monkeypatch) - exit_code = _run_once(run_once_env) assert exit_code == 0 assert check_calls == [] - _assert_fell_through(classifier_calls, prompts) + _assert_fell_through(classifier_calls, prompt_capture) reloaded = load_manifest(manifest_path) assert reloaded.current_phase == "" 
diff --git a/tests/test_migration_consistency.py b/tests/test_migration_consistency.py new file mode 100644 index 0000000..74e84c8 --- /dev/null +++ b/tests/test_migration_consistency.py @@ -0,0 +1,305 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from continuous_refactoring.migration_consistency import ( + CONSISTENCY_MODES, + CONSISTENCY_SEVERITIES, + MigrationConsistencyFinding, + check_migration_consistency, + has_blocking_consistency_findings, + iter_visible_migration_dirs, +) +from continuous_refactoring.migrations import ( + MigrationManifest, + PhaseSpec, + save_manifest, +) + +_PHASE = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", +) + + +def _manifest( + name: str, + *, + status: str = "ready", + phase: PhaseSpec = _PHASE, +) -> MigrationManifest: + return MigrationManifest( + name=name, + created_at="2025-01-01T00:00:00.000+00:00", + last_touch="2025-01-01T00:00:00.000+00:00", + wake_up_on=None, + awaiting_human_review=False, + status=status, + current_phase=phase.name, + phases=(phase,), + ) + + +def _write_migration( + root: Path, + slug: str, + *, + manifest_name: str | None = None, + status: str = "ready", + phase: PhaseSpec = _PHASE, + write_plan: bool = True, + write_phase: bool = True, +) -> Path: + migration_dir = root / slug + migration_dir.mkdir(parents=True) + if write_plan: + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + if write_phase: + phase_path = migration_dir / phase.file + phase_path.parent.mkdir(parents=True, exist_ok=True) + phase_path.write_text("# Setup\n", encoding="utf-8") + save_manifest( + _manifest(manifest_name or slug, status=status, phase=phase), + migration_dir / "manifest.json", + ) + return migration_dir + + +def _codes(findings: list[MigrationConsistencyFinding]) -> set[str]: + return {finding.code for finding in findings} + + +def test_visible_migration_dirs_skip_hidden_dotted_and_transaction_dirs( + 
tmp_path: Path, +) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + (live_dir / "plain-file").write_text("ignore\n", encoding="utf-8") + (live_dir / "visible-b").mkdir() + (live_dir / ".staged").mkdir() + (live_dir / "__internal").mkdir() + (live_dir / "__transactions__").mkdir() + (live_dir / "visible-a").mkdir() + + dirs = iter_visible_migration_dirs(live_dir) + + assert [path.name for path in dirs] == ["visible-a", "visible-b"] + + +def test_visible_migration_dirs_skip_directory_symlinks(tmp_path: Path) -> None: + live_dir = tmp_path / "live" + live_dir.mkdir() + outside = tmp_path / "outside" + outside.mkdir() + (live_dir / "real").mkdir() + link = live_dir / "linked" + try: + link.symlink_to(outside, target_is_directory=True) + except (NotImplementedError, OSError) as error: + pytest.skip(f"directory symlinks unavailable: {error}") + + assert link.is_dir() + assert [path.name for path in iter_visible_migration_dirs(live_dir)] == ["real"] + + +def test_consistency_reports_missing_manifest(tmp_path: Path) -> None: + migration_dir = tmp_path / "missing-manifest" + migration_dir.mkdir() + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert [(finding.code, finding.severity, finding.mode, finding.path) for finding in findings] == [ + ( + "missing-manifest", + "error", + "doctor", + migration_dir / "manifest.json", + ) + ] + + +def test_consistency_rejects_manifest_slug_mismatch(tmp_path: Path) -> None: + migration_dir = _write_migration( + tmp_path, "actual-slug", manifest_name="different-slug", + ) + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "manifest-slug-mismatch" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_rejects_manifest_phase_symlink_escape(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "symlink-escape", write_phase=False) + outside = tmp_path / "outside-phase.md" + outside.write_text("# 
Outside\n", encoding="utf-8") + try: + (migration_dir / _PHASE.file).symlink_to(outside) + except (NotImplementedError, OSError) as error: + pytest.skip(f"symlinks unavailable: {error}") + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "phase-file-escapes-migration" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_reports_duplicate_phase_doc_indexes(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "duplicate-phase-index") + (migration_dir / "phase-0-other.md").write_text("# Other\n", encoding="utf-8") + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert "duplicate-phase-doc-index" in _codes(findings) + + +def test_consistency_reports_duplicate_phase_doc_names(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "duplicate-phase-name") + (migration_dir / "phase-1-setup.md").write_text("# Other\n", encoding="utf-8") + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert "duplicate-phase-doc-name" in _codes(findings) + + +def test_consistency_reports_manifest_phase_missing_doc(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "missing-phase-doc", write_phase=False) + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "missing-phase-file" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_consistency_requires_plan_for_ready_and_in_progress(tmp_path: Path) -> None: + migration_dir = _write_migration(tmp_path, "missing-plan", write_plan=False) + + execution_findings = check_migration_consistency( + migration_dir, mode="execution-gate", + ) + planning_findings = check_migration_consistency( + migration_dir, mode="planning-snapshot", + ) + + assert "missing-plan" in _codes(execution_findings) + assert "missing-plan" not in _codes(planning_findings) + assert 
has_blocking_consistency_findings(execution_findings) + + +@pytest.mark.parametrize("status", ["ready", "in-progress"]) +def test_doctor_requires_plan_only_for_ready_or_in_progress_statuses( + tmp_path: Path, + status: str, +) -> None: + migration_dir = _write_migration( + tmp_path, + f"doctor-missing-plan-{status}", + status=status, + write_plan=False, + ) + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert "missing-plan" in _codes(findings) + + +def test_doctor_skips_missing_plan_for_non_ready_statuses(tmp_path: Path) -> None: + migration_dir = _write_migration( + tmp_path, + "doctor-planning-status", + status="planning", + write_plan=False, + ) + + findings = check_migration_consistency(migration_dir, mode="doctor") + + assert "missing-plan" not in _codes(findings) + + +def test_consistency_modes_share_severity_blocking_contract(tmp_path: Path) -> None: + info = MigrationConsistencyFinding( + severity="info", + mode="doctor", + code="context", + path=tmp_path, + message="context", + ) + warning = MigrationConsistencyFinding( + severity="warning", + mode="ready-publish", + code="suspicious", + path=tmp_path, + message="suspicious", + ) + error = MigrationConsistencyFinding( + severity="error", + mode="execution-gate", + code="unsafe", + path=tmp_path, + message="unsafe", + ) + + assert set(CONSISTENCY_MODES) == { + "planning-snapshot", + "ready-publish", + "execution-gate", + "doctor", + } + assert set(CONSISTENCY_SEVERITIES) == {"info", "warning", "error"} + assert not has_blocking_consistency_findings([info, warning]) + assert has_blocking_consistency_findings([info, warning, error]) + + +def test_ready_publish_requires_precondition_and_definition_of_done_sections( + tmp_path: Path, +) -> None: + phase = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", + ) + migration_dir = _write_migration(tmp_path, "phase-doc-contract", phase=phase) + (migration_dir / phase.file).write_text("# 
Setup\n", encoding="utf-8") + + findings = check_migration_consistency(migration_dir, mode="ready-publish") + + assert "missing-phase-precondition" in _codes(findings) + assert "missing-phase-definition-of-done" in _codes(findings) + assert has_blocking_consistency_findings(findings) + + +def test_planning_snapshot_skips_phase_doc_section_requirements(tmp_path: Path) -> None: + phase = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", + ) + migration_dir = _write_migration(tmp_path, "planning-snapshot-no-phase-doc-check", phase=phase) + (migration_dir / phase.file).write_text("# Setup\n", encoding="utf-8") + + findings = check_migration_consistency(migration_dir, mode="planning-snapshot") + + assert "missing-phase-precondition" not in _codes(findings) + assert "missing-phase-definition-of-done" not in _codes(findings) + + +def test_execution_gate_skips_phase_doc_section_requirements(tmp_path: Path) -> None: + phase = PhaseSpec( + name="setup", + file="phase-0-setup.md", + done=False, + precondition="always", + ) + migration_dir = _write_migration(tmp_path, "execution-gate-no-phase-doc-check", phase=phase) + (migration_dir / phase.file).write_text("# Setup\n", encoding="utf-8") + + findings = check_migration_consistency(migration_dir, mode="execution-gate") + + assert "missing-phase-precondition" not in _codes(findings) + assert "missing-phase-definition-of-done" not in _codes(findings) diff --git a/tests/test_no_driver_branching.py b/tests/test_no_driver_branching.py index ae4dcfe..51818d4 100644 --- a/tests/test_no_driver_branching.py +++ b/tests/test_no_driver_branching.py @@ -15,6 +15,7 @@ ) from conftest import ( + install_run_command_spy, make_run_loop_args, make_run_once_args, noop_agent, @@ -139,21 +140,6 @@ def test_run_arg_helpers_match_cli_effort_defaults(run_once_env: Path) -> None: assert run_loop_args.max_allowed_effort == "xhigh" -def _install_argv_spy(monkeypatch: pytest.MonkeyPatch) -> list[tuple[str, 
...]]: - """Record every argv passed to git.run_command across the driver.""" - captured: list[tuple[str, ...]] = [] - real_run_command = continuous_refactoring.git.run_command - - def spy(command, cwd, *args, **kwargs): # type: ignore[no-untyped-def] - captured.append(tuple(command)) - return real_run_command(command, cwd, *args, **kwargs) - - # The driver imports run_command into multiple modules; patch each binding. - monkeypatch.setattr("continuous_refactoring.git.run_command", spy) - monkeypatch.setattr("continuous_refactoring.loop.run_command", spy) - return captured - - def _assert_no_branching(captured: list[tuple[str, ...]]) -> None: branching = [argv for argv in captured if _is_branching_argv(argv)] assert not branching, ( @@ -182,6 +168,9 @@ def _seed_live_manifest(live_dir: Path, name: str = "auto-migration") -> None: ) migration_dir = live_dir / name migration_dir.mkdir(parents=True, exist_ok=True) + (migration_dir / "plan.md").write_text("# Plan\n", encoding="utf-8") + for phase in manifest.phases: + (migration_dir / phase.file).write_text(f"# {phase.name}\n", encoding="utf-8") save_manifest(manifest, migration_dir / "manifest.json") @@ -192,11 +181,12 @@ def test_run_once_makes_no_branching_calls( monkeypatch.setattr("continuous_refactoring.loop.maybe_run_agent", noop_agent) monkeypatch.setattr("continuous_refactoring.loop.run_tests", noop_tests) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_once(make_run_once_args(run_once_env)) assert exit_code == 0 + assert ("git", "ls-files", "-z") in captured _assert_no_branching(captured) @@ -224,7 +214,7 @@ def touching_agent(**kwargs: object) -> object: encoding="utf-8", ) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_loop( make_run_loop_args( @@ -289,7 +279,7 @@ def fake_execute_phase( 
"continuous_refactoring.migration_tick.execute_phase", fake_execute_phase, ) - captured = _install_argv_spy(monkeypatch) + captured = install_run_command_spy(monkeypatch) exit_code = continuous_refactoring.run_migrations_focused_loop( make_run_loop_args( diff --git a/tests/test_phases.py b/tests/test_phases.py index 2c60301..2b747fc 100644 --- a/tests/test_phases.py +++ b/tests/test_phases.py @@ -14,6 +14,7 @@ create_run_artifacts, ContinuousRefactorError, ) +from continuous_refactoring.log_mirroring import LogMirroring from continuous_refactoring.migrations import ( MigrationManifest, PhaseSpec, @@ -136,6 +137,22 @@ def _status_block( """ +def _assert_boundary_error( + exc: pytest.ExceptionInfo[ContinuousRefactorError], + *, + message_fragments: tuple[str, ...], + cause_type: type[BaseException] | None, +) -> None: + error = exc.value + assert isinstance(error, ContinuousRefactorError) + for fragment in message_fragments: + assert fragment in str(error) + if cause_type is None: + assert error.__cause__ is None + else: + assert isinstance(error.__cause__, cause_type) + + def _patch_status_agent( monkeypatch: pytest.MonkeyPatch, stdout: str, @@ -218,6 +235,41 @@ def test_check_ready_yes( assert reason == "yes" +def test_check_phase_ready_passes_log_mirroring_to_agent( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + captured: list[bool] = [] + + def fake_agent(**kwargs: object) -> CommandCapture: + captured.append(bool(kwargs["mirror_to_terminal"])) + for key in ("stdout_path", "stderr_path"): + path = Path(str(kwargs[key])) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("", encoding="utf-8") + return _fake_capture("ready: yes\n", tmp_path=tmp_path) + + monkeypatch.setattr( + "continuous_refactoring.phases.maybe_run_agent", + fake_agent, + ) + + verdict, _reason = check_phase_ready( + _PHASE_0, + _make_manifest(), + tmp_path, + _make_artifacts(tmp_path), + taste=_TASTE, + agent="codex", + model="fake", + effort="low", + 
timeout=None, + log_mirroring=LogMirroring(agent=True), + ) + + assert verdict == "yes" + assert captured == [True] + + def test_check_ready_yes_with_trailing_noise( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -277,8 +329,11 @@ def fail_agent(*_args: object, **_kwargs: object) -> None: _PHASE_0, _make_manifest(), tmp_path, _make_artifacts(tmp_path), taste=_TASTE, agent="codex", model="fake", effort="low", timeout=None, ) - - assert exc_info.value.__cause__ is failure + _assert_boundary_error( + exc_info, + message_fragments=("agent command failed",), + cause_type=OSError, + ) def test_check_ready_nonzero_exit_wraps_called_process_error( @@ -296,9 +351,13 @@ def test_check_ready_nonzero_exit_wraps_called_process_error( taste=_TASTE, agent="codex", model="fake", effort="low", timeout=None, ) - cause = exc_info.value.__cause__ - assert isinstance(cause, subprocess.CalledProcessError) - assert cause.returncode == 7 + _assert_boundary_error( + exc_info, + message_fragments=("Phase ready-check agent failed with exit code 7",), + cause_type=subprocess.CalledProcessError, + ) + assert isinstance(exc_info.value.__cause__, subprocess.CalledProcessError) + assert exc_info.value.__cause__.returncode == 7 def test_check_ready_no( @@ -437,6 +496,70 @@ def test_ready_yes_green_tests_flips_phase_done( assert reloaded.current_phase == "migrate" +def test_execute_phase_passes_log_mirroring_to_agent_and_validation( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + live_dir = tmp_path / "live" + manifest = _make_manifest() + _save_manifest_to_disk(manifest, live_dir) + captured_agent: list[bool] = [] + captured_command: list[bool] = [] + + monkeypatch.setattr( + "continuous_refactoring.phases.get_head_sha", lambda _: "abc123", + ) + + def fake_agent(**kwargs: object) -> CommandCapture: + captured_agent.append(bool(kwargs["mirror_to_terminal"])) + for key in ("stdout_path", "stderr_path"): + path = Path(str(kwargs[key])) + 
path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("", encoding="utf-8") + return _fake_capture("executed phase work\n", tmp_path=tmp_path) + + def fake_tests( + test_command: str, + repo_root: Path, + stdout_path: Path, + stderr_path: Path, + **kwargs: object, + ) -> CommandCapture: + captured_command.append(bool(kwargs["mirror_to_terminal"])) + return _passing_tests( + test_command, + repo_root, + stdout_path, + stderr_path, + **kwargs, + ) + + monkeypatch.setattr( + "continuous_refactoring.phases.maybe_run_agent", + fake_agent, + ) + monkeypatch.setattr("continuous_refactoring.phases.run_tests", fake_tests) + + outcome = execute_phase( + _PHASE_0, + manifest, + _TASTE, + tmp_path, + live_dir, + _make_artifacts(tmp_path), + agent="codex", + model="fake", + effort="low", + timeout=None, + validation_command="true", + max_attempts=1, + log_mirroring=LogMirroring(agent=True, command=True), + ) + + assert outcome.status == "done" + assert captured_agent == [True] + assert captured_command == [True] + + def test_execute_phase_call_messages_use_phase_name( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, @@ -1196,7 +1319,11 @@ def fail_if_called(*args: object, **kwargs: object) -> object: max_attempts=1, ) - assert exc_info.value.__cause__ is None + _assert_boundary_error( + exc_info, + message_fragments=("not found in manifest",), + cause_type=None, + ) assert manifest_path.read_text(encoding="utf-8") == original diff --git a/tests/test_planning.py b/tests/test_planning.py index 145856a..b6da35b 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -1,9 +1,11 @@ from __future__ import annotations +import shutil from pathlib import Path import pytest +from conftest import init_repo from continuous_refactoring.artifacts import ( CommandCapture, ContinuousRefactorError, @@ -12,19 +14,32 @@ ) from continuous_refactoring.migrations import ( MigrationManifest, - intentional_skips_dir, load_manifest, migration_root, save_manifest, ) from 
continuous_refactoring.planning import ( + _build_durable_planning_context, _parse_final_decision, _refresh_manifest, _review_has_findings, _discover_phase_files, PlanningOutcome, - run_planning, + PlanningRefineRequest, + run_next_planning_step, + run_refine_planning_step, ) +from continuous_refactoring.git import run_command +from continuous_refactoring.log_mirroring import LogMirroring +from continuous_refactoring.planning_state import ( + complete_planning_step, + load_planning_state, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + save_planning_state, +) +from continuous_refactoring.planning_publish import snapshot_tree_digest _TASTE = "- Prefer deletion over wrapping.\n- Fail fast at boundaries." @@ -37,7 +52,10 @@ def _planning_context( monkeypatch: pytest.MonkeyPatch, ) -> tuple[Path, Path]: monkeypatch.setenv("TMPDIR", str(tmp_path / "tmpdir")) + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) (tmp_path / "tmpdir").mkdir() + (tmp_path / "xdg").mkdir() + init_repo(tmp_path) live_dir = tmp_path / "live" live_dir.mkdir() @@ -45,6 +63,26 @@ def _planning_context( return live_dir, mig_root +def _planning_repo_context( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> tuple[Path, Path, Path]: + monkeypatch.setenv("TMPDIR", str(tmp_path / "tmpdir")) + monkeypatch.setenv("XDG_DATA_HOME", str(tmp_path / "xdg")) + (tmp_path / "tmpdir").mkdir() + (tmp_path / "xdg").mkdir() + repo_root = tmp_path / "repo" + init_repo(repo_root) + live_dir = repo_root / "live" + live_dir.mkdir() + return repo_root, live_dir, migration_root(live_dir, _MIGRATION) + + +def _commit_all(repo_root: Path, message: str) -> None: + run_command(["git", "add", "-A"], cwd=repo_root) + run_command(["git", "commit", "-m", message], cwd=repo_root) + + def _planning_decision_response(decision: str, reason: str) -> tuple[str, dict[str, str]]: return f"final-decision: {decision} — {reason}\n", {} @@ -59,11 +97,16 @@ def _run_planning( mock = 
_MockAgent(mig_root, responses) monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) - outcome = run_planning( - _MIGRATION, _TARGET, _TASTE, tmp_path, live_dir, - _make_artifacts(tmp_path), - agent="codex", model="fake", effort="low", timeout=None, - ) + outcome: PlanningOutcome | None = None + while outcome is None: + result = run_next_planning_step( + _MIGRATION, _TARGET, _TASTE, tmp_path, live_dir, + _make_artifacts(tmp_path), + agent="codex", model="fake", effort="low", timeout=None, + ) + assert result.status == "published", result.reason + _commit_all(tmp_path, f"planning {result.step}") + outcome = result.terminal_outcome return outcome, mock, mig_root @@ -103,8 +146,12 @@ def __call__(self, **kwargs: object) -> CommandCapture: stdout_path = Path(str(kwargs["stdout_path"])) self.stage_labels.append(stdout_path.parent.name) + migration_dir = _prompt_migration_dir( + self.prompts[-1], + Path(str(kwargs["repo_root"])), + ) for rel_path, content in writes.items(): - full = self._mig_root / rel_path + full = migration_dir / rel_path full.parent.mkdir(parents=True, exist_ok=True) full.write_text(content, encoding="utf-8") @@ -124,6 +171,239 @@ def __call__(self, **kwargs: object) -> CommandCapture: ) +class _WorkspaceAgent: + def __init__( + self, + responses: list[tuple[str, dict[str, str], int]], + ) -> None: + self._responses = responses + self._index = 0 + self.stage_labels: list[str] = [] + self.prompts: list[str] = [] + self.migration_dirs: list[Path] = [] + self.mirror_to_terminal: list[bool] = [] + + def __call__(self, **kwargs: object) -> CommandCapture: + assert self._index < len(self._responses), ( + f"Unexpected agent call #{self._index + 1}" + ) + stdout, writes, returncode = self._responses[self._index] + self._index += 1 + prompt = str(kwargs["prompt"]) + stdout_path = Path(str(kwargs["stdout_path"])) + stderr_path = Path(str(kwargs["stderr_path"])) + migration_dir = _prompt_migration_dir(prompt, 
Path(str(kwargs["repo_root"]))) + + self.prompts.append(prompt) + self.stage_labels.append(stdout_path.parent.name) + self.migration_dirs.append(migration_dir) + self.mirror_to_terminal.append(bool(kwargs["mirror_to_terminal"])) + + for rel_path, content in writes.items(): + full = migration_dir / rel_path + full.parent.mkdir(parents=True, exist_ok=True) + full.write_text(content, encoding="utf-8") + + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + stderr_path.parent.mkdir(parents=True, exist_ok=True) + stderr_path.write_text("", encoding="utf-8") + return CommandCapture( + command=("fake",), + returncode=returncode, + stdout=stdout, + stderr="", + stdout_path=stdout_path, + stderr_path=stderr_path, + ) + + +def _prompt_migration_dir(prompt: str, repo_root: Path) -> Path: + for line in prompt.splitlines(): + if line.startswith("Migration directory:"): + path = Path(line.split(":", 1)[1].strip()) + return path if path.is_absolute() else repo_root / path + raise AssertionError("Migration directory missing from prompt") + + +def _workspace_response( + stdout: str, + writes: dict[str, str] | None = None, + *, + returncode: int = 0, +) -> tuple[str, dict[str, str], int]: + return stdout, writes or {}, returncode + + +def _run_next_step( + repo_root: Path, + live_dir: Path, + responses: list[tuple[str, dict[str, str], int]], + monkeypatch: pytest.MonkeyPatch, +): + mock = _WorkspaceAgent(responses) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) + result = run_next_planning_step( + _MIGRATION, + _TARGET, + _TASTE, + repo_root, + live_dir, + _make_artifacts(repo_root), + agent="codex", + model="fake", + effort="low", + timeout=None, + ) + return result, mock + + +def test_run_next_planning_step_passes_log_mirroring_to_agent( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, _mig_root = _planning_repo_context(tmp_path, monkeypatch) + mock = 
_WorkspaceAgent( + [ + _workspace_response( + "approach: focused cleanup\n", + {"approaches/focused.md": "Use a focused cleanup.\n"}, + ) + ] + ) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) + + result = run_next_planning_step( + _MIGRATION, + _TARGET, + _TASTE, + repo_root, + live_dir, + _make_artifacts(repo_root), + agent="codex", + model="fake", + effort="low", + timeout=None, + log_mirroring=LogMirroring(agent=True), + ) + + assert result.status == "published" + assert mock.mirror_to_terminal == [True] + staged_stdout = ( + migration_root(live_dir, _MIGRATION) + / ".planning" + / "stages" + / "approaches.stdout.md" + ) + assert staged_stdout.exists() + + +def _run_refine_step( + repo_root: Path, + live_dir: Path, + responses: list[tuple[str, dict[str, str], int]], + monkeypatch: pytest.MonkeyPatch, + *, + feedback: str = "Refine this plan.", +): + mock = _WorkspaceAgent(responses) + monkeypatch.setattr("continuous_refactoring.planning.maybe_run_agent", mock) + result = run_refine_planning_step( + PlanningRefineRequest( + migration_name=_MIGRATION, + feedback_text=feedback, + feedback_source="message", + taste=_TASTE, + repo_root=repo_root, + live_dir=live_dir, + artifacts=_make_artifacts(repo_root), + agent="codex", + model="fake", + effort="low", + ) + ) + return result, mock + + +def _seed_planning_snapshot( + repo_root: Path, + live_dir: Path, + completed: list[tuple[str, str, str]], + *, + plan_text: str | None = None, + phase_text: str | None = None, +) -> None: + mig_root = migration_root(live_dir, _MIGRATION) + mig_root.mkdir(parents=True, exist_ok=True) + manifest_path = mig_root / "manifest.json" + now = "2026-04-29T12:00:00.000+00:00" + manifest = MigrationManifest( + name=_MIGRATION, + created_at=now, + last_touch=now, + wake_up_on=None, + awaiting_human_review=False, + status="planning", + current_phase="", + phases=(), + ) + save_manifest(manifest, manifest_path) + if plan_text is not None: + (mig_root / 
"plan.md").write_text(plan_text, encoding="utf-8") + if phase_text is not None: + (mig_root / "phase-0-setup.md").write_text(phase_text, encoding="utf-8") + _refresh_manifest(manifest, manifest_path, mig_root=mig_root) + + state = new_planning_state(_TARGET, now=now) + for step, outcome, stdout in completed: + stdout_path = planning_stage_stdout_path(mig_root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at=now, + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + _commit_all(repo_root, "seed planning snapshot") + + +def _seed_ready_snapshot(repo_root: Path, live_dir: Path) -> None: + mig_root = migration_root(live_dir, _MIGRATION) + now = "2026-04-29T12:00:00.000+00:00" + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ], + plan_text="# Plan\n", + phase_text=_phase_doc("always", "Setup is complete."), + ) + state = load_planning_state(repo_root, planning_state_path(mig_root)) + stdout_path = planning_stage_stdout_path(mig_root, "final-review") + stdout_path.write_text( + "final-decision: approve-auto - ready\n", + encoding="utf-8", + ) + state = complete_planning_step( + state, + "final-review", + "approve-auto", + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at=now, + final_reason="ready", + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + manifest = load_manifest(mig_root / "manifest.json") + _refresh_manifest(manifest, mig_root / "manifest.json", status="ready") + _commit_all(repo_root, "seed ready snapshot") + + def _phase_doc(precondition: str, definition_of_done: 
str) -> str: return ( f"## Precondition\n\n{precondition}\n\n" @@ -155,6 +435,510 @@ def _base_responses() -> list[tuple[str, dict[str, str]]]: ] +# --------------------------------------------------------------------------- +# one-step planning +# --------------------------------------------------------------------------- + + +def test_successful_step_publishes_docs_and_state_together( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + + result, mock = _run_next_step( + repo_root, + live_dir, + [ + _workspace_response( + "Generated 2 approaches\n", + {"approaches/incremental.md": "# Incremental\n"}, + ) + ], + monkeypatch, + ) + + assert result.status == "published" + assert result.step == "approaches" + assert result.next_step == "pick-best" + assert result.terminal_outcome is None + assert mock.stage_labels == ["approaches"] + assert mock.migration_dirs[0] != mig_root + + manifest = load_manifest(mig_root / "manifest.json") + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert manifest.status == "planning" + assert state.next_step == "pick-best" + assert [step.name for step in state.completed_steps] == ["approaches"] + assert (mig_root / "approaches" / "incremental.md").is_file() + assert planning_stage_stdout_path(mig_root, "approaches").read_text( + encoding="utf-8" + ) == "Generated 2 approaches\n" + + +def test_failed_step_does_not_publish_partial_docs_or_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + before = snapshot_tree_digest(mig_root) + + with pytest.raises(ContinuousRefactorError, match="planning.expand failed"): + _run_next_step( + repo_root, + 
live_dir, + [ + _workspace_response( + "bad expansion\n", + {"plan.md": "# Partial bad plan\n"}, + returncode=1, + ) + ], + monkeypatch, + ) + + assert snapshot_tree_digest(mig_root) == before + assert not (mig_root / "plan.md").exists() + assert not planning_stage_stdout_path(mig_root, "expand").exists() + assert load_planning_state(repo_root, planning_state_path(mig_root)).next_step == "expand" + + +def test_resume_skips_completed_steps( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ], + ) + + result, mock = _run_next_step( + repo_root, + live_dir, + [ + _workspace_response( + "Expanded.\n", + { + "plan.md": "# Plan\n", + "phase-0-setup.md": _phase_doc("always", "Setup is complete."), + }, + ) + ], + monkeypatch, + ) + + assert result.status == "published" + assert mock.stage_labels == ["expand"] + state = load_planning_state(repo_root, planning_state_path(mig_root)) + assert [step.name for step in state.completed_steps] == [ + "approaches", + "pick-best", + "expand", + ] + assert state.next_step == "review" + + +def test_revise_path_records_review_findings_as_planning_state( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch) + _seed_planning_snapshot( + repo_root, + live_dir, + [ + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose incremental.\n"), + ("expand", "completed", "Expanded.\n"), + ], + plan_text="# Plan v1\n", + phase_text=_phase_doc("always", "Setup is complete."), + ) + + result, mock = _run_next_step( + repo_root, + live_dir, + [_workspace_response("1. 
def test_review_two_findings_fail_without_publish(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that findings from ``review-2`` raise and leave the live snapshot untouched."""
    repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch)
    history = [
        ("approaches", "completed", "Generated approaches.\n"),
        ("pick-best", "completed", "Chose incremental.\n"),
        ("expand", "completed", "Expanded.\n"),
        ("review", "findings", "1. Missing rollback step.\n"),
        ("revise", "completed", "Revised.\n"),
    ]
    _seed_planning_snapshot(
        repo_root,
        live_dir,
        history,
        plan_text="# Plan v2\n",
        phase_text=_phase_doc("always", "Setup is complete."),
    )
    digest_before = snapshot_tree_digest(mig_root)
    expected_error = "planning.review-2 failed: revised plan still has findings"

    with pytest.raises(ContinuousRefactorError, match=expected_error):
        _run_next_step(
            repo_root,
            live_dir,
            [_workspace_response("1. Still broken.\n", {})],
            monkeypatch,
        )

    # Nothing was published: same tree digest, no durable stdout, same cursor.
    assert snapshot_tree_digest(mig_root) == digest_before
    assert not planning_stage_stdout_path(mig_root, "review-2").exists()
    state = load_planning_state(repo_root, planning_state_path(mig_root))
    assert state.next_step == "review-2"
def test_final_ready_rejects_inconsistent_manifest_docs_before_publish(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that an approving final review is blocked when workspace validation fails.

    The seeded phase document contains only a ``## Precondition`` section; the
    step must report ``blocked`` and publish nothing.
    """
    repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch)
    history = [
        ("approaches", "completed", "Generated approaches.\n"),
        ("pick-best", "completed", "Chose incremental.\n"),
        ("expand", "completed", "Expanded.\n"),
        ("review", "clear", "no findings\n"),
    ]
    _seed_planning_snapshot(
        repo_root,
        live_dir,
        history,
        plan_text="# Plan\n",
        phase_text="## Precondition\n\nalways\n",
    )
    digest_before = snapshot_tree_digest(mig_root)

    approval = _workspace_response("final-decision: approve-auto - solid\n", {})
    outcome, agent = _run_next_step(repo_root, live_dir, [approval], monkeypatch)

    assert outcome.status == "blocked"
    assert "workspace validation failed" in outcome.reason
    assert agent.stage_labels == ["final-review"]
    # The live migration is exactly as it was before the attempt.
    assert snapshot_tree_digest(mig_root) == digest_before
    assert not planning_stage_stdout_path(mig_root, "final-review").exists()
    manifest = load_manifest(mig_root / "manifest.json")
    assert manifest.status == "planning"
    state = load_planning_state(repo_root, planning_state_path(mig_root))
    assert state.next_step == "final-review"
def test_refine_ready_reopen_runs_one_revise_step(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that refining a ready migration runs a single ``revise`` step.

    Afterwards the manifest status is ``planning`` again and the cursor points
    at ``review-2``.
    """
    repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch)
    _seed_ready_snapshot(repo_root, live_dir)

    revised_files = {
        "plan.md": "# Plan v2\n",
        "phase-0-setup.md": _phase_doc("always", "Setup is complete."),
    }
    outcome, agent = _run_refine_step(
        repo_root,
        live_dir,
        [_workspace_response("Revised with feedback.\n", revised_files)],
        monkeypatch,
        feedback="Narrow the rollout.",
    )

    manifest = load_manifest(mig_root / "manifest.json")
    state = load_planning_state(repo_root, planning_state_path(mig_root))
    assert outcome.status == "published"
    assert outcome.step == "revise"
    assert agent.stage_labels == ["revise"]
    assert manifest.status == "planning"
    assert state.next_step == "review-2"
    assert state.revision_base_step_counts == (5,)
def test_refine_ready_can_reopen_after_prior_refine_cycle(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a ready migration can be refined a second time.

    The test runs one full refine cycle (revise, review-2, final-review),
    committing after each step, then refines again. It asserts that the state
    records both feedback messages and both revise steps, and that stage stdout
    files from earlier runs are kept beside the newly numbered ones.
    """
    repo_root, live_dir, mig_root = _planning_repo_context(tmp_path, monkeypatch)
    _seed_ready_snapshot(repo_root, live_dir)
    # Capture the seeded final-review stdout so we can prove it is not rewritten.
    original_final_review = planning_stage_stdout_path(mig_root, "final-review")
    original_final_review_text = original_final_review.read_text(encoding="utf-8")

    # First refine: reopens the ready migration with a revise step.
    result, _mock = _run_refine_step(
        repo_root,
        live_dir,
        [
            _workspace_response(
                "Revised with feedback.\n",
                {
                    "plan.md": "# Plan v2\n",
                    "phase-0-setup.md": _phase_doc("always", "Setup is complete."),
                },
            )
        ],
        monkeypatch,
        feedback="Narrow the rollout.",
    )
    assert result.status == "published"
    assert result.step == "revise"
    _commit_all(repo_root, "planning refine")

    # Run the two remaining steps of the first cycle, committing after each one.
    for responses in (
        [_workspace_response("Reviewed revised plan. no findings.\n")],
        [_workspace_response("final-decision: approve-auto - refined ready\n")],
    ):
        result, _mock = _run_next_step(repo_root, live_dir, responses, monkeypatch)
        assert result.status == "published"
        _commit_all(repo_root, f"planning {result.step}")

    # Second refine: the behavior under test.
    result, mock = _run_refine_step(
        repo_root,
        live_dir,
        [
            _workspace_response(
                "Revised again.\n",
                {
                    "plan.md": "# Plan v3\n",
                    "phase-0-setup.md": _phase_doc("always", "Setup is complete."),
                },
            )
        ],
        monkeypatch,
        feedback="Make the second pass smaller.",
    )

    state = load_planning_state(repo_root, planning_state_path(mig_root))
    assert result.status == "published"
    assert result.step == "revise"
    assert mock.stage_labels == ["revise"]
    assert state.next_step == "review-2"
    # Feedback accumulates in the order it was given.
    assert [feedback.text for feedback in state.feedback] == [
        "Narrow the rollout.",
        "Make the second pass smaller.",
    ]
    # Completed steps are appended across cycles, not replaced.
    assert [step.name for step in state.completed_steps] == [
        "approaches",
        "pick-best",
        "expand",
        "review",
        "final-review",
        "revise",
        "review-2",
        "final-review",
        "revise",
    ]
    # NOTE(review): 5 and 8 appear to be the completed-step counts just before
    # each revise in the list above -- confirm against the planning module.
    assert state.revision_base_step_counts == (5, 8)
    final_review_refs = [
        step.outputs["stdout"]
        for step in state.completed_steps
        if step.name == "final-review"
    ]
    # The repeated final-review was recorded under a "-2" suffixed file.
    assert final_review_refs == [
        "live/rework-auth/.planning/stages/final-review.stdout.md",
        "live/rework-auth/.planning/stages/final-review-2.stdout.md",
    ]
    assert original_final_review.read_text(encoding="utf-8") == original_final_review_text
    assert (
        mig_root / ".planning" / "stages" / "final-review-2.stdout.md"
    ).read_text(encoding="utf-8") == "final-decision: approve-auto - refined ready\n"
    # Both revise outputs coexist: the first cycle's and the second cycle's.
    assert (
        mig_root / ".planning" / "stages" / "revise.stdout.md"
    ).read_text(encoding="utf-8") == "Revised with feedback.\n"
    assert (
        mig_root / ".planning" / "stages" / "revise-2.stdout.md"
    ).read_text(encoding="utf-8") == "Revised again.\n"
--------------------------------------------------------------------------- # initial decisions # --------------------------------------------------------------------------- @@ -214,7 +998,7 @@ def test_initial_decisions( assert manifest.human_review_reason is None if should_skip: - skip_file = intentional_skips_dir(live_dir) / f"{_MIGRATION}.md" + skip_file = mig_root / "intentional-skip.md" assert skip_file.exists() skip_content = skip_file.read_text(encoding="utf-8") assert _TARGET in skip_content @@ -252,11 +1036,89 @@ def test_no_findings_path_keeps_stage_order_and_context_sources( "final-review", ] assert "Approaches:\n### incremental\n# Incremental\nStep by step approach." in mock.prompts[1] - assert "Chosen approach:\nChose incremental approach.\n" in mock.prompts[2] + assert "Chosen approach (from live/rework-auth/.planning/stages/pick-best.stdout.md):" in mock.prompts[2] + assert "Chose incremental approach.\n" in mock.prompts[2] assert "Plan:\n# Migration Plan\nPhased approach." in mock.prompts[3] assert "Plan:\n# Migration Plan\nPhased approach." 
def test_run_planning_persists_durable_stage_outputs(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a full approved run records a stdout file for every completed stage."""
    live_dir, mig_root = _planning_context(tmp_path, monkeypatch)
    scripted = _base_responses() + [
        _planning_decision_response("approve-auto", "plan is solid")
    ]
    _run_planning(tmp_path, live_dir, scripted, monkeypatch)

    state = load_planning_state(tmp_path, planning_state_path(mig_root))

    assert state.next_step == "terminal-ready"
    assert state.final_decision == "approve-auto"
    assert state.final_reason == "plan is solid"
    completed_names = [done.name for done in state.completed_steps]
    assert completed_names == [
        "approaches",
        "pick-best",
        "expand",
        "review",
        "final-review",
    ]
    # Each recorded reference is repo-relative and points at an existing file.
    stage_prefix = "live/rework-auth/.planning/stages/"
    for done in state.completed_steps:
        recorded = done.outputs["stdout"]
        assert recorded.startswith(stage_prefix)
        assert (tmp_path / recorded).is_file()
shutil.rmtree(artifacts.root) + + context = _build_durable_planning_context( + repo_root=tmp_path, + live_dir=live_dir, + migration_name=_MIGRATION, + state=state, + ) + + assert "Chosen approach" in context + assert "Chose incremental approach." in context + assert ".planning/stages/pick-best.stdout.md" in context + assert "wrong transient output" not in context + assert "agent.stdout.log" not in context + + # --------------------------------------------------------------------------- # review findings trigger revise + review-2 # --------------------------------------------------------------------------- @@ -366,7 +1228,11 @@ def test_revise_path_keeps_existing_prompt_stages_with_distinct_stage_labels( "You are a planning agent expanding the chosen approach into a detailed migration plan." in mock.prompts[4] ) - assert "Review findings to address:\n1. Missing rollback step.\n2. Phase order unclear.\n" in mock.prompts[4] + assert ( + "Review findings to address (from live/rework-auth/.planning/stages/review.stdout.md):" + in mock.prompts[4] + ) + assert "1. Missing rollback step.\n2. Phase order unclear.\n" in mock.prompts[4] assert "You are a planning reviewer examining a refactoring migration plan." in mock.prompts[5] assert "Plan (revised):\n# Plan v2 (revised)" in mock.prompts[5] @@ -374,7 +1240,7 @@ def test_revise_path_keeps_existing_prompt_stages_with_distinct_stage_labels( def test_review_two_findings_fail_before_final_review( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - live_dir, _ = _planning_context(tmp_path, monkeypatch) + live_dir, mig_root = _planning_context(tmp_path, monkeypatch) responses = _revise_responses() responses[5] = ("1. 
def test_failed_final_review_output_is_not_durable(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a final review without a decision line fails and persists nothing."""
    live_dir, mig_root = _planning_context(tmp_path, monkeypatch)
    expected_error = "planning.final-review failed: Final review produced no output"

    with pytest.raises(ContinuousRefactorError, match=expected_error):
        _run_planning(
            tmp_path,
            live_dir,
            _base_responses() + [("debug line without decision\n", {})],
            monkeypatch,
        )

    # The failed stage left no durable stdout and the cursor did not advance.
    durable_stdout = planning_stage_stdout_path(mig_root, "final-review")
    assert not durable_stdout.exists()
    state = load_planning_state(tmp_path, planning_state_path(mig_root))
    assert state.next_step == "final-review"
def _write_snapshot(root: Path, slug: str, version: str, *, extra: bool = False) -> Path:
    """Create ``root / slug`` holding a plan, one phase doc, and a manifest.

    ``version`` is embedded in the plan and phase text so two snapshots can be
    told apart. With ``extra=True`` a ``notes.md`` file is written as well.
    Returns the created migration directory.
    """
    target = root / slug
    target.mkdir(parents=True)

    # (file name, content) pairs, written in this order before the manifest.
    documents = [
        ("plan.md", f"# Plan {version}\n"),
        (
            _PHASE.file,
            f"## Precondition\n\nalways\n\n## Definition of Done\n\n{version}\n",
        ),
    ]
    if extra:
        documents.append(("notes.md", f"{version}\n"))
    for file_name, text in documents:
        (target / file_name).write_text(text, encoding="utf-8")

    save_manifest(_manifest(slug), target / "manifest.json")
    return target
workspace_dir: Path, + *, + base_snapshot_id: str | None = None, +) -> planning_publish.PlanningPublishRequest: + return planning_publish.PlanningPublishRequest( + repo_root=repo_root, + live_migrations_dir=live_migrations_dir, + slug=slug, + workspace_dir=workspace_dir, + base_snapshot_id=( + base_snapshot_id + if base_snapshot_id is not None + else planning_publish.snapshot_tree_digest(live_migrations_dir / slug) + ), + ) + + +def _tree(path: Path) -> dict[str, str]: + return { + child.relative_to(path).as_posix(): child.read_text(encoding="utf-8") + for child in sorted(path.rglob("*")) + if child.is_file() + } + + +def _commit_all(repo_root: Path, message: str = "commit") -> None: + run_command(["git", "add", "-A"], cwd=repo_root) + run_command(["git", "commit", "-m", message], cwd=repo_root) + + +def _tx(live_migrations_dir: Path, token: str) -> Path: + return live_migrations_dir / "__transactions__" / token + + +def _stable_token(monkeypatch: pytest.MonkeyPatch, token: str) -> None: + monkeypatch.setattr(planning_publish, "_new_transaction_token", lambda: token) + + +def _assert_publish_error( + exc: pytest.ExceptionInfo[planning_publish.PlanningPublishError], + *, + status: str, + message_fragments: tuple[str, ...], +) -> None: + error = exc.value + assert error.result.status == status + for fragment in message_fragments: + assert fragment in str(error) + + +def test_publish_creates_new_live_migration_from_staged_snapshot( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-create") + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.live_dir == live_dir / "auth-cleanup" + assert _tree(live_dir / "auth-cleanup") == _tree(workspace) + findings = 
def test_publish_replaces_existing_non_empty_live_dir_with_backup_transaction(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that publishing over a committed live dir swaps in the workspace tree.

    The old snapshot carries an extra ``notes.md`` that the workspace lacks; it
    must be gone afterwards, and no ``rollback``/``failed`` directories may
    remain in the transaction directory.
    """
    repo = tmp_path / "repo"
    init_repo(repo)
    live_dir = repo / "migrations"
    old_live = _write_snapshot(live_dir, "auth-cleanup", "old", extra=True)
    _commit_all(repo, "old migration")
    workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new")
    _stable_token(monkeypatch, "tx-replace")

    request = _request(repo, live_dir, "auth-cleanup", workspace)
    result = planning_publish.publish_planning_workspace(request)

    assert result.status == "published"
    assert _tree(old_live) == _tree(workspace)
    assert not (old_live / "notes.md").exists()
    for leftover in ("rollback", "failed"):
        assert not (_tx(live_dir, "tx-replace") / leftover).exists()
def test_staged_validation_failure_leaves_live_snapshot_unchanged(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a failure validating the staged copy blocks before live is touched."""
    repo = tmp_path / "repo"
    init_repo(repo)
    live_dir = repo / "migrations"
    old_live = _write_snapshot(live_dir, "auth-cleanup", "old")
    _commit_all(repo, "old migration")
    tree_before = _tree(old_live)
    workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new")
    seen_paths: list[Path] = []
    _stable_token(monkeypatch, "tx-stage-invalid")

    def validate(path: Path, mode: str = "ready-publish") -> None:
        # Record every validated path; reject only the staged copy.
        seen_paths.append(path)
        if path.name != "staged":
            return
        raise ContinuousRefactorError("staged invalid")

    def fail_live_move(source: Path, _destination: Path) -> None:
        # Any attempt to move the live directory is a test failure.
        if source == old_live:
            raise AssertionError("live dir must not move after staged validation fails")

    monkeypatch.setattr(planning_publish, "_validate_snapshot", validate)
    monkeypatch.setattr(planning_publish, "_move_path", fail_live_move)

    with pytest.raises(planning_publish.PlanningPublishError) as exc:
        planning_publish.publish_planning_workspace(
            _request(repo, live_dir, "auth-cleanup", workspace)
        )

    _assert_publish_error(
        exc,
        status="blocked",
        message_fragments=("staged invalid",),
    )
    staged_dir = _tx(live_dir, "tx-stage-invalid") / "staged"
    # The workspace was validated first, then the staged copy.
    assert seen_paths == [workspace, staged_dir]
    assert _tree(old_live) == tree_before
    assert staged_dir.is_dir()
    assert not (_tx(live_dir, "tx-stage-invalid") / "rollback").exists()
def test_publish_rejects_stale_base_snapshot(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that a base digest taken before a later committed edit blocks the publish."""
    repo = tmp_path / "repo"
    init_repo(repo)
    live_dir = repo / "migrations"
    old_live = _write_snapshot(live_dir, "auth-cleanup", "old")
    _commit_all(repo, "old migration")
    # Digest of the live tree *before* the human edit below.
    stale_base = planning_publish.snapshot_tree_digest(old_live)
    edited_plan = old_live / "plan.md"
    edited_plan.write_text("# human edit\n", encoding="utf-8")
    _commit_all(repo, "human migration edit")
    workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new")
    _stable_token(monkeypatch, "tx-stale")

    def fail_live_move(source: Path, _destination: Path) -> None:
        # Any attempt to move the live directory is a test failure.
        if source == old_live:
            raise AssertionError("stale publish must not move live state")

    monkeypatch.setattr(planning_publish, "_move_path", fail_live_move)

    with pytest.raises(planning_publish.PlanningPublishError) as exc:
        planning_publish.publish_planning_workspace(
            _request(
                repo,
                live_dir,
                "auth-cleanup",
                workspace,
                base_snapshot_id=stale_base,
            )
        )

    _assert_publish_error(
        exc,
        status="blocked",
        message_fragments=("stale base snapshot", "base_snapshot_id"),
    )
    # The human edit survives, and only the staged copy exists in the transaction.
    assert edited_plan.read_text(encoding="utf-8") == "# human edit\n"
    assert (_tx(live_dir, "tx-stale") / "staged").is_dir()
    assert not (_tx(live_dir, "tx-stale") / "rollback").exists()
"auth-cleanup", "new") + _stable_token(monkeypatch, "tx-nested-stale") + + def fail_live_move(source: Path, _destination: Path) -> None: + if source == old_live: + raise AssertionError("stale publish must not move live state") + + monkeypatch.setattr(planning_publish, "_move_path", fail_live_move) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request( + repo, + live_dir, + "auth-cleanup", + workspace, + base_snapshot_id=stale_base, + ) + ) + + assert exc.value.result.status == "blocked" + assert "stale base snapshot" in str(exc.value) + assert (nested / "user-note.md").read_text(encoding="utf-8") == "do not drop\n" + assert not (_tx(live_dir, "tx-nested-stale") / "rollback").exists() + + +def test_publish_cleans_backup_after_success( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-clean") + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is None + assert _tree(old_live) == _tree(workspace) + assert not (_tx(live_dir, "tx-clean") / "rollback").exists() + assert not _tx(live_dir, "tx-clean").exists() + + +def test_publish_restores_rollback_when_live_replace_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + 
_stable_token(monkeypatch, "tx-restore") + + def fail_install(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-restore") / "staged" and destination == old_live: + raise OSError("cannot install staged") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_move_path", fail_install) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "failed" + assert "cannot install staged" in str(exc.value) + assert _tree(old_live) == old_tree + assert (_tx(live_dir, "tx-restore") / "staged").is_dir() + assert not (_tx(live_dir, "tx-restore") / "rollback").exists() + + +def test_publish_reports_live_rollback_staged_and_failed_paths_when_rollback_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + _stable_token(monkeypatch, "tx-rollback-fails") + + def validate(path: Path, mode: str = "ready-publish") -> None: + if path == old_live: + raise ContinuousRefactorError("live validation failed") + + def fail_restore(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-rollback-fails") / "rollback" and destination == old_live: + raise OSError("rollback restore failed") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_validate_snapshot", validate) + monkeypatch.setattr(planning_publish, "_move_path", fail_restore) + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = 
def test_publish_refuses_dirty_live_migration_dir(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Check that uncommitted changes inside the live migration block the publish.

    The error must name both the modified tracked file and the new untracked
    file, must not mention leftovers under ``__transactions__``, and no
    transaction directory may be created for this attempt.
    """
    repo = tmp_path / "repo"
    init_repo(repo)
    live_dir = repo / "migrations"
    old_live = _write_snapshot(live_dir, "auth-cleanup", "old")
    _commit_all(repo, "old migration")

    # Dirty the live dir: one modified tracked file, one untracked file.
    (old_live / "plan.md").write_text("# dirty tracked\n", encoding="utf-8")
    (old_live / "local.md").write_text("local\n", encoding="utf-8")
    # Leftover from an unrelated transaction; it must not be reported.
    tx_noise = live_dir / "__transactions__" / "old" / "staged"
    tx_noise.mkdir(parents=True)
    (tx_noise / "ignored.md").write_text("ignored\n", encoding="utf-8")

    workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new")
    _stable_token(monkeypatch, "tx-dirty")

    with pytest.raises(planning_publish.PlanningPublishError) as exc:
        planning_publish.publish_planning_workspace(
            _request(repo, live_dir, "auth-cleanup", workspace)
        )

    message = str(exc.value)
    assert exc.value.result.status == "blocked"
    for expected in (
        "dirty live migration",
        "migrations/auth-cleanup/plan.md",
        "migrations/auth-cleanup/local.md",
    ):
        assert expected in message
    assert "__transactions__" not in message
    assert not _tx(live_dir, "tx-dirty").exists()
live_dir / "auth-cleanup" / "local.cache" + ignored.write_text("operator scratch\n", encoding="utf-8") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + _stable_token(monkeypatch, "tx-ignored-dirty") + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "blocked" + assert "dirty live migration" in message + assert "migrations/auth-cleanup/local.cache" in message + assert ignored.read_text(encoding="utf-8") == "operator scratch\n" + assert not _tx(live_dir, "tx-ignored-dirty").exists() + + +def test_lock_rejects_concurrent_mutation_and_reports_lock_path( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + lock_path = planning_publish.publish_lock_path(live_dir) + lock_path.mkdir(parents=True) + (lock_path / "owner.json").write_text( + '{"pid": 123, "operation": "review", ' + '"created_at": "2026-04-29T12:00:00.000+00:00"}\n', + encoding="utf-8", + ) + _stable_token(monkeypatch, "tx-locked") + + with pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + message = str(exc.value) + assert exc.value.result.status == "blocked" + assert "concurrent mutation" in message + assert str(lock_path) in message + assert "123" in message + assert "review" in message + assert "2026-04-29T12:00:00.000+00:00" in message + assert _tree(old_live) != _tree(workspace) + assert not _tx(live_dir, "tx-locked").exists() + + +def test_publish_reports_lock_cleanup_failure_on_success( + tmp_path: Path, + 
monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_remove = planning_publish._remove_tree + _stable_token(monkeypatch, "tx-lock-cleanup-fails") + + def fail_lock_cleanup(path: Path) -> None: + if path == planning_publish.publish_lock_path(live_dir): + raise OSError("lock cleanup denied") + original_remove(path) + + monkeypatch.setattr(planning_publish, "_remove_tree", fail_lock_cleanup) + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is not None + assert "lock cleanup denied" in result.cleanup_error + assert _tree(old_live) == _tree(workspace) + assert planning_publish.publish_lock_path(live_dir).is_dir() + + +def test_publish_moves_partial_live_to_failed_before_restoring_rollback( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_move = planning_publish._move_path + _stable_token(monkeypatch, "tx-partial-live") + + def fail_install_with_partial_live(source: Path, destination: Path) -> None: + if source == _tx(live_dir, "tx-partial-live") / "staged" and destination == old_live: + destination.mkdir() + (destination / "partial.md").write_text("bad partial\n", encoding="utf-8") + raise OSError("cannot install staged") + original_move(source, destination) + + monkeypatch.setattr(planning_publish, "_move_path", fail_install_with_partial_live) + + with 
pytest.raises(planning_publish.PlanningPublishError) as exc: + planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert exc.value.result.status == "failed" + assert _tree(old_live) == old_tree + assert ( + _tx(live_dir, "tx-partial-live") / "failed" / "partial.md" + ).read_text(encoding="utf-8") == "bad partial\n" + + +def test_transaction_dirs_are_left_for_doctor_when_cleanup_fails( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + repo = tmp_path / "repo" + init_repo(repo) + live_dir = repo / "migrations" + old_live = _write_snapshot(live_dir, "auth-cleanup", "old") + _commit_all(repo, "old migration") + old_tree = _tree(old_live) + workspace = _write_snapshot(tmp_path / "workspace", "auth-cleanup", "new") + original_remove = planning_publish._remove_tree + _stable_token(monkeypatch, "tx-cleanup-fails") + + def fail_cleanup(path: Path) -> None: + if path == _tx(live_dir, "tx-cleanup-fails") / "rollback": + raise OSError("cleanup denied") + original_remove(path) + + monkeypatch.setattr(planning_publish, "_remove_tree", fail_cleanup) + + result = planning_publish.publish_planning_workspace( + _request(repo, live_dir, "auth-cleanup", workspace) + ) + + assert result.status == "published" + assert result.cleanup_error is not None + assert "cleanup denied" in result.cleanup_error + assert _tree(old_live) == _tree(workspace) + assert _tree(_tx(live_dir, "tx-cleanup-fails") / "rollback") == old_tree + assert not (_tx(live_dir, "tx-cleanup-fails") / "failed").exists() diff --git a/tests/test_planning_state.py b/tests/test_planning_state.py new file mode 100644 index 0000000..c0f2abf --- /dev/null +++ b/tests/test_planning_state.py @@ -0,0 +1,873 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from continuous_refactoring.artifacts import ContinuousRefactorError +from continuous_refactoring.planning_state import ( + CompletedPlanningStep, + 
PlanningState, + append_planning_feedback, + complete_planning_step, + is_executable_planning_step, + load_planning_state, + new_planning_state, + planning_stage_stdout_path, + planning_state_path, + planning_step_stdout, + reopen_planning_for_revise, + save_planning_state, + write_planning_stage_stdout, +) + + +_NOW = "2026-04-29T12:00:00.000+00:00" +_LATER = "2026-04-29T12:01:00.000+00:00" + + +def _migration_root(tmp_path: Path) -> tuple[Path, Path]: + repo_root = tmp_path + mig_root = repo_root / "migrations" / "auth-cleanup" + mig_root.mkdir(parents=True) + return repo_root, mig_root + + +def _write_stdout(repo_root: Path, mig_root: Path, step: str, text: str = "ok\n") -> str: + path = planning_stage_stdout_path(mig_root, step) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + return path.relative_to(repo_root).as_posix() + + +def _completed( + repo_root: Path, + mig_root: Path, + name: str, + outcome: str = "completed", +) -> CompletedPlanningStep: + return CompletedPlanningStep( + name=name, + completed_at=_LATER, + outcome=outcome, + outputs={"stdout": _write_stdout(repo_root, mig_root, name)}, + ) + + +def _write_state_payload(path: Path, payload: dict[str, object]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + +def _payload( + *, + repo_root: Path, + mig_root: Path, + next_step: str, + completed_steps: list[dict[str, object]] | None = None, + review_findings: str | None = None, + final_decision: str | None = None, + final_reason: str | None = None, +) -> dict[str, object]: + return { + "schema_version": 1, + "target": "Clean up auth", + "next_step": next_step, + "completed_steps": completed_steps or [], + "started_at": _NOW, + "updated_at": _LATER, + "feedback": [], + "review_findings": review_findings, + "final_decision": final_decision, + "final_reason": final_reason, + } + + +def _ready_completed_payloads( + repo_root: Path, + mig_root: 
Path, + *, + final_outcome: str = "approve-auto", +) -> list[dict[str, object]]: + return [ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + _completed(repo_root, mig_root, "review", "clear").to_payload(), + _completed(repo_root, mig_root, "final-review", final_outcome).to_payload(), + ] + + +def test_is_executable_planning_step_matches_planning_steps() -> None: + assert is_executable_planning_step("approaches") + assert is_executable_planning_step("review-2") + assert not is_executable_planning_step("terminal-ready") + assert not is_executable_planning_step("planning.state") + assert not is_executable_planning_step(True) + + +def test_planning_state_roundtrip_preserves_completed_steps_and_current_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + outputs = {"stdout": _write_stdout(repo_root, mig_root, "approaches")} + updated = complete_planning_step( + state, + "approaches", + "completed", + outputs, + completed_at=_LATER, + agent="codex", + model="gpt-5.5", + effort="low", + ) + + save_planning_state(updated, planning_state_path(mig_root), repo_root=repo_root) + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + + assert loaded.next_step == "pick-best" + assert [step.name for step in loaded.completed_steps] == ["approaches"] + assert loaded.completed_steps[0].outputs == outputs + assert loaded.completed_steps[0].agent == "codex" + assert loaded.completed_steps[0].model == "gpt-5.5" + assert loaded.completed_steps[0].effort == "low" + + +def test_planning_state_defaults_new_plan_to_first_step() -> None: + state = new_planning_state("Clean up auth", now=_NOW) + + assert state.schema_version == 1 + assert state.target == "Clean up auth" + assert state.next_step == "approaches" + assert state.completed_steps == () + 
assert state.review_findings is None + assert state.final_decision is None + assert state.final_reason is None + + +def test_planning_state_records_user_refinement_feedback(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + + state = append_planning_feedback( + state, + "Keep rollout separate.", + "message", + now=_NOW, + ) + state = append_planning_feedback( + state, + "Use the staged publisher.", + "file", + now=_LATER, + ) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + + payload = json.loads(planning_state_path(mig_root).read_text(encoding="utf-8")) + assert payload["feedback"] == [ + { + "received_at": _NOW, + "source": "message", + "text": "Keep rollout separate.", + }, + { + "received_at": _LATER, + "source": "file", + "text": "Use the staged publisher.", + }, + ] + + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + assert [feedback.source for feedback in loaded.feedback] == ["message", "file"] + assert [feedback.text for feedback in loaded.feedback] == [ + "Keep rollout separate.", + "Use the staged publisher.", + ] + + +def test_repeated_planning_step_stdout_keeps_prior_audit_output( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + + first = write_planning_stage_stdout(repo_root, mig_root, "final-review", "first\n") + second = write_planning_stage_stdout(repo_root, mig_root, "final-review", "second\n") + + assert first == { + "stdout": "migrations/auth-cleanup/.planning/stages/final-review.stdout.md" + } + assert second == { + "stdout": "migrations/auth-cleanup/.planning/stages/final-review-2.stdout.md" + } + assert planning_stage_stdout_path(mig_root, "final-review").read_text( + encoding="utf-8" + ) == "first\n" + assert ( + mig_root / ".planning" / "stages" / "final-review-2.stdout.md" + ).read_text(encoding="utf-8") == "second\n" + + +def 
test_reopen_planning_for_revise_appends_revision_anchors( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + + for step, outcome in ( + ("approaches", "completed"), + ("pick-best", "completed"), + ("expand", "completed"), + ("review", "clear"), + ("final-review", "approve-auto"), + ): + state = complete_planning_step( + state, + step, + outcome, + write_planning_stage_stdout(repo_root, mig_root, step, f"{step}\n"), + completed_at=_LATER, + final_reason="ready" if step == "final-review" else None, + ) + + state = reopen_planning_for_revise(state, now=_LATER) + assert state.next_step == "revise" + assert state.revision_base_step_counts == (5,) + + for step, outcome in ( + ("revise", "completed"), + ("review-2", "clear"), + ("final-review", "approve-auto"), + ): + state = complete_planning_step( + state, + step, + outcome, + write_planning_stage_stdout(repo_root, mig_root, step, f"{step} again\n"), + completed_at=_LATER, + final_reason="ready again" if step == "final-review" else None, + ) + + state = reopen_planning_for_revise(state, now=_LATER) + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + + assert loaded.next_step == "revise" + assert loaded.revision_base_step_counts == (5, 8) + + +def test_legacy_revision_base_step_count_decodes_as_single_anchor( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + + for step, outcome in ( + ("approaches", "completed"), + ("pick-best", "completed"), + ("expand", "completed"), + ("review", "clear"), + ("final-review", "approve-auto"), + ): + state = complete_planning_step( + state, + step, + outcome, + write_planning_stage_stdout(repo_root, mig_root, step, f"{step}\n"), + completed_at=_LATER, + final_reason="ready" if step == "final-review" else None, + ) 
+ + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=[step.to_payload() for step in state.completed_steps], + ) + payload["revision_base_step_count"] = 5 + _write_state_payload(planning_state_path(mig_root), payload) + + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + assert loaded.next_step == "revise" + assert loaded.revision_base_step_counts == (5,) + assert loaded.revision_base_step_count == 5 + + save_planning_state(loaded, planning_state_path(mig_root), repo_root=repo_root) + saved = json.loads(planning_state_path(mig_root).read_text(encoding="utf-8")) + assert saved["revision_base_step_counts"] == [5] + assert "revision_base_step_count" not in saved + + +def test_legacy_revision_base_step_count_reopens_human_review_ready_state( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads( + repo_root, + mig_root, + final_outcome="approve-needs-human", + ), + ) + payload["revision_base_step_count"] = 5 + _write_state_payload(path, payload) + + loaded = load_planning_state(repo_root, path) + assert loaded.next_step == "revise" + assert loaded.revision_base_step_counts == (5,) + + +@pytest.mark.parametrize( + ("anchors", "message"), + [ + ([5, 5], "strictly increasing"), + ([5, 4], "strictly increasing"), + ([0], "outside completed history"), + ([6], "outside completed history"), + ], +) +def test_planning_state_rejects_invalid_revision_anchor_order_and_range( + tmp_path: Path, + anchors: list[int], + message: str, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads(repo_root, mig_root), + ) + 
payload["revision_base_step_counts"] = anchors + _write_state_payload(path, payload) + + with pytest.raises(ContinuousRefactorError, match=message): + load_planning_state(repo_root, path) + + +@pytest.mark.parametrize("anchor", [True, "5", 1.0]) +def test_planning_state_rejects_non_integer_revision_anchor( + tmp_path: Path, + anchor: object, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads(repo_root, mig_root), + ) + payload["revision_base_step_counts"] = [anchor] + _write_state_payload(path, payload) + + with pytest.raises( + ContinuousRefactorError, + match="Planning field 'revision_base_step_counts\\[0\\]' must be an integer", + ): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_mixed_legacy_and_current_revision_anchors( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads(repo_root, mig_root), + ) + payload["revision_base_step_count"] = 5 + payload["revision_base_step_counts"] = [5] + _write_state_payload(path, payload) + + with pytest.raises( + ContinuousRefactorError, + match="may not mix revision_base_step_count and revision_base_step_counts", + ): + load_planning_state(repo_root, path) + + +def test_planning_state_allows_null_legacy_anchor_with_current_revision_anchors( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads(repo_root, mig_root), + ) + payload["revision_base_step_count"] = None + payload["revision_base_step_counts"] = [5] + 
_write_state_payload(path, payload) + + loaded = load_planning_state(repo_root, path) + assert loaded.revision_base_step_counts == (5,) + + +def test_planning_state_rejects_revision_anchor_at_non_terminal_cursor( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=[ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + ], + ) + payload["revision_base_step_counts"] = [3] + _write_state_payload(path, payload) + + with pytest.raises( + ContinuousRefactorError, + match="must point at a terminal ready cursor, got 'review'", + ): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_revision_anchor_at_skipped_terminal_cursor( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="revise", + completed_steps=_ready_completed_payloads( + repo_root, + mig_root, + final_outcome="reject", + ), + final_decision="reject", + final_reason="skip", + ) + payload["revision_base_step_counts"] = [5] + _write_state_payload(path, payload) + + with pytest.raises( + ContinuousRefactorError, + match="must point at a terminal ready cursor, got 'terminal-skipped'", + ): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_unknown_current_step(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload(repo_root=repo_root, mig_root=mig_root, next_step="wat"), + ) + + with pytest.raises(ContinuousRefactorError, match="Unknown planning cursor"): + load_planning_state(repo_root, path) + + +def 
test_planning_state_rejects_completed_step_after_current_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": { + "stdout": _write_stdout(repo_root, mig_root, "approaches") + }, + }, + { + "name": "pick-best", + "completed_at": _LATER, + "outcome": "completed", + "outputs": { + "stdout": _write_stdout(repo_root, mig_root, "pick-best") + }, + }, + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_review_to_final_review_when_findings_required_revise( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + review_path = _write_stdout(repo_root, mig_root, "review", "1. 
Fix it.\n") + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="final-review", + review_findings=review_path, + completed_steps=[ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + { + "name": "review", + "completed_at": _LATER, + "outcome": "findings", + "outputs": {"stdout": review_path}, + }, + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + load_planning_state(repo_root, path) + + +def test_planning_state_rejects_revise_without_prior_review_findings( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="review-2", + completed_steps=[ + _completed(repo_root, mig_root, "approaches").to_payload(), + _completed(repo_root, mig_root, "pick-best").to_payload(), + _completed(repo_root, mig_root, "expand").to_payload(), + _completed(repo_root, mig_root, "review", "clear").to_payload(), + _completed(repo_root, mig_root, "revise").to_payload(), + ], + ), + ) + + with pytest.raises(ContinuousRefactorError, match="expected final-review"): + load_planning_state(repo_root, path) + + +def test_planning_state_replays_branching_transition_history(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = new_planning_state("Clean up auth", now=_NOW) + for name in ("approaches", "pick-best", "expand"): + state = complete_planning_step( + state, + name, + "completed", + {"stdout": _write_stdout(repo_root, mig_root, name)}, + completed_at=_LATER, + ) + review_path = _write_stdout(repo_root, mig_root, "review", "1. 
Fix it.\n") + state = complete_planning_step( + state, + "review", + "findings", + {"stdout": review_path}, + completed_at=_LATER, + ) + state = complete_planning_step( + state, + "revise", + "completed", + {"stdout": _write_stdout(repo_root, mig_root, "revise")}, + completed_at=_LATER, + ) + state = complete_planning_step( + state, + "review-2", + "clear", + {"stdout": _write_stdout(repo_root, mig_root, "review-2")}, + completed_at=_LATER, + ) + + save_planning_state(state, planning_state_path(mig_root), repo_root=repo_root) + loaded = load_planning_state(repo_root, planning_state_path(mig_root)) + + assert loaded.next_step == "final-review" + assert loaded.review_findings == review_path + assert [step.name for step in loaded.completed_steps] == [ + "approaches", + "pick-best", + "expand", + "review", + "revise", + "review-2", + ] + + +def test_planning_state_rejects_missing_artifact_for_completed_step( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={ + "stdout": ( + mig_root / ".planning" / "stages" / "approaches.stdout.md" + ).relative_to(repo_root).as_posix() + }, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + _write_state_payload( + planning_state_path(mig_root), + { + "schema_version": state.schema_version, + "target": state.target, + "next_step": state.next_step, + "completed_steps": [ + step.to_payload() for step in state.completed_steps + ], + "started_at": state.started_at, + "updated_at": state.updated_at, + "feedback": list(state.feedback), + "review_findings": state.review_findings, + "final_decision": state.final_decision, + "final_reason": state.final_reason, + }, + ) + + with 
pytest.raises(ContinuousRefactorError, match="missing planning output"): + load_planning_state(repo_root, planning_state_path(mig_root)) + + +@pytest.mark.parametrize( + ("stdout_ref", "message"), + [ + ("/tmp/agent.stdout.log", "repo-relative"), + ("../escape.stdout.md", "repo-relative"), + ("outside/stdout.md", "inside the migration directory"), + ( + "migrations/auth-cleanup/.planning/stages/missing.stdout.md", + "missing planning output", + ), + ], +) +def test_save_planning_state_rejects_invalid_output_refs_before_replacing( + tmp_path: Path, + stdout_ref: str, + message: str, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + path.parent.mkdir(parents=True) + original_content = '{"schema_version": 0}\n' + path.write_text(original_content, encoding="utf-8") + outside = repo_root / "outside" / "stdout.md" + outside.parent.mkdir() + outside.write_text("outside\n", encoding="utf-8") + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={"stdout": stdout_ref}, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match=message): + save_planning_state(state, path, repo_root=repo_root) + + assert path.read_text(encoding="utf-8") == original_content + + +def test_planning_state_atomic_save_preserves_existing_file_on_replace_failure( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + _, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + path.parent.mkdir(parents=True) + original_content = '{"schema_version": 0}\n' + path.write_text(original_content, encoding="utf-8") + + def fail_replace(self: Path, target: Path) -> Path: + raise OSError(f"cannot replace {target} from {self}") + + 
monkeypatch.setattr(Path, "replace", fail_replace) + + with pytest.raises(ContinuousRefactorError, match=f"Could not save planning state {path}"): + save_planning_state( + new_planning_state("Clean up auth", now=_NOW), + path, + repo_root=tmp_path, + ) + + assert path.read_text(encoding="utf-8") == original_content + assert list(path.parent.glob("*.tmp")) == [] + + +def test_complete_planning_step_rejects_impossible_in_memory_cursor( + tmp_path: Path, +) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=(), + started_at=_NOW, + updated_at=_NOW, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match="does not match replayed cursor"): + complete_planning_step( + state, + "pick-best", + "completed", + {"stdout": _write_stdout(repo_root, mig_root, "pick-best")}, + completed_at=_LATER, + ) + + +def test_complete_planning_step_rejects_absolute_output_ref(tmp_path: Path) -> None: + state = new_planning_state("Clean up auth", now=_NOW) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + complete_planning_step( + state, + "approaches", + "completed", + {"stdout": str(tmp_path / "agent.stdout.log")}, + completed_at=_LATER, + ) + + +def test_planning_step_stdout_rejects_unvalidated_output_ref(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + state = PlanningState( + schema_version=1, + target="Clean up auth", + next_step="pick-best", + completed_steps=( + CompletedPlanningStep( + name="approaches", + completed_at=_LATER, + outcome="completed", + outputs={"stdout": str(tmp_path / "agent.stdout.log")}, + ), + ), + started_at=_NOW, + updated_at=_LATER, + feedback=(), + review_findings=None, + final_decision=None, + final_reason=None, + ) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + 
planning_step_stdout( + state, + repo_root, + "approaches", + state_path=planning_state_path(mig_root), + ) + + +def test_planning_state_snapshot_paths_are_repo_relative(tmp_path: Path) -> None: + repo_root, mig_root = _migration_root(tmp_path) + path = planning_state_path(mig_root) + absolute_payload = _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": str(tmp_path / "tmp" / "agent.stdout.log")}, + } + ], + ) + _write_state_payload(path, absolute_payload) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + load_planning_state(repo_root, path) + + escape_payload = dict(absolute_payload) + escape_payload["completed_steps"] = [ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": "../escape.stdout.md"}, + } + ] + _write_state_payload(path, escape_payload) + + with pytest.raises(ContinuousRefactorError, match="repo-relative"): + load_planning_state(repo_root, path) + + valid_ref = _write_stdout(repo_root, mig_root, "approaches") + _write_state_payload( + path, + _payload( + repo_root=repo_root, + mig_root=mig_root, + next_step="pick-best", + completed_steps=[ + { + "name": "approaches", + "completed_at": _LATER, + "outcome": "completed", + "outputs": {"stdout": valid_ref}, + } + ], + ), + ) + + assert load_planning_state(repo_root, path).completed_steps[0].outputs["stdout"] == valid_ref diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 8f599d2..8e9dc05 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -1,5 +1,6 @@ from __future__ import annotations +from dataclasses import replace from pathlib import Path import pytest @@ -7,6 +8,15 @@ from continuous_refactoring.config import TASTE_CURRENT_VERSION, default_taste_text from continuous_refactoring.effort import EffortBudget from continuous_refactoring.migrations import 
MigrationManifest, PhaseSpec +from continuous_refactoring.planning import _build_durable_planning_context +from continuous_refactoring.planning_state import ( + PlanningState, + append_planning_feedback, + complete_planning_step, + new_planning_state, + planning_stage_stdout_path, + reopen_planning_for_revise, +) from continuous_refactoring.prompts import ( CLASSIFIER_PROMPT, CONTINUOUS_REFACTORING_STATUS_BEGIN, @@ -19,12 +29,14 @@ PLANNING_FINAL_REVIEW_PROMPT, PLANNING_PICK_BEST_PROMPT, PLANNING_REVIEW_PROMPT, + REVIEW_PERFORM_PROMPT, compose_full_prompt, compose_classifier_prompt, compose_interview_prompt, compose_phase_execution_prompt, compose_phase_ready_prompt, compose_planning_prompt, + compose_review_perform_prompt, compose_taste_refine_prompt, compose_taste_upgrade_prompt, ) @@ -63,6 +75,7 @@ PLANNING_FINAL_REVIEW_PROMPT, PHASE_READY_CHECK_PROMPT, PHASE_EXECUTION_PROMPT, + REVIEW_PERFORM_PROMPT, ) @@ -93,6 +106,29 @@ def _manifest() -> MigrationManifest: ) +def _terminal_ready_state(repo_root: Path, mig_root: Path) -> PlanningState: + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + for step, outcome, stdout in ( + ("approaches", "completed", "Generated approaches.\n"), + ("pick-best", "completed", "Chose approach.\n"), + ("expand", "completed", "Expanded.\n"), + ("review", "clear", "No findings.\n"), + ("final-review", "approve-auto", "final-decision: approve-auto - ready\n"), + ): + stdout_path = planning_stage_stdout_path(mig_root, step) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(stdout, encoding="utf-8") + state = complete_planning_step( + state, + step, + outcome, + {"stdout": stdout_path.relative_to(repo_root).as_posix()}, + completed_at="2026-04-29T12:00:00.000+00:00", + final_reason="ready" if step == "final-review" else None, + ) + return state + + # --------------------------------------------------------------------------- # Output contracts on prompt constants # 
--------------------------------------------------------------------------- @@ -185,6 +221,155 @@ def test_phase_ready_prompt_does_not_make_fresh_test_evidence_human_review() -> assert "Use `ready: unverifiable` only" in PHASE_READY_CHECK_PROMPT +def test_planning_prompts_name_staged_work_dir_and_keep_taste( + tmp_path: Path, +) -> None: + repo_root = tmp_path / "repo" + live_mig_root = repo_root / "migrations" / "auth-cleanup" + staged_parent = tmp_path / "xdg" / "planning" / "auth-cleanup" / "run" / "work" + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=staged_parent, + migration_name="auth-cleanup", + state=state, + published_migration_root=live_mig_root, + ) + + for stage in _PLANNING_STAGES: + result = compose_planning_prompt(stage, "auth-cleanup", _TASTE, context) + + assert f"## Taste\n{_TASTE}" in result + assert f"Staged work dir: {staged_parent / 'auth-cleanup'}" in result + assert f"Work dir: {staged_parent / 'auth-cleanup'}" in result + assert f"Live migration dir: {live_mig_root}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." 
in result + assert ".planning/state.json" in result + assert ".planning/stages/" in result + assert "failed current-step output" in result + assert "not resume input" in result + + +def test_review_prompt_names_work_dir_and_forbids_live_dir_mutation() -> None: + manifest = replace( + _manifest(), + awaiting_human_review=True, + human_review_reason="Need Hiren to choose rollout order.", + ) + repo_root = Path("/repo") + work_dir = Path("/xdg/projects/p/planning/auth-cleanup/review-1/work/auth-cleanup") + live_dir = Path("/repo/migrations/auth-cleanup") + + result = compose_review_perform_prompt( + "auth-cleanup", + repo_root, + work_dir, + live_dir, + manifest.phases[1], + manifest, + _TASTE, + ) + + assert f"Repo root: {repo_root}" in result + assert f"Work dir: {work_dir}" in result + assert f"Live migration dir: {live_dir}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." in result + assert "Need Hiren to choose rollout order." 
in result + assert f"## Taste\n{_TASTE}" in result + + +def test_refine_prompt_names_work_dir_and_keeps_taste(tmp_path: Path) -> None: + repo_root = tmp_path / "repo" + live_dir = repo_root / "migrations" + mig_root = live_dir / "auth-cleanup" + mig_root.mkdir(parents=True) + (mig_root / "plan.md").write_text("# Plan\n", encoding="utf-8") + state = _terminal_ready_state(repo_root, mig_root) + state = append_planning_feedback( + state, + "Split the risky phase.", + "message", + now="2026-04-29T12:00:00.000+00:00", + ) + state = reopen_planning_for_revise( + state, + now="2026-04-29T12:01:00.000+00:00", + ) + + context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=live_dir, + migration_name="auth-cleanup", + state=state, + extra_context="User refinement feedback:\nSplit the risky phase.", + published_migration_root=mig_root, + ) + result = compose_planning_prompt("expand", "auth-cleanup", _TASTE, context) + + assert f"Work dir: {mig_root}" in result + assert f"Live migration dir: {mig_root}" in result + assert "Writable target: work dir only." in result + assert "Do not mutate the live migration directory." in result + assert "User refinement feedback" in result + assert "Split the risky phase." 
in result + assert f"## Taste\n{_TASTE}" in result + + +def test_review_and_refine_prompts_forbid_live_dir_mutation(tmp_path: Path) -> None: + manifest = replace( + _manifest(), + awaiting_human_review=True, + human_review_reason="Need Hiren to choose rollout order.", + ) + repo_root = tmp_path / "repo" + review_work_dir = tmp_path / "xdg" / "planning" / "auth-cleanup" / "review" / "work" + live_mig_root = repo_root / "migrations" / "auth-cleanup" + review_prompt = compose_review_perform_prompt( + "auth-cleanup", + repo_root, + review_work_dir, + live_mig_root, + manifest.phases[1], + manifest, + _TASTE, + ) + + refine_state = new_planning_state( + "Clean up auth", + now="2026-04-29T12:00:00.000+00:00", + ) + refine_state = append_planning_feedback( + refine_state, + "Split the risky phase.", + "message", + now="2026-04-29T12:01:00.000+00:00", + ) + refine_context = _build_durable_planning_context( + repo_root=repo_root, + live_dir=review_work_dir.parent, + migration_name="work", + state=refine_state, + extra_context="User refinement feedback:\nSplit the risky phase.", + published_migration_root=live_mig_root, + ) + refine_prompt = compose_planning_prompt( + "expand", + "auth-cleanup", + _TASTE, + refine_context, + ) + + for prompt in (review_prompt, refine_prompt): + assert f"Live migration dir: {live_mig_root}" in prompt + assert "Writable target: work dir only." in prompt + assert "Do not mutate the live migration directory." 
in prompt + assert "not resume input" in prompt + assert f"## Taste\n{_TASTE}" in prompt + + @pytest.mark.parametrize("prompt", _PLANNING_PROMPTS_THAT_MENTION_PLAN_MD) @pytest.mark.parametrize("fragment", ("plan.md", "phase--.md")) def test_planning_prompts_reference_plan_artifacts(prompt: str, fragment: str) -> None: @@ -379,6 +564,43 @@ def test_planning_prompt_includes_effort_budget_guidance() -> None: assert "wait for a future run" in result +def test_planning_resume_prompt_uses_durable_state_and_keeps_taste( + tmp_path: Path, +) -> None: + live_dir = tmp_path / "migrations" + mig_root = live_dir / "auth-cleanup" + mig_root.mkdir(parents=True) + state = new_planning_state("Clean up auth", now="2026-04-29T12:00:00.000+00:00") + for name, text in ( + ("approaches", "Generated approaches.\n"), + ("pick-best", "Chose incremental approach.\n"), + ): + stdout_path = planning_stage_stdout_path(mig_root, name) + stdout_path.parent.mkdir(parents=True, exist_ok=True) + stdout_path.write_text(text, encoding="utf-8") + state = complete_planning_step( + state, + name, + "completed", + {"stdout": stdout_path.relative_to(tmp_path).as_posix()}, + completed_at="2026-04-29T12:01:00.000+00:00", + ) + + context = _build_durable_planning_context( + repo_root=tmp_path, + live_dir=live_dir, + migration_name="auth-cleanup", + state=state, + ) + result = compose_planning_prompt("expand", "auth-cleanup", _TASTE, context) + + assert f"## Taste\n{_TASTE}" in result + assert "migrations/auth-cleanup/.planning/stages/pick-best.stdout.md" in result + assert "Chose incremental approach." in result + assert "agent.stdout.log" not in result + assert str(tmp_path / "tmp") not in result + + def test_planning_prompts_describe_phase_effort_metadata() -> None: assert "required_effort: